Home | History | Annotate | Download | only in priv
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                   host_arm_isel.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2012 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    NEON support is
     14    Copyright (C) 2010-2012 Samsung Electronics
     15    contributed by Dmitry Zhurikhin <zhur (at) ispras.ru>
     16               and Kirill Batuzov <batuzovk (at) ispras.ru>
     17 
     18    This program is free software; you can redistribute it and/or
     19    modify it under the terms of the GNU General Public License as
     20    published by the Free Software Foundation; either version 2 of the
     21    License, or (at your option) any later version.
     22 
     23    This program is distributed in the hope that it will be useful, but
     24    WITHOUT ANY WARRANTY; without even the implied warranty of
     25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     26    General Public License for more details.
     27 
     28    You should have received a copy of the GNU General Public License
     29    along with this program; if not, write to the Free Software
     30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     31    02110-1301, USA.
     32 
     33    The GNU General Public License is contained in the file COPYING.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex_ir.h"
     38 #include "libvex.h"
     39 #include "ir_match.h"
     40 
     41 #include "main_util.h"
     42 #include "main_globals.h"
     43 #include "host_generic_regs.h"
     44 #include "host_generic_simd64.h"  // for 32-bit SIMD helpers
     45 #include "host_arm_defs.h"
     46 
     47 
     48 /*---------------------------------------------------------*/
     49 /*--- ARMvfp control word stuff                         ---*/
     50 /*---------------------------------------------------------*/
     51 
     52 /* Vex-generated code expects to run with the FPU set as follows: all
     53    exceptions masked, round-to-nearest, non-vector mode, with the NZCV
     54    flags cleared, and FZ (flush to zero) disabled.  Curiously enough,
     55    this corresponds to a FPSCR value of zero.
     56 
     57    fpscr should therefore be zero on entry to Vex-generated code, and
     58    should be unchanged at exit.  (Or at least the bottom 28 bits
     59    should be zero).
     60 */
     61 
     62 #define DEFAULT_FPSCR 0
     63 
     64 
     65 /*---------------------------------------------------------*/
     66 /*--- ISelEnv                                           ---*/
     67 /*---------------------------------------------------------*/
     68 
     69 /* This carries around:
     70 
     71    - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     72      might encounter.  This is computed before insn selection starts,
     73      and does not change.
     74 
     75    - A mapping from IRTemp to HReg.  This tells the insn selector
     76      which virtual register(s) are associated with each IRTemp
     77      temporary.  This is computed before insn selection starts, and
     78      does not change.  We expect this mapping to map precisely the
     79      same set of IRTemps as the type mapping does.
     80 
     81         - vregmap   holds the primary register for the IRTemp.
     82         - vregmapHI is only used for 64-bit integer-typed
     83              IRTemps.  It holds the identity of a second
     84              32-bit virtual HReg, which holds the high half
     85              of the value.
     86 
     87    - The code array, that is, the insns selected so far.
     88 
     89    - A counter, for generating new virtual registers.
     90 
     91    - The host hardware capabilities word.  This is set at the start
     92      and does not change.
     93 
     94    - A Bool for indicating whether we may generate chain-me
     95      instructions for control flow transfers, or whether we must use
     96      XAssisted.
     97 
     98    - The maximum guest address of any guest insn in this block.
     99      Actually, the address of the highest-addressed byte from any insn
    100      in this block.  Is set at the start and does not change.  This is
    101      used for detecting jumps which are definitely forward-edges from
    102      this block, and therefore can be made (chained) to the fast entry
    103      point of the destination, thereby avoiding the destination's
    104      event check.
    105 
    106    Note, this is all (well, mostly) host-independent.
    107 */
    108 
    109 typedef
    110    struct {
    111       /* Constant -- are set at the start and do not change. */
    112       IRTypeEnv*   type_env;
    113 
    114       HReg*        vregmap;
    115       HReg*        vregmapHI;
    116       Int          n_vregmap;
    117 
    118       UInt         hwcaps;
    119 
    120       Bool         chainingAllowed;
    121       Addr64       max_ga;
    122 
    123       /* These are modified as we go along. */
    124       HInstrArray* code;
    125       Int          vreg_ctr;
    126    }
    127    ISelEnv;
    128 
    129 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
    130 {
    131    vassert(tmp >= 0);
    132    vassert(tmp < env->n_vregmap);
    133    return env->vregmap[tmp];
    134 }
    135 
    136 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
    137 {
    138    vassert(tmp >= 0);
    139    vassert(tmp < env->n_vregmap);
    140    vassert(env->vregmapHI[tmp] != INVALID_HREG);
    141    *vrLO = env->vregmap[tmp];
    142    *vrHI = env->vregmapHI[tmp];
    143 }
    144 
    145 static void addInstr ( ISelEnv* env, ARMInstr* instr )
    146 {
    147    addHInstr(env->code, instr);
    148    if (vex_traceflags & VEX_TRACE_VCODE) {
    149       ppARMInstr(instr);
    150       vex_printf("\n");
    151    }
    152 #if 0
    153    if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
    154          || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
    155          || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
    156       ppARMInstr(instr);
    157       vex_printf("\n");
    158    }
    159 #endif
    160 }
    161 
    162 static HReg newVRegI ( ISelEnv* env )
    163 {
    164    HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
    165    env->vreg_ctr++;
    166    return reg;
    167 }
    168 
    169 static HReg newVRegD ( ISelEnv* env )
    170 {
    171    HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
    172    env->vreg_ctr++;
    173    return reg;
    174 }
    175 
    176 static HReg newVRegF ( ISelEnv* env )
    177 {
    178    HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
    179    env->vreg_ctr++;
    180    return reg;
    181 }
    182 
    183 static HReg newVRegV ( ISelEnv* env )
    184 {
    185    HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
    186    env->vreg_ctr++;
    187    return reg;
    188 }
    189 
    190 /* These are duplicated in guest_arm_toIR.c */
    191 static IRExpr* unop ( IROp op, IRExpr* a )
    192 {
    193    return IRExpr_Unop(op, a);
    194 }
    195 
    196 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
    197 {
    198    return IRExpr_Binop(op, a1, a2);
    199 }
    200 
    201 static IRExpr* bind ( Int binder )
    202 {
    203    return IRExpr_Binder(binder);
    204 }
    205 
    206 
    207 /*---------------------------------------------------------*/
    208 /*--- ISEL: Forward declarations                        ---*/
    209 /*---------------------------------------------------------*/
    210 
    211 /* These are organised as iselXXX and iselXXX_wrk pairs.  The
    212    iselXXX_wrk do the real work, but are not to be called directly.
    213    For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
    214    checks that all returned registers are virtual.  You should not
    215    call the _wrk version directly.
    216 */
    217 static ARMAMode1*  iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
    218 static ARMAMode1*  iselIntExpr_AMode1     ( ISelEnv* env, IRExpr* e );
    219 
    220 static ARMAMode2*  iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
    221 static ARMAMode2*  iselIntExpr_AMode2     ( ISelEnv* env, IRExpr* e );
    222 
    223 static ARMAModeV*  iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
    224 static ARMAModeV*  iselIntExpr_AModeV     ( ISelEnv* env, IRExpr* e );
    225 
    226 static ARMAModeN*  iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
    227 static ARMAModeN*  iselIntExpr_AModeN     ( ISelEnv* env, IRExpr* e );
    228 
    229 static ARMRI84*    iselIntExpr_RI84_wrk
    230         ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
    231 static ARMRI84*    iselIntExpr_RI84
    232         ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
    233 
    234 static ARMRI5*     iselIntExpr_RI5_wrk    ( ISelEnv* env, IRExpr* e );
    235 static ARMRI5*     iselIntExpr_RI5        ( ISelEnv* env, IRExpr* e );
    236 
    237 static ARMCondCode iselCondCode_wrk       ( ISelEnv* env, IRExpr* e );
    238 static ARMCondCode iselCondCode           ( ISelEnv* env, IRExpr* e );
    239 
    240 static HReg        iselIntExpr_R_wrk      ( ISelEnv* env, IRExpr* e );
    241 static HReg        iselIntExpr_R          ( ISelEnv* env, IRExpr* e );
    242 
    243 static void        iselInt64Expr_wrk      ( HReg* rHi, HReg* rLo,
    244                                             ISelEnv* env, IRExpr* e );
    245 static void        iselInt64Expr          ( HReg* rHi, HReg* rLo,
    246                                             ISelEnv* env, IRExpr* e );
    247 
    248 static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
    249 static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );
    250 
    251 static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
    252 static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );
    253 
    254 static HReg        iselNeon64Expr_wrk     ( ISelEnv* env, IRExpr* e );
    255 static HReg        iselNeon64Expr         ( ISelEnv* env, IRExpr* e );
    256 
    257 static HReg        iselNeonExpr_wrk       ( ISelEnv* env, IRExpr* e );
    258 static HReg        iselNeonExpr           ( ISelEnv* env, IRExpr* e );
    259 
    260 /*---------------------------------------------------------*/
    261 /*--- ISEL: Misc helpers                                ---*/
    262 /*---------------------------------------------------------*/
    263 
    264 static UInt ROR32 ( UInt x, UInt sh ) {
    265    vassert(sh >= 0 && sh < 32);
    266    if (sh == 0)
    267       return x;
    268    else
    269       return (x << (32-sh)) | (x >> sh);
    270 }
    271 
    272 /* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
    273    form, and if so return the components. */
    274 static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
    275 {
    276    UInt i;
    277    for (i = 0; i < 16; i++) {
    278       if (0 == (u & 0xFFFFFF00)) {
    279          *u8 = u;
    280          *u4 = i;
    281          return True;
    282       }
    283       u = ROR32(u, 30);
    284    }
    285    vassert(i == 16);
    286    return False;
    287 }
    288 
    289 /* Make a int reg-reg move. */
    290 static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
    291 {
    292    vassert(hregClass(src) == HRcInt32);
    293    vassert(hregClass(dst) == HRcInt32);
    294    return ARMInstr_Mov(dst, ARMRI84_R(src));
    295 }
    296 
    297 /* Set the VFP unit's rounding mode to default (round to nearest). */
    298 static void set_VFP_rounding_default ( ISelEnv* env )
    299 {
    300    /* mov rTmp, #DEFAULT_FPSCR
    301       fmxr fpscr, rTmp
    302    */
    303    HReg rTmp = newVRegI(env);
    304    addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
    305    addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
    306 }
    307 
    308 /* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
    309    expression denoting a value in the range 0 .. 3, indicating a round
    310    mode encoded as per type IRRoundingMode.  Set FPSCR to have the
    311    same rounding.
    312 */
    313 static
    314 void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
    315 {
    316    /* This isn't simple, because 'mode' carries an IR rounding
    317       encoding, and we need to translate that to an ARMvfp one:
    318       The IR encoding:
    319          00  to nearest (the default)
    320          10  to +infinity
    321          01  to -infinity
    322          11  to zero
    323       The ARMvfp encoding:
    324          00  to nearest
    325          01  to +infinity
    326          10  to -infinity
    327          11  to zero
    328       Easy enough to do; just swap the two bits.
    329    */
    330    HReg irrm = iselIntExpr_R(env, mode);
    331    HReg tL   = newVRegI(env);
    332    HReg tR   = newVRegI(env);
    333    HReg t3   = newVRegI(env);
    334    /* tL = irrm << 1;
    335       tR = irrm >> 1;  if we're lucky, these will issue together
    336       tL &= 2;
    337       tR &= 1;         ditto
    338       t3 = tL | tR;
    339       t3 <<= 22;
    340       fmxr fpscr, t3
    341    */
    342    addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
    343    addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
    344    addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
    345    addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
    346    addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
    347    addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
    348    addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
    349 }
    350 
    351 
    352 /*---------------------------------------------------------*/
    353 /*--- ISEL: Function call helpers                       ---*/
    354 /*---------------------------------------------------------*/
    355 
    356 /* Used only in doHelperCall.  See big comment in doHelperCall re
    357    handling of register-parameter args.  This function figures out
    358    whether evaluation of an expression might require use of a fixed
    359    register.  If in doubt return True (safe but suboptimal).
    360 */
    361 static
    362 Bool mightRequireFixedRegs ( IRExpr* e )
    363 {
    364    switch (e->tag) {
    365    case Iex_RdTmp: case Iex_Const: case Iex_Get:
    366       return False;
    367    default:
    368       return True;
    369    }
    370 }
    371 
    372 
    373 /* Do a complete function call.  guard is a Ity_Bit expression
    374    indicating whether or not the call happens.  If guard==NULL, the
    375    call is unconditional.  Returns True iff it managed to handle this
    376    combination of arg/return types, else returns False. */
    377 
    378 static
    379 Bool doHelperCall ( ISelEnv* env,
    380                     Bool passBBP,
    381                     IRExpr* guard, IRCallee* cee, IRExpr** args )
    382 {
    383    ARMCondCode cc;
    384    HReg        argregs[ARM_N_ARGREGS];
    385    HReg        tmpregs[ARM_N_ARGREGS];
    386    Bool        go_fast;
    387    Int         n_args, i, nextArgReg;
    388    ULong       target;
    389 
    390    vassert(ARM_N_ARGREGS == 4);
    391 
    392    /* Marshal args for a call and do the call.
    393 
    394       If passBBP is True, r8 (the baseblock pointer) is to be passed
    395       as the first arg.
    396 
    397       This function only deals with a tiny set of possibilities, which
    398       cover all helpers in practice.  The restrictions are that only
    399       arguments in registers are supported, hence only ARM_N_REGPARMS
    400       x 32 integer bits in total can be passed.  In fact the only
    401       supported arg types are I32 and I64.
    402 
    403       Generating code which is both efficient and correct when
    404       parameters are to be passed in registers is difficult, for the
    405       reasons elaborated in detail in comments attached to
    406       doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
    407       of the method described in those comments.
    408 
    409       The problem is split into two cases: the fast scheme and the
    410       slow scheme.  In the fast scheme, arguments are computed
    411       directly into the target (real) registers.  This is only safe
    412       when we can be sure that computation of each argument will not
    413       trash any real registers set by computation of any other
    414       argument.
    415 
    416       In the slow scheme, all args are first computed into vregs, and
    417       once they are all done, they are moved to the relevant real
    418       regs.  This always gives correct code, but it also gives a bunch
    419       of vreg-to-rreg moves which are usually redundant but are hard
    420       for the register allocator to get rid of.
    421 
    422       To decide which scheme to use, all argument expressions are
    423       first examined.  If they are all so simple that it is clear they
    424       will be evaluated without use of any fixed registers, use the
    425       fast scheme, else use the slow scheme.  Note also that only
    426       unconditional calls may use the fast scheme, since having to
    427       compute a condition expression could itself trash real
    428       registers.
    429 
    430       Note this requires being able to examine an expression and
    431       determine whether or not evaluation of it might use a fixed
    432       register.  That requires knowledge of how the rest of this insn
    433       selector works.  Currently just the following 3 are regarded as
    434       safe -- hopefully they cover the majority of arguments in
    435       practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
    436    */
    437 
    438    /* Note that the cee->regparms field is meaningless on ARM hosts
    439       (since there is only one calling convention) and so we always
    440       ignore it. */
    441 
    442    n_args = 0;
    443    for (i = 0; args[i]; i++)
    444       n_args++;
    445 
    446    argregs[0] = hregARM_R0();
    447    argregs[1] = hregARM_R1();
    448    argregs[2] = hregARM_R2();
    449    argregs[3] = hregARM_R3();
    450 
    451    tmpregs[0] = tmpregs[1] = tmpregs[2] =
    452    tmpregs[3] = INVALID_HREG;
    453 
    454    /* First decide which scheme (slow or fast) is to be used.  First
    455       assume the fast scheme, and select slow if any contraindications
    456       (wow) appear. */
    457 
    458    go_fast = True;
    459 
    460    if (guard) {
    461       if (guard->tag == Iex_Const
    462           && guard->Iex.Const.con->tag == Ico_U1
    463           && guard->Iex.Const.con->Ico.U1 == True) {
    464          /* unconditional */
    465       } else {
    466          /* Not manifestly unconditional -- be conservative. */
    467          go_fast = False;
    468       }
    469    }
    470 
    471    if (go_fast) {
    472       for (i = 0; i < n_args; i++) {
    473          if (mightRequireFixedRegs(args[i])) {
    474             go_fast = False;
    475             break;
    476          }
    477       }
    478    }
    479    /* At this point the scheme to use has been established.  Generate
    480       code to get the arg values into the argument rregs.  If we run
    481       out of arg regs, give up. */
    482 
    483    if (go_fast) {
    484 
    485       /* FAST SCHEME */
    486       nextArgReg = 0;
    487       if (passBBP) {
    488          addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
    489                                      hregARM_R8() ));
    490          nextArgReg++;
    491       }
    492 
    493       for (i = 0; i < n_args; i++) {
    494          IRType aTy = typeOfIRExpr(env->type_env, args[i]);
    495          if (nextArgReg >= ARM_N_ARGREGS)
    496             return False; /* out of argregs */
    497          if (aTy == Ity_I32) {
    498             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
    499                                         iselIntExpr_R(env, args[i]) ));
    500             nextArgReg++;
    501          }
    502          else if (aTy == Ity_I64) {
    503             /* 64-bit args must be passed in an a reg-pair of the form
    504                n:n+1, where n is even.  Hence either r0:r1 or r2:r3.
    505                On a little-endian host, the less significant word is
    506                passed in the lower-numbered register. */
    507             if (nextArgReg & 1) {
    508                if (nextArgReg >= ARM_N_ARGREGS)
    509                   return False; /* out of argregs */
    510                addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
    511                nextArgReg++;
    512             }
    513             if (nextArgReg >= ARM_N_ARGREGS)
    514                return False; /* out of argregs */
    515             HReg raHi, raLo;
    516             iselInt64Expr(&raHi, &raLo, env, args[i]);
    517             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
    518             nextArgReg++;
    519             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
    520             nextArgReg++;
    521          }
    522          else
    523             return False; /* unhandled arg type */
    524       }
    525 
    526       /* Fast scheme only applies for unconditional calls.  Hence: */
    527       cc = ARMcc_AL;
    528 
    529    } else {
    530 
    531       /* SLOW SCHEME; move via temporaries */
    532       nextArgReg = 0;
    533 
    534       if (passBBP) {
    535          /* This is pretty stupid; better to move directly to r0
    536             after the rest of the args are done. */
    537          tmpregs[nextArgReg] = newVRegI(env);
    538          addInstr(env, mk_iMOVds_RR( tmpregs[nextArgReg],
    539                                      hregARM_R8() ));
    540          nextArgReg++;
    541       }
    542 
    543       for (i = 0; i < n_args; i++) {
    544          IRType aTy = typeOfIRExpr(env->type_env, args[i]);
    545          if (nextArgReg >= ARM_N_ARGREGS)
    546             return False; /* out of argregs */
    547          if (aTy == Ity_I32) {
    548             tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
    549             nextArgReg++;
    550          }
    551          else if (aTy == Ity_I64) {
    552             /* Same comment applies as in the Fast-scheme case. */
    553             if (nextArgReg & 1)
    554                nextArgReg++;
    555             if (nextArgReg + 1 >= ARM_N_ARGREGS)
    556                return False; /* out of argregs */
    557             HReg raHi, raLo;
    558             iselInt64Expr(&raHi, &raLo, env, args[i]);
    559             tmpregs[nextArgReg] = raLo;
    560             nextArgReg++;
    561             tmpregs[nextArgReg] = raHi;
    562             nextArgReg++;
    563          }
    564       }
    565 
    566       /* Now we can compute the condition.  We can't do it earlier
    567          because the argument computations could trash the condition
    568          codes.  Be a bit clever to handle the common case where the
    569          guard is 1:Bit. */
    570       cc = ARMcc_AL;
    571       if (guard) {
    572          if (guard->tag == Iex_Const
    573              && guard->Iex.Const.con->tag == Ico_U1
    574              && guard->Iex.Const.con->Ico.U1 == True) {
    575             /* unconditional -- do nothing */
    576          } else {
    577             cc = iselCondCode( env, guard );
    578          }
    579       }
    580 
    581       /* Move the args to their final destinations. */
    582       for (i = 0; i < nextArgReg; i++) {
    583          if (tmpregs[i] == INVALID_HREG) { // Skip invalid regs
    584             addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
    585             continue;
    586          }
    587          /* None of these insns, including any spill code that might
    588             be generated, may alter the condition codes. */
    589          addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
    590       }
    591 
    592    }
    593 
    594    /* Should be assured by checks above */
    595    vassert(nextArgReg <= ARM_N_ARGREGS);
    596 
    597    target = (HWord)Ptr_to_ULong(cee->addr);
    598 
    599    /* nextArgReg doles out argument registers.  Since these are
    600       assigned in the order r0, r1, r2, r3, its numeric value at this
    601       point, which must be between 0 and 4 inclusive, is going to be
    602       equal to the number of arg regs in use for the call.  Hence bake
    603       that number into the call (we'll need to know it when doing
    604       register allocation, to know what regs the call reads.)
    605 
    606       There is a bit of a twist -- harmless but worth recording.
    607       Suppose the arg types are (Ity_I32, Ity_I64).  Then we will have
    608       the first arg in r0 and the second in r3:r2, but r1 isn't used.
    609       We nevertheless have nextArgReg==4 and bake that into the call
    610       instruction.  This will mean the register allocator wil believe
    611       this insn reads r1 when in fact it doesn't.  But that's
    612       harmless; it just artificially extends the live range of r1
    613       unnecessarily.  The best fix would be to put into the
    614       instruction, a bitmask indicating which of r0/1/2/3 carry live
    615       values.  But that's too much hassle. */
    616 
    617    /* Finally, the call itself. */
    618    addInstr(env, ARMInstr_Call( cc, target, nextArgReg ));
    619 
    620    return True; /* success */
    621 }
    622 
    623 
    624 /*---------------------------------------------------------*/
    625 /*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
    626 /*---------------------------------------------------------*/
    627 
    628 /* Select insns for an integer-typed expression, and add them to the
    629    code list.  Return a reg holding the result.  This reg will be a
    630    virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
    631    want to modify it, ask for a new vreg, copy it in there, and modify
    632    the copy.  The register allocator will do its best to map both
    633    vregs to the same real register, so the copies will often disappear
    634    later in the game.
    635 
    636    This should handle expressions of 32, 16 and 8-bit type.  All
    637    results are returned in a 32-bit register.  For 16- and 8-bit
    638    expressions, the upper 16/24 bits are arbitrary, so you should mask
    639    or sign extend partial values if necessary.
    640 */
    641 
    642 /* --------------------- AMode1 --------------------- */
    643 
    644 /* Return an AMode1 which computes the value of the specified
    645    expression, possibly also adding insns to the code list as a
    646    result.  The expression may only be a 32-bit one.
    647 */
    648 
    649 static Bool sane_AMode1 ( ARMAMode1* am )
    650 {
    651    switch (am->tag) {
    652       case ARMam1_RI:
    653          return
    654             toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
    655                     && (hregIsVirtual(am->ARMam1.RI.reg)
    656                         || am->ARMam1.RI.reg == hregARM_R8())
    657                     && am->ARMam1.RI.simm13 >= -4095
    658                     && am->ARMam1.RI.simm13 <= 4095 );
    659       case ARMam1_RRS:
    660          return
    661             toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
    662                     && hregIsVirtual(am->ARMam1.RRS.base)
    663                     && hregClass(am->ARMam1.RRS.index) == HRcInt32
    664                     && hregIsVirtual(am->ARMam1.RRS.index)
    665                     && am->ARMam1.RRS.shift >= 0
    666                     && am->ARMam1.RRS.shift <= 3 );
    667       default:
    668          vpanic("sane_AMode: unknown ARM AMode1 tag");
    669    }
    670 }
    671 
    672 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
    673 {
    674    ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
    675    vassert(sane_AMode1(am));
    676    return am;
    677 }
    678 
    679 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
    680 {
    681    IRType ty = typeOfIRExpr(env->type_env,e);
    682    vassert(ty == Ity_I32);
    683 
    684    /* FIXME: add RRS matching */
    685 
    686    /* {Add32,Sub32}(expr,simm13) */
    687    if (e->tag == Iex_Binop
    688        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
    689        && e->Iex.Binop.arg2->tag == Iex_Const
    690        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
    691       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
    692       if (simm >= -4095 && simm <= 4095) {
    693          HReg reg;
    694          if (e->Iex.Binop.op == Iop_Sub32)
    695             simm = -simm;
    696          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
    697          return ARMAMode1_RI(reg, simm);
    698       }
    699    }
    700 
    701    /* Doesn't match anything in particular.  Generate it into
    702       a register and use that. */
    703    {
    704       HReg reg = iselIntExpr_R(env, e);
    705       return ARMAMode1_RI(reg, 0);
    706    }
    707 
    708 }
    709 
    710 
    711 /* --------------------- AMode2 --------------------- */
    712 
    713 /* Return an AMode2 which computes the value of the specified
    714    expression, possibly also adding insns to the code list as a
    715    result.  The expression may only be a 32-bit one.
    716 */
    717 
    718 static Bool sane_AMode2 ( ARMAMode2* am )
    719 {
    720    switch (am->tag) {
    721       case ARMam2_RI:
    722          return
    723             toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
    724                     && hregIsVirtual(am->ARMam2.RI.reg)
    725                     && am->ARMam2.RI.simm9 >= -255
    726                     && am->ARMam2.RI.simm9 <= 255 );
    727       case ARMam2_RR:
    728          return
    729             toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
    730                     && hregIsVirtual(am->ARMam2.RR.base)
    731                     && hregClass(am->ARMam2.RR.index) == HRcInt32
    732                     && hregIsVirtual(am->ARMam2.RR.index) );
    733       default:
    734          vpanic("sane_AMode: unknown ARM AMode2 tag");
    735    }
    736 }
    737 
    738 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
    739 {
    740    ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
    741    vassert(sane_AMode2(am));
    742    return am;
    743 }
    744 
    745 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
    746 {
    747    IRType ty = typeOfIRExpr(env->type_env,e);
    748    vassert(ty == Ity_I32);
    749 
    750    /* FIXME: add RR matching */
    751 
    752    /* {Add32,Sub32}(expr,simm8) */
    753    if (e->tag == Iex_Binop
    754        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
    755        && e->Iex.Binop.arg2->tag == Iex_Const
    756        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
    757       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
    758       if (simm >= -255 && simm <= 255) {
    759          HReg reg;
    760          if (e->Iex.Binop.op == Iop_Sub32)
    761             simm = -simm;
    762          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
    763          return ARMAMode2_RI(reg, simm);
    764       }
    765    }
    766 
    767    /* Doesn't match anything in particular.  Generate it into
    768       a register and use that. */
    769    {
    770       HReg reg = iselIntExpr_R(env, e);
    771       return ARMAMode2_RI(reg, 0);
    772    }
    773 
    774 }
    775 
    776 
    777 /* --------------------- AModeV --------------------- */
    778 
    779 /* Return an AModeV which computes the value of the specified
    780    expression, possibly also adding insns to the code list as a
    781    result.  The expression may only be a 32-bit one.
    782 */
    783 
    784 static Bool sane_AModeV ( ARMAModeV* am )
    785 {
    786   return toBool( hregClass(am->reg) == HRcInt32
    787                  && hregIsVirtual(am->reg)
    788                  && am->simm11 >= -1020 && am->simm11 <= 1020
    789                  && 0 == (am->simm11 & 3) );
    790 }
    791 
    792 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
    793 {
    794    ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
    795    vassert(sane_AModeV(am));
    796    return am;
    797 }
    798 
    799 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
    800 {
    801    IRType ty = typeOfIRExpr(env->type_env,e);
    802    vassert(ty == Ity_I32);
    803 
    804    /* {Add32,Sub32}(expr, simm8 << 2) */
    805    if (e->tag == Iex_Binop
    806        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
    807        && e->Iex.Binop.arg2->tag == Iex_Const
    808        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
    809       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
    810       if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
    811          HReg reg;
    812          if (e->Iex.Binop.op == Iop_Sub32)
    813             simm = -simm;
    814          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
    815          return mkARMAModeV(reg, simm);
    816       }
    817    }
    818 
    819    /* Doesn't match anything in particular.  Generate it into
    820       a register and use that. */
    821    {
    822       HReg reg = iselIntExpr_R(env, e);
    823       return mkARMAModeV(reg, 0);
    824    }
    825 
    826 }
    827 
    828 /* -------------------- AModeN -------------------- */
    829 
    830 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
    831 {
    832    return iselIntExpr_AModeN_wrk(env, e);
    833 }
    834 
    835 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
    836 {
    837    HReg reg = iselIntExpr_R(env, e);
    838    return mkARMAModeN_R(reg);
    839 }
    840 
    841 
    842 /* --------------------- RI84 --------------------- */
    843 
/* Select instructions to generate 'e' into a RI84.  If mayInv is
   true, then the caller will also accept an I84 form that denotes
   'not e'.  In this case didInv may not be NULL, and *didInv is set
   to True if the returned I84 denotes 'not e', and to False
   otherwise.  This complication is so as to allow generation of an
   RI84 which is suitable for use in either an AND or BIC instruction,
   without knowing (before this call) which one.
*/
    851 static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
    852                                    ISelEnv* env, IRExpr* e )
    853 {
    854    ARMRI84* ri;
    855    if (mayInv)
    856       vassert(didInv != NULL);
    857    ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
    858    /* sanity checks ... */
    859    switch (ri->tag) {
    860       case ARMri84_I84:
    861          return ri;
    862       case ARMri84_R:
    863          vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
    864          vassert(hregIsVirtual(ri->ARMri84.R.reg));
    865          return ri;
    866       default:
    867          vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
    868    }
    869 }
    870 
    871 /* DO NOT CALL THIS DIRECTLY ! */
    872 static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
    873                                        ISelEnv* env, IRExpr* e )
    874 {
    875    IRType ty = typeOfIRExpr(env->type_env,e);
    876    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
    877 
    878    if (didInv) *didInv = False;
    879 
    880    /* special case: immediate */
    881    if (e->tag == Iex_Const) {
    882       UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
    883       switch (e->Iex.Const.con->tag) {
    884          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
    885          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
    886          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
    887          default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
    888       }
    889       if (fitsIn8x4(&u8, &u4, u)) {
    890          return ARMRI84_I84( (UShort)u8, (UShort)u4 );
    891       }
    892       if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
    893          vassert(didInv);
    894          *didInv = True;
    895          return ARMRI84_I84( (UShort)u8, (UShort)u4 );
    896       }
    897       /* else fail, fall through to default case */
    898    }
    899 
    900    /* default case: calculate into a register and return that */
    901    {
    902       HReg r = iselIntExpr_R ( env, e );
    903       return ARMRI84_R(r);
    904    }
    905 }
    906 
    907 
    908 /* --------------------- RI5 --------------------- */
    909 
    910 /* Select instructions to generate 'e' into a RI5. */
    911 
    912 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
    913 {
    914    ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
    915    /* sanity checks ... */
    916    switch (ri->tag) {
    917       case ARMri5_I5:
    918          return ri;
    919       case ARMri5_R:
    920          vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
    921          vassert(hregIsVirtual(ri->ARMri5.R.reg));
    922          return ri;
    923       default:
    924          vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
    925    }
    926 }
    927 
    928 /* DO NOT CALL THIS DIRECTLY ! */
    929 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
    930 {
    931    IRType ty = typeOfIRExpr(env->type_env,e);
    932    vassert(ty == Ity_I32 || ty == Ity_I8);
    933 
    934    /* special case: immediate */
    935    if (e->tag == Iex_Const) {
    936       UInt u; /* both invalid */
    937       switch (e->Iex.Const.con->tag) {
    938          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
    939          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
    940          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
    941          default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
    942       }
    943       if (u >= 1 && u <= 31) {
    944          return ARMRI5_I5(u);
    945       }
    946       /* else fail, fall through to default case */
    947    }
    948 
    949    /* default case: calculate into a register and return that */
    950    {
    951       HReg r = iselIntExpr_R ( env, e );
    952       return ARMRI5_R(r);
    953    }
    954 }
    955 
    956 
    957 /* ------------------- CondCode ------------------- */
    958 
/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */
    962 
    963 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
    964 {
    965    ARMCondCode cc = iselCondCode_wrk(env,e);
    966    vassert(cc != ARMcc_NV);
    967    return cc;
    968 }
    969 
    970 static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
    971 {
    972    vassert(e);
    973    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
    974 
    975    /* var */
    976    if (e->tag == Iex_RdTmp) {
    977       HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
    978       /* CmpOrTst doesn't modify rTmp; so this is OK. */
    979       ARMRI84* one  = ARMRI84_I84(1,0);
    980       addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
    981       return ARMcc_NE;
    982    }
    983 
    984    /* Not1(e) */
    985    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
    986       /* Generate code for the arg, and negate the test condition */
    987       return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
    988    }
    989 
    990    /* --- patterns rooted at: 32to1 --- */
    991 
    992    if (e->tag == Iex_Unop
    993        && e->Iex.Unop.op == Iop_32to1) {
    994       HReg     rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
    995       ARMRI84* one  = ARMRI84_I84(1,0);
    996       addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
    997       return ARMcc_NE;
    998    }
    999 
   1000    /* --- patterns rooted at: CmpNEZ8 --- */
   1001 
   1002    if (e->tag == Iex_Unop
   1003        && e->Iex.Unop.op == Iop_CmpNEZ8) {
   1004       HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
   1005       ARMRI84* xFF  = ARMRI84_I84(0xFF,0);
   1006       addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
   1007       return ARMcc_NE;
   1008    }
   1009 
   1010    /* --- patterns rooted at: CmpNEZ32 --- */
   1011 
   1012    if (e->tag == Iex_Unop
   1013        && e->Iex.Unop.op == Iop_CmpNEZ32) {
   1014       HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
   1015       ARMRI84* zero = ARMRI84_I84(0,0);
   1016       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
   1017       return ARMcc_NE;
   1018    }
   1019 
   1020    /* --- patterns rooted at: CmpNEZ64 --- */
   1021 
   1022    if (e->tag == Iex_Unop
   1023        && e->Iex.Unop.op == Iop_CmpNEZ64) {
   1024       HReg     tHi, tLo;
   1025       HReg     tmp  = newVRegI(env);
   1026       ARMRI84* zero = ARMRI84_I84(0,0);
   1027       iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
   1028       addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
   1029       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
   1030       return ARMcc_NE;
   1031    }
   1032 
   1033    /* --- Cmp*32*(x,y) --- */
   1034    if (e->tag == Iex_Binop
   1035        && (e->Iex.Binop.op == Iop_CmpEQ32
   1036            || e->Iex.Binop.op == Iop_CmpNE32
   1037            || e->Iex.Binop.op == Iop_CmpLT32S
   1038            || e->Iex.Binop.op == Iop_CmpLT32U
   1039            || e->Iex.Binop.op == Iop_CmpLE32S
   1040            || e->Iex.Binop.op == Iop_CmpLE32U)) {
   1041       HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1042       ARMRI84* argR = iselIntExpr_RI84(NULL,False,
   1043                                        env, e->Iex.Binop.arg2);
   1044       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
   1045       switch (e->Iex.Binop.op) {
   1046          case Iop_CmpEQ32:  return ARMcc_EQ;
   1047          case Iop_CmpNE32:  return ARMcc_NE;
   1048          case Iop_CmpLT32S: return ARMcc_LT;
   1049          case Iop_CmpLT32U: return ARMcc_LO;
   1050          case Iop_CmpLE32S: return ARMcc_LE;
   1051          case Iop_CmpLE32U: return ARMcc_LS;
   1052          default: vpanic("iselCondCode(arm): CmpXX32");
   1053       }
   1054    }
   1055 
   1056    /* --- CasCmpEQ* --- */
   1057    /* Ist_Cas has a dummy argument to compare with, so comparison is
   1058       always true. */
   1059    if (e->tag == Iex_Binop
   1060        && (e->Iex.Binop.op == Iop_CasCmpEQ32
   1061            || e->Iex.Binop.op == Iop_CasCmpEQ16
   1062            || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
   1063       return ARMcc_AL;
   1064    }
   1065 
   1066    ppIRExpr(e);
   1067    vpanic("iselCondCode");
   1068 }
   1069 
   1070 
   1071 /* --------------------- Reg --------------------- */
   1072 
   1073 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
   1074 {
   1075    HReg r = iselIntExpr_R_wrk(env, e);
   1076    /* sanity checks ... */
   1077 #  if 0
   1078    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   1079 #  endif
   1080    vassert(hregClass(r) == HRcInt32);
   1081    vassert(hregIsVirtual(r));
   1082    return r;
   1083 }
   1084 
   1085 /* DO NOT CALL THIS DIRECTLY ! */
   1086 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
   1087 {
   1088    IRType ty = typeOfIRExpr(env->type_env,e);
   1089    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
   1090 //   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
   1091 
   1092    switch (e->tag) {
   1093 
   1094    /* --------- TEMP --------- */
   1095    case Iex_RdTmp: {
   1096       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   1097    }
   1098 
   1099    /* --------- LOAD --------- */
   1100    case Iex_Load: {
   1101       HReg dst  = newVRegI(env);
   1102 
   1103       if (e->Iex.Load.end != Iend_LE)
   1104          goto irreducible;
   1105 
   1106       if (ty == Ity_I32) {
   1107          ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
   1108          addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, dst, amode));
   1109          return dst;
   1110       }
   1111       if (ty == Ity_I16) {
   1112          ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
   1113          addInstr(env, ARMInstr_LdSt16(True/*isLoad*/, False/*!signedLoad*/,
   1114                                        dst, amode));
   1115          return dst;
   1116       }
   1117       if (ty == Ity_I8) {
   1118          ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
   1119          addInstr(env, ARMInstr_LdSt8U(True/*isLoad*/, dst, amode));
   1120          return dst;
   1121       }
   1122 
   1123 //zz      if (ty == Ity_I16) {
   1124 //zz         addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
   1125 //zz         return dst;
   1126 //zz      }
   1127 //zz      if (ty == Ity_I8) {
   1128 //zz         addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
   1129 //zz         return dst;
   1130 //zz      }
   1131       break;
   1132    }
   1133 
   1134 //zz   /* --------- TERNARY OP --------- */
   1135 //zz   case Iex_Triop: {
   1136 //zz      IRTriop *triop = e->Iex.Triop.details;
   1137 //zz      /* C3210 flags following FPU partial remainder (fprem), both
   1138 //zz         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
   1139 //zz      if (triop->op == Iop_PRemC3210F64
   1140 //zz          || triop->op == Iop_PRem1C3210F64) {
   1141 //zz         HReg junk = newVRegF(env);
   1142 //zz         HReg dst  = newVRegI(env);
   1143 //zz         HReg srcL = iselDblExpr(env, triop->arg2);
   1144 //zz         HReg srcR = iselDblExpr(env, triop->arg3);
   1145 //zz         /* XXXROUNDINGFIXME */
   1146 //zz         /* set roundingmode here */
   1147 //zz         addInstr(env, X86Instr_FpBinary(
   1148 //zz                           e->Iex.Binop.op==Iop_PRemC3210F64
   1149 //zz                              ? Xfp_PREM : Xfp_PREM1,
   1150 //zz                           srcL,srcR,junk
   1151 //zz                 ));
   1152 //zz         /* The previous pseudo-insn will have left the FPU's C3210
   1153 //zz            flags set correctly.  So bag them. */
   1154 //zz         addInstr(env, X86Instr_FpStSW_AX());
   1155 //zz         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
   1156 //zz         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
   1157 //zz         return dst;
   1158 //zz      }
   1159 //zz
   1160 //zz      break;
   1161 //zz   }
   1162 
   1163    /* --------- BINARY OP --------- */
   1164    case Iex_Binop: {
   1165 
   1166       ARMAluOp   aop = 0; /* invalid */
   1167       ARMShiftOp sop = 0; /* invalid */
   1168 
   1169       /* ADD/SUB/AND/OR/XOR */
   1170       switch (e->Iex.Binop.op) {
   1171          case Iop_And32: {
   1172             Bool     didInv = False;
   1173             HReg     dst    = newVRegI(env);
   1174             HReg     argL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1175             ARMRI84* argR   = iselIntExpr_RI84(&didInv, True/*mayInv*/,
   1176                                                env, e->Iex.Binop.arg2);
   1177             addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
   1178                                        dst, argL, argR));
   1179             return dst;
   1180          }
   1181          case Iop_Or32:  aop = ARMalu_OR;  goto std_binop;
   1182          case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
   1183          case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
   1184          case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
   1185          std_binop: {
   1186             HReg     dst  = newVRegI(env);
   1187             HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1188             ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
   1189                                              env, e->Iex.Binop.arg2);
   1190             addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
   1191             return dst;
   1192          }
   1193          default: break;
   1194       }
   1195 
   1196       /* SDIV/UDIV */
   1197       if (e->Iex.Binop.op == Iop_DivU32 || e->Iex.Binop.op == Iop_DivS32) {
   1198          HReg     dst  = newVRegI(env);
   1199          HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1200          HReg     argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1201 
   1202          addInstr(env,
   1203                   ARMInstr_Div(e->Iex.Binop.op == Iop_DivU32 ?
   1204                                   ARMdiv_U : ARMdiv_S,
   1205                                dst, argL, argR));
   1206          return dst;
   1207       }
   1208 
   1209       /* SHL/SHR/SAR */
   1210       switch (e->Iex.Binop.op) {
   1211          case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
   1212          case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
   1213          case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
   1214          sh_binop: {
   1215             HReg    dst  = newVRegI(env);
   1216             HReg    argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1217             ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
   1218             addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
   1219             vassert(ty == Ity_I32); /* else the IR is ill-typed */
   1220             return dst;
   1221          }
   1222          default: break;
   1223       }
   1224 
   1225       /* MUL */
   1226       if (e->Iex.Binop.op == Iop_Mul32) {
   1227          HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1228          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1229          HReg dst  = newVRegI(env);
   1230          addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
   1231          addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
   1232          addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
   1233          addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
   1234          return dst;
   1235       }
   1236 
   1237       /* Handle misc other ops. */
   1238 
   1239       if (e->Iex.Binop.op == Iop_Max32U) {
   1240          HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1241          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1242          HReg dst  = newVRegI(env);
   1243          addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
   1244                                          ARMRI84_R(argR)));
   1245          addInstr(env, mk_iMOVds_RR(dst, argL));
   1246          addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
   1247          return dst;
   1248       }
   1249 
   1250       if (e->Iex.Binop.op == Iop_CmpF64) {
   1251          HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
   1252          HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
   1253          HReg dst = newVRegI(env);
   1254          /* Do the compare (FCMPD) and set NZCV in FPSCR.  Then also do
   1255             FMSTAT, so we can examine the results directly. */
   1256          addInstr(env, ARMInstr_VCmpD(dL, dR));
   1257          /* Create in dst, the IRCmpF64Result encoded result. */
   1258          addInstr(env, ARMInstr_Imm32(dst, 0));
   1259          addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
   1260          addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
   1261          addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
   1262          addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
   1263          return dst;
   1264       }
   1265 
   1266       if (e->Iex.Binop.op == Iop_F64toI32S
   1267           || e->Iex.Binop.op == Iop_F64toI32U) {
   1268          /* Wretched uglyness all round, due to having to deal
   1269             with rounding modes.  Oh well. */
   1270          /* FIXME: if arg1 is a constant indicating round-to-zero,
   1271             then we could skip all this arsing around with FPSCR and
   1272             simply emit FTO{S,U}IZD. */
   1273          Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
   1274          HReg valD  = iselDblExpr(env, e->Iex.Binop.arg2);
   1275          set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
   1276          /* FTO{S,U}ID valF, valD */
   1277          HReg valF = newVRegF(env);
   1278          addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
   1279                                        valF, valD));
   1280          set_VFP_rounding_default(env);
   1281          /* VMOV dst, valF */
   1282          HReg dst = newVRegI(env);
   1283          addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
   1284          return dst;
   1285       }
   1286 
   1287       if (e->Iex.Binop.op == Iop_GetElem8x8
   1288           || e->Iex.Binop.op == Iop_GetElem16x4
   1289           || e->Iex.Binop.op == Iop_GetElem32x2) {
   1290          HReg res = newVRegI(env);
   1291          HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
   1292          UInt index, size;
   1293          if (e->Iex.Binop.arg2->tag != Iex_Const ||
   1294              typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   1295             vpanic("ARM target supports GetElem with constant "
   1296                    "second argument only\n");
   1297          }
   1298          index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   1299          switch (e->Iex.Binop.op) {
   1300             case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
   1301             case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
   1302             case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
   1303             default: vassert(0);
   1304          }
   1305          addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
   1306                                         mkARMNRS(ARMNRS_Reg, res, 0),
   1307                                         mkARMNRS(ARMNRS_Scalar, arg, index),
   1308                                         size, False));
   1309          return res;
   1310       }
   1311 
   1312       if (e->Iex.Binop.op == Iop_GetElem8x16
   1313           || e->Iex.Binop.op == Iop_GetElem16x8
   1314           || e->Iex.Binop.op == Iop_GetElem32x4) {
   1315          HReg res = newVRegI(env);
   1316          HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
   1317          UInt index, size;
   1318          if (e->Iex.Binop.arg2->tag != Iex_Const ||
   1319              typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   1320             vpanic("ARM target supports GetElem with constant "
   1321                    "second argument only\n");
   1322          }
   1323          index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   1324          switch (e->Iex.Binop.op) {
   1325             case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
   1326             case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
   1327             case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
   1328             default: vassert(0);
   1329          }
   1330          addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
   1331                                         mkARMNRS(ARMNRS_Reg, res, 0),
   1332                                         mkARMNRS(ARMNRS_Scalar, arg, index),
   1333                                         size, True));
   1334          return res;
   1335       }
   1336 
   1337       /* All cases involving host-side helper calls. */
   1338       void* fn = NULL;
   1339       switch (e->Iex.Binop.op) {
   1340          case Iop_Add16x2:
   1341             fn = &h_generic_calc_Add16x2; break;
   1342          case Iop_Sub16x2:
   1343             fn = &h_generic_calc_Sub16x2; break;
   1344          case Iop_HAdd16Ux2:
   1345             fn = &h_generic_calc_HAdd16Ux2; break;
   1346          case Iop_HAdd16Sx2:
   1347             fn = &h_generic_calc_HAdd16Sx2; break;
   1348          case Iop_HSub16Ux2:
   1349             fn = &h_generic_calc_HSub16Ux2; break;
   1350          case Iop_HSub16Sx2:
   1351             fn = &h_generic_calc_HSub16Sx2; break;
   1352          case Iop_QAdd16Sx2:
   1353             fn = &h_generic_calc_QAdd16Sx2; break;
   1354          case Iop_QSub16Sx2:
   1355             fn = &h_generic_calc_QSub16Sx2; break;
   1356          case Iop_Add8x4:
   1357             fn = &h_generic_calc_Add8x4; break;
   1358          case Iop_Sub8x4:
   1359             fn = &h_generic_calc_Sub8x4; break;
   1360          case Iop_HAdd8Ux4:
   1361             fn = &h_generic_calc_HAdd8Ux4; break;
   1362          case Iop_HAdd8Sx4:
   1363             fn = &h_generic_calc_HAdd8Sx4; break;
   1364          case Iop_HSub8Ux4:
   1365             fn = &h_generic_calc_HSub8Ux4; break;
   1366          case Iop_HSub8Sx4:
   1367             fn = &h_generic_calc_HSub8Sx4; break;
   1368          case Iop_QAdd8Sx4:
   1369             fn = &h_generic_calc_QAdd8Sx4; break;
   1370          case Iop_QAdd8Ux4:
   1371             fn = &h_generic_calc_QAdd8Ux4; break;
   1372          case Iop_QSub8Sx4:
   1373             fn = &h_generic_calc_QSub8Sx4; break;
   1374          case Iop_QSub8Ux4:
   1375             fn = &h_generic_calc_QSub8Ux4; break;
   1376          case Iop_Sad8Ux4:
   1377             fn = &h_generic_calc_Sad8Ux4; break;
   1378          case Iop_QAdd32S:
   1379             fn = &h_generic_calc_QAdd32S; break;
   1380          case Iop_QSub32S:
   1381             fn = &h_generic_calc_QSub32S; break;
   1382          case Iop_QSub16Ux2:
   1383             fn = &h_generic_calc_QSub16Ux2; break;
   1384          default:
   1385             break;
   1386       }
   1387 
   1388       if (fn) {
   1389          HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1390          HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1391          HReg res  = newVRegI(env);
   1392          addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
   1393          addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
   1394          addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 2 ));
   1395          addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
   1396          return res;
   1397       }
   1398 
   1399       break;
   1400    }
   1401 
   1402    /* --------- UNARY OP --------- */
   1403    case Iex_Unop: {
   1404 
   1405 //zz      /* 1Uto8(32to1(expr32)) */
   1406 //zz      if (e->Iex.Unop.op == Iop_1Uto8) {
   1407 //zz         DECLARE_PATTERN(p_32to1_then_1Uto8);
   1408 //zz         DEFINE_PATTERN(p_32to1_then_1Uto8,
   1409 //zz                        unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
   1410 //zz         if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
   1411 //zz            IRExpr* expr32 = mi.bindee[0];
   1412 //zz            HReg dst = newVRegI(env);
   1413 //zz            HReg src = iselIntExpr_R(env, expr32);
   1414 //zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
   1415 //zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
   1416 //zz                                          X86RMI_Imm(1), dst));
   1417 //zz            return dst;
   1418 //zz         }
   1419 //zz      }
   1420 //zz
   1421 //zz      /* 8Uto32(LDle(expr32)) */
   1422 //zz      if (e->Iex.Unop.op == Iop_8Uto32) {
   1423 //zz         DECLARE_PATTERN(p_LDle8_then_8Uto32);
   1424 //zz         DEFINE_PATTERN(p_LDle8_then_8Uto32,
   1425 //zz                        unop(Iop_8Uto32,
   1426 //zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
   1427 //zz         if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
   1428 //zz            HReg dst = newVRegI(env);
   1429 //zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
   1430 //zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
   1431 //zz            return dst;
   1432 //zz         }
   1433 //zz      }
   1434 //zz
   1435 //zz      /* 8Sto32(LDle(expr32)) */
   1436 //zz      if (e->Iex.Unop.op == Iop_8Sto32) {
   1437 //zz         DECLARE_PATTERN(p_LDle8_then_8Sto32);
   1438 //zz         DEFINE_PATTERN(p_LDle8_then_8Sto32,
   1439 //zz                        unop(Iop_8Sto32,
   1440 //zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
   1441 //zz         if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
   1442 //zz            HReg dst = newVRegI(env);
   1443 //zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
   1444 //zz            addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
   1445 //zz            return dst;
   1446 //zz         }
   1447 //zz      }
   1448 //zz
   1449 //zz      /* 16Uto32(LDle(expr32)) */
   1450 //zz      if (e->Iex.Unop.op == Iop_16Uto32) {
   1451 //zz         DECLARE_PATTERN(p_LDle16_then_16Uto32);
   1452 //zz         DEFINE_PATTERN(p_LDle16_then_16Uto32,
   1453 //zz                        unop(Iop_16Uto32,
   1454 //zz                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
   1455 //zz         if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
   1456 //zz            HReg dst = newVRegI(env);
   1457 //zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
   1458 //zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
   1459 //zz            return dst;
   1460 //zz         }
   1461 //zz      }
   1462 //zz
   1463 //zz      /* 8Uto32(GET:I8) */
   1464 //zz      if (e->Iex.Unop.op == Iop_8Uto32) {
   1465 //zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
   1466 //zz            HReg      dst;
   1467 //zz            X86AMode* amode;
   1468 //zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
   1469 //zz            dst = newVRegI(env);
   1470 //zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
   1471 //zz                                hregX86_EBP());
   1472 //zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
   1473 //zz            return dst;
   1474 //zz         }
   1475 //zz      }
   1476 //zz
   1477 //zz      /* 16to32(GET:I16) */
   1478 //zz      if (e->Iex.Unop.op == Iop_16Uto32) {
   1479 //zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
   1480 //zz            HReg      dst;
   1481 //zz            X86AMode* amode;
   1482 //zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
   1483 //zz            dst = newVRegI(env);
   1484 //zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
   1485 //zz                                hregX86_EBP());
   1486 //zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
   1487 //zz            return dst;
   1488 //zz         }
   1489 //zz      }
   1490 
   1491       switch (e->Iex.Unop.op) {
   1492          case Iop_8Uto32: {
   1493             HReg dst = newVRegI(env);
   1494             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1495             addInstr(env, ARMInstr_Alu(ARMalu_AND,
   1496                                        dst, src, ARMRI84_I84(0xFF,0)));
   1497             return dst;
   1498          }
   1499 //zz         case Iop_8Uto16:
   1500 //zz         case Iop_8Uto32:
   1501 //zz         case Iop_16Uto32: {
   1502 //zz            HReg dst = newVRegI(env);
   1503 //zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1504 //zz            UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
   1505 //zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
   1506 //zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
   1507 //zz                                          X86RMI_Imm(mask), dst));
   1508 //zz            return dst;
   1509 //zz         }
   1510 //zz         case Iop_8Sto16:
   1511 //zz         case Iop_8Sto32:
   1512          case Iop_16Uto32: {
   1513             HReg dst = newVRegI(env);
   1514             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1515             ARMRI5* amt = ARMRI5_I5(16);
   1516             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
   1517             addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
   1518             return dst;
   1519          }
   1520          case Iop_8Sto32:
   1521          case Iop_16Sto32: {
   1522             HReg dst = newVRegI(env);
   1523             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1524             ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
   1525             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
   1526             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
   1527             return dst;
   1528          }
   1529 //zz         case Iop_Not8:
   1530 //zz         case Iop_Not16:
   1531          case Iop_Not32: {
   1532             HReg dst = newVRegI(env);
   1533             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1534             addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
   1535             return dst;
   1536          }
   1537          case Iop_64HIto32: {
   1538             HReg rHi, rLo;
   1539             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
   1540             return rHi; /* and abandon rLo .. poor wee thing :-) */
   1541          }
   1542          case Iop_64to32: {
   1543             HReg rHi, rLo;
   1544             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
   1545             return rLo; /* similar stupid comment to the above ... */
   1546          }
   1547          case Iop_64to8: {
   1548             HReg rHi, rLo;
   1549             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   1550                HReg tHi = newVRegI(env);
   1551                HReg tLo = newVRegI(env);
   1552                HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
   1553                addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
   1554                rHi = tHi;
   1555                rLo = tLo;
   1556             } else {
   1557                iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
   1558             }
   1559             return rLo;
   1560          }
   1561 //zz         case Iop_16HIto8:
   1562 //zz         case Iop_32HIto16: {
   1563 //zz            HReg dst  = newVRegI(env);
   1564 //zz            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
   1565 //zz            Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
   1566 //zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
   1567 //zz            addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
   1568 //zz            return dst;
   1569 //zz         }
   1570          case Iop_1Uto32:
   1571          case Iop_1Uto8: {
   1572             HReg        dst  = newVRegI(env);
   1573             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   1574             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
   1575             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
   1576             return dst;
   1577          }
   1578 
   1579          case Iop_1Sto32: {
   1580             HReg        dst  = newVRegI(env);
   1581             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   1582             ARMRI5*     amt  = ARMRI5_I5(31);
   1583             /* This is really rough.  We could do much better here;
   1584                perhaps mvn{cond} dst, #0 as the second insn?
   1585                (same applies to 1Sto64) */
   1586             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
   1587             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
   1588             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
   1589             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
   1590             return dst;
   1591          }
   1592 
   1593 
   1594 //zz         case Iop_1Sto8:
   1595 //zz         case Iop_1Sto16:
   1596 //zz         case Iop_1Sto32: {
   1597 //zz            /* could do better than this, but for now ... */
   1598 //zz            HReg dst         = newVRegI(env);
   1599 //zz            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   1600 //zz            addInstr(env, X86Instr_Set32(cond,dst));
   1601 //zz            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
   1602 //zz            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
   1603 //zz            return dst;
   1604 //zz         }
   1605 //zz         case Iop_Ctz32: {
   1606 //zz            /* Count trailing zeroes, implemented by x86 'bsfl' */
   1607 //zz            HReg dst = newVRegI(env);
   1608 //zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1609 //zz            addInstr(env, X86Instr_Bsfr32(True,src,dst));
   1610 //zz            return dst;
   1611 //zz         }
   1612          case Iop_Clz32: {
   1613             /* Count leading zeroes; easy on ARM. */
   1614             HReg dst = newVRegI(env);
   1615             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1616             addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
   1617             return dst;
   1618          }
   1619 
   1620          case Iop_CmpwNEZ32: {
   1621             HReg dst = newVRegI(env);
   1622             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1623             addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
   1624             addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
   1625             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
   1626             return dst;
   1627          }
   1628 
   1629          case Iop_Left32: {
   1630             HReg dst = newVRegI(env);
   1631             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1632             addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
   1633             addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
   1634             return dst;
   1635          }
   1636 
   1637 //zz         case Iop_V128to32: {
   1638 //zz            HReg      dst  = newVRegI(env);
   1639 //zz            HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
   1640 //zz            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   1641 //zz            sub_from_esp(env, 16);
   1642 //zz            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
   1643 //zz            addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
   1644 //zz            add_to_esp(env, 16);
   1645 //zz            return dst;
   1646 //zz         }
   1647 //zz
   1648          case Iop_ReinterpF32asI32: {
   1649             HReg dst = newVRegI(env);
   1650             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
   1651             addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
   1652             return dst;
   1653          }
   1654 
   1655 //zz
   1656 //zz         case Iop_16to8:
   1657          case Iop_32to8:
   1658          case Iop_32to16:
   1659             /* These are no-ops. */
   1660             return iselIntExpr_R(env, e->Iex.Unop.arg);
   1661 
   1662          default:
   1663             break;
   1664       }
   1665 
   1666       /* All Unop cases involving host-side helper calls. */
   1667       void* fn = NULL;
   1668       switch (e->Iex.Unop.op) {
   1669          case Iop_CmpNEZ16x2:
   1670             fn = &h_generic_calc_CmpNEZ16x2; break;
   1671          case Iop_CmpNEZ8x4:
   1672             fn = &h_generic_calc_CmpNEZ8x4; break;
   1673          default:
   1674             break;
   1675       }
   1676 
   1677       if (fn) {
   1678          HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
   1679          HReg res = newVRegI(env);
   1680          addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
   1681          addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 1 ));
   1682          addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
   1683          return res;
   1684       }
   1685 
   1686       break;
   1687    }
   1688 
   1689    /* --------- GET --------- */
   1690    case Iex_Get: {
   1691       if (ty == Ity_I32
   1692           && 0 == (e->Iex.Get.offset & 3)
   1693           && e->Iex.Get.offset < 4096-4) {
   1694          HReg dst = newVRegI(env);
   1695          addInstr(env, ARMInstr_LdSt32(
   1696                           True/*isLoad*/,
   1697                           dst,
   1698                           ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
   1699          return dst;
   1700       }
   1701 //zz      if (ty == Ity_I8 || ty == Ity_I16) {
   1702 //zz         HReg dst = newVRegI(env);
   1703 //zz         addInstr(env, X86Instr_LoadEX(
   1704 //zz                          toUChar(ty==Ity_I8 ? 1 : 2),
   1705 //zz                          False,
   1706 //zz                          X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
   1707 //zz                          dst));
   1708 //zz         return dst;
   1709 //zz      }
   1710       break;
   1711    }
   1712 
   1713 //zz   case Iex_GetI: {
   1714 //zz      X86AMode* am
   1715 //zz         = genGuestArrayOffset(
   1716 //zz              env, e->Iex.GetI.descr,
   1717 //zz                   e->Iex.GetI.ix, e->Iex.GetI.bias );
   1718 //zz      HReg dst = newVRegI(env);
   1719 //zz      if (ty == Ity_I8) {
   1720 //zz         addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
   1721 //zz         return dst;
   1722 //zz      }
   1723 //zz      if (ty == Ity_I32) {
   1724 //zz         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
   1725 //zz         return dst;
   1726 //zz      }
   1727 //zz      break;
   1728 //zz   }
   1729 
   1730    /* --------- CCALL --------- */
   1731    case Iex_CCall: {
   1732       HReg    dst = newVRegI(env);
   1733       vassert(ty == e->Iex.CCall.retty);
   1734 
   1735       /* be very restrictive for now.  Only 32/64-bit ints allowed
   1736          for args, and 32 bits for return type. */
   1737       if (e->Iex.CCall.retty != Ity_I32)
   1738          goto irreducible;
   1739 
   1740       /* Marshal args, do the call, clear stack. */
   1741       Bool ok = doHelperCall( env, False,
   1742                               NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
   1743       if (ok) {
   1744          addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
   1745          return dst;
   1746       }
   1747       /* else fall through; will hit the irreducible: label */
   1748    }
   1749 
   1750    /* --------- LITERAL --------- */
   1751    /* 32 literals */
   1752    case Iex_Const: {
   1753       UInt u   = 0;
   1754       HReg dst = newVRegI(env);
   1755       switch (e->Iex.Const.con->tag) {
   1756          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
   1757          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
   1758          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
   1759          default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
   1760       }
   1761       addInstr(env, ARMInstr_Imm32(dst, u));
   1762       return dst;
   1763    }
   1764 
   1765    /* --------- MULTIPLEX --------- */
   1766    case Iex_Mux0X: {
   1767       IRExpr* cond = e->Iex.Mux0X.cond;
   1768 
   1769       /* Mux0X( 32to8(1Uto32(ccexpr)), expr0, exprX ) */
   1770       if (ty == Ity_I32
   1771           && cond->tag == Iex_Unop
   1772           && cond->Iex.Unop.op == Iop_32to8
   1773           && cond->Iex.Unop.arg->tag == Iex_Unop
   1774           && cond->Iex.Unop.arg->Iex.Unop.op == Iop_1Uto32) {
   1775          ARMCondCode cc;
   1776          HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
   1777          ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
   1778          HReg     dst = newVRegI(env);
   1779          addInstr(env, mk_iMOVds_RR(dst, rX));
   1780          cc = iselCondCode(env, cond->Iex.Unop.arg->Iex.Unop.arg);
   1781          addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
   1782          return dst;
   1783       }
   1784 
   1785       /* Mux0X(cond, expr0, exprX) (general case) */
   1786       if (ty == Ity_I32) {
   1787          HReg     r8;
   1788          HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
   1789          ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
   1790          HReg     dst = newVRegI(env);
   1791          addInstr(env, mk_iMOVds_RR(dst, rX));
   1792          r8 = iselIntExpr_R(env, cond);
   1793          addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
   1794                                          ARMRI84_I84(0xFF,0)));
   1795          addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, r0));
   1796          return dst;
   1797       }
   1798       break;
   1799    }
   1800 
   1801    default:
   1802    break;
   1803    } /* switch (e->tag) */
   1804 
   1805    /* We get here if no pattern matched. */
   1806   irreducible:
   1807    ppIRExpr(e);
   1808    vpanic("iselIntExpr_R: cannot reduce tree");
   1809 }
   1810 
   1811 
   1812 /* -------------------- 64-bit -------------------- */
   1813 
   1814 /* Compute a 64-bit value into a register pair, which is returned as
   1815    the first two parameters.  As with iselIntExpr_R, these may be
   1816    either real or virtual regs; in any case they must not be changed
   1817    by subsequent code emitted by the caller.  */
   1818 
   1819 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
   1820 {
   1821    iselInt64Expr_wrk(rHi, rLo, env, e);
   1822 #  if 0
   1823    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   1824 #  endif
   1825    vassert(hregClass(*rHi) == HRcInt32);
   1826    vassert(hregIsVirtual(*rHi));
   1827    vassert(hregClass(*rLo) == HRcInt32);
   1828    vassert(hregIsVirtual(*rLo));
   1829 }
   1830 
   1831 /* DO NOT CALL THIS DIRECTLY ! */
   1832 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
   1833 {
   1834    vassert(e);
   1835    vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
   1836 
   1837    /* 64-bit literal */
   1838    if (e->tag == Iex_Const) {
   1839       ULong   w64 = e->Iex.Const.con->Ico.U64;
   1840       UInt    wHi = toUInt(w64 >> 32);
   1841       UInt    wLo = toUInt(w64);
   1842       HReg    tHi = newVRegI(env);
   1843       HReg    tLo = newVRegI(env);
   1844       vassert(e->Iex.Const.con->tag == Ico_U64);
   1845       addInstr(env, ARMInstr_Imm32(tHi, wHi));
   1846       addInstr(env, ARMInstr_Imm32(tLo, wLo));
   1847       *rHi = tHi;
   1848       *rLo = tLo;
   1849       return;
   1850    }
   1851 
   1852    /* read 64-bit IRTemp */
   1853    if (e->tag == Iex_RdTmp) {
   1854       if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   1855          HReg tHi = newVRegI(env);
   1856          HReg tLo = newVRegI(env);
   1857          HReg tmp = iselNeon64Expr(env, e);
   1858          addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
   1859          *rHi = tHi;
   1860          *rLo = tLo;
   1861       } else {
   1862          lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
   1863       }
   1864       return;
   1865    }
   1866 
   1867    /* 64-bit load */
   1868    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   1869       HReg      tLo, tHi, rA;
   1870       vassert(e->Iex.Load.ty == Ity_I64);
   1871       rA  = iselIntExpr_R(env, e->Iex.Load.addr);
   1872       tHi = newVRegI(env);
   1873       tLo = newVRegI(env);
   1874       addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, ARMAMode1_RI(rA, 4)));
   1875       addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, ARMAMode1_RI(rA, 0)));
   1876       *rHi = tHi;
   1877       *rLo = tLo;
   1878       return;
   1879    }
   1880 
   1881    /* 64-bit GET */
   1882    if (e->tag == Iex_Get) {
   1883       ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
   1884       ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
   1885       HReg tHi = newVRegI(env);
   1886       HReg tLo = newVRegI(env);
   1887       addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, am4));
   1888       addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, am0));
   1889       *rHi = tHi;
   1890       *rLo = tLo;
   1891       return;
   1892    }
   1893 
   1894    /* --------- BINARY ops --------- */
   1895    if (e->tag == Iex_Binop) {
   1896       switch (e->Iex.Binop.op) {
   1897 
   1898          /* 32 x 32 -> 64 multiply */
   1899          case Iop_MullS32:
   1900          case Iop_MullU32: {
   1901             HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1902             HReg     argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1903             HReg     tHi  = newVRegI(env);
   1904             HReg     tLo  = newVRegI(env);
   1905             ARMMulDivOp mop  = e->Iex.Binop.op == Iop_MullS32
   1906                                ? ARMmul_SX : ARMmul_ZX;
   1907             addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
   1908             addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
   1909             addInstr(env, ARMInstr_Mul(mop));
   1910             addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
   1911             addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
   1912             *rHi = tHi;
   1913             *rLo = tLo;
   1914             return;
   1915          }
   1916 
   1917          case Iop_Or64: {
   1918             HReg xLo, xHi, yLo, yHi;
   1919             HReg tHi = newVRegI(env);
   1920             HReg tLo = newVRegI(env);
   1921             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   1922             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
   1923             addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
   1924             addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
   1925             *rHi = tHi;
   1926             *rLo = tLo;
   1927             return;
   1928          }
   1929 
   1930          case Iop_Add64: {
   1931             HReg xLo, xHi, yLo, yHi;
   1932             HReg tHi = newVRegI(env);
   1933             HReg tLo = newVRegI(env);
   1934             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   1935             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
   1936             addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
   1937             addInstr(env, ARMInstr_Alu(ARMalu_ADC,  tHi, xHi, ARMRI84_R(yHi)));
   1938             *rHi = tHi;
   1939             *rLo = tLo;
   1940             return;
   1941          }
   1942 
   1943          /* 32HLto64(e1,e2) */
   1944          case Iop_32HLto64: {
   1945             *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1946             *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1947             return;
   1948          }
   1949 
   1950          default:
   1951             break;
   1952       }
   1953    }
   1954 
   1955    /* --------- UNARY ops --------- */
   1956    if (e->tag == Iex_Unop) {
   1957       switch (e->Iex.Unop.op) {
   1958 
   1959          /* ReinterpF64asI64 */
   1960          case Iop_ReinterpF64asI64: {
   1961             HReg dstHi = newVRegI(env);
   1962             HReg dstLo = newVRegI(env);
   1963             HReg src   = iselDblExpr(env, e->Iex.Unop.arg);
   1964             addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
   1965             *rHi = dstHi;
   1966             *rLo = dstLo;
   1967             return;
   1968          }
   1969 
   1970          /* Left64(e) */
   1971          case Iop_Left64: {
   1972             HReg yLo, yHi;
   1973             HReg tHi  = newVRegI(env);
   1974             HReg tLo  = newVRegI(env);
   1975             HReg zero = newVRegI(env);
   1976             /* yHi:yLo = arg */
   1977             iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
   1978             /* zero = 0 */
   1979             addInstr(env, ARMInstr_Imm32(zero, 0));
   1980             /* tLo = 0 - yLo, and set carry */
   1981             addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
   1982                                        tLo, zero, ARMRI84_R(yLo)));
   1983             /* tHi = 0 - yHi - carry */
   1984             addInstr(env, ARMInstr_Alu(ARMalu_SBC,
   1985                                        tHi, zero, ARMRI84_R(yHi)));
   1986             /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
   1987                back in, so as to give the final result
   1988                tHi:tLo = arg | -arg. */
   1989             addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
   1990             addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
   1991             *rHi = tHi;
   1992             *rLo = tLo;
   1993             return;
   1994          }
   1995 
   1996          /* CmpwNEZ64(e) */
   1997          case Iop_CmpwNEZ64: {
   1998             HReg srcLo, srcHi;
   1999             HReg tmp1 = newVRegI(env);
   2000             HReg tmp2 = newVRegI(env);
   2001             /* srcHi:srcLo = arg */
   2002             iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
   2003             /* tmp1 = srcHi | srcLo */
   2004             addInstr(env, ARMInstr_Alu(ARMalu_OR,
   2005                                        tmp1, srcHi, ARMRI84_R(srcLo)));
   2006             /* tmp2 = (tmp1 | -tmp1) >>s 31 */
   2007             addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
   2008             addInstr(env, ARMInstr_Alu(ARMalu_OR,
   2009                                        tmp2, tmp2, ARMRI84_R(tmp1)));
   2010             addInstr(env, ARMInstr_Shift(ARMsh_SAR,
   2011                                          tmp2, tmp2, ARMRI5_I5(31)));
   2012             *rHi = tmp2;
   2013             *rLo = tmp2;
   2014             return;
   2015          }
   2016 
   2017          case Iop_1Sto64: {
   2018             HReg        dst  = newVRegI(env);
   2019             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   2020             ARMRI5*     amt  = ARMRI5_I5(31);
   2021             /* This is really rough.  We could do much better here;
   2022                perhaps mvn{cond} dst, #0 as the second insn?
   2023                (same applies to 1Sto32) */
   2024             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
   2025             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
   2026             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
   2027             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
   2028             *rHi = dst;
   2029             *rLo = dst;
   2030             return;
   2031          }
   2032 
   2033          default:
   2034             break;
   2035       }
   2036    } /* if (e->tag == Iex_Unop) */
   2037 
   2038    /* --------- MULTIPLEX --------- */
   2039    if (e->tag == Iex_Mux0X) {
   2040       IRType ty8;
   2041       HReg   r8, rXhi, rXlo, r0hi, r0lo, dstHi, dstLo;
   2042       ty8 = typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond);
   2043       vassert(ty8 == Ity_I8);
   2044       iselInt64Expr(&rXhi, &rXlo, env, e->Iex.Mux0X.exprX);
   2045       iselInt64Expr(&r0hi, &r0lo, env, e->Iex.Mux0X.expr0);
   2046       dstHi = newVRegI(env);
   2047       dstLo = newVRegI(env);
   2048       addInstr(env, mk_iMOVds_RR(dstHi, rXhi));
   2049       addInstr(env, mk_iMOVds_RR(dstLo, rXlo));
   2050       r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
   2051       addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
   2052                                       ARMRI84_I84(0xFF,0)));
   2053       addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstHi, ARMRI84_R(r0hi)));
   2054       addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstLo, ARMRI84_R(r0lo)));
   2055       *rHi = dstHi;
   2056       *rLo = dstLo;
   2057       return;
   2058    }
   2059 
   2060    /* It is convenient sometimes to call iselInt64Expr even when we
   2061       have NEON support (e.g. in do_helper_call we need 64-bit
   2062       arguments as 2 x 32 regs). */
   2063    if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   2064       HReg tHi = newVRegI(env);
   2065       HReg tLo = newVRegI(env);
   2066       HReg tmp = iselNeon64Expr(env, e);
   2067       addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
   2068       *rHi = tHi;
   2069       *rLo = tLo;
   2070       return ;
   2071    }
   2072 
   2073    ppIRExpr(e);
   2074    vpanic("iselInt64Expr");
   2075 }
   2076 
   2077 
   2078 /*---------------------------------------------------------*/
   2079 /*--- ISEL: Vector (NEON) expressions (64 or 128 bit)   ---*/
   2080 /*---------------------------------------------------------*/
   2081 
   2082 static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
   2083 {
   2084    HReg r = iselNeon64Expr_wrk( env, e );
   2085    vassert(hregClass(r) == HRcFlt64);
   2086    vassert(hregIsVirtual(r));
   2087    return r;
   2088 }
   2089 
   2090 /* DO NOT CALL THIS DIRECTLY */
   2091 static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
   2092 {
   2093    IRType ty = typeOfIRExpr(env->type_env, e);
   2094    MatchInfo mi;
   2095    vassert(e);
   2096    vassert(ty == Ity_I64);
   2097 
   2098    if (e->tag == Iex_RdTmp) {
   2099       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   2100    }
   2101 
   2102    if (e->tag == Iex_Const) {
   2103       HReg rLo, rHi;
   2104       HReg res = newVRegD(env);
   2105       iselInt64Expr(&rHi, &rLo, env, e);
   2106       addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   2107       return res;
   2108    }
   2109 
   2110    /* 64-bit load */
   2111    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   2112       HReg res = newVRegD(env);
   2113       ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
   2114       vassert(ty == Ity_I64);
   2115       addInstr(env, ARMInstr_NLdStD(True, res, am));
   2116       return res;
   2117    }
   2118 
   2119    /* 64-bit GET */
   2120    if (e->tag == Iex_Get) {
   2121       HReg addr = newVRegI(env);
   2122       HReg res = newVRegD(env);
   2123       vassert(ty == Ity_I64);
   2124       addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
   2125       addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
   2126       return res;
   2127    }
   2128 
   2129    /* --------- BINARY ops --------- */
   2130    if (e->tag == Iex_Binop) {
   2131       switch (e->Iex.Binop.op) {
   2132 
   2133          /* 32 x 32 -> 64 multiply */
   2134          case Iop_MullS32:
   2135          case Iop_MullU32: {
   2136             HReg rLo, rHi;
   2137             HReg res = newVRegD(env);
   2138             iselInt64Expr(&rHi, &rLo, env, e);
   2139             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   2140             return res;
   2141          }
   2142 
   2143          case Iop_And64: {
   2144             HReg res = newVRegD(env);
   2145             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2146             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2147             addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
   2148                                            res, argL, argR, 4, False));
   2149             return res;
   2150          }
   2151          case Iop_Or64: {
   2152             HReg res = newVRegD(env);
   2153             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2154             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2155             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   2156                                            res, argL, argR, 4, False));
   2157             return res;
   2158          }
   2159          case Iop_Xor64: {
   2160             HReg res = newVRegD(env);
   2161             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2162             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2163             addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
   2164                                            res, argL, argR, 4, False));
   2165             return res;
   2166          }
   2167 
   2168          /* 32HLto64(e1,e2) */
   2169          case Iop_32HLto64: {
   2170             HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
   2171             HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2172             HReg res = newVRegD(env);
   2173             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   2174             return res;
   2175          }
   2176 
   2177          case Iop_Add8x8:
   2178          case Iop_Add16x4:
   2179          case Iop_Add32x2:
   2180          case Iop_Add64: {
   2181             HReg res = newVRegD(env);
   2182             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2183             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2184             UInt size;
   2185             switch (e->Iex.Binop.op) {
   2186                case Iop_Add8x8: size = 0; break;
   2187                case Iop_Add16x4: size = 1; break;
   2188                case Iop_Add32x2: size = 2; break;
   2189                case Iop_Add64: size = 3; break;
   2190                default: vassert(0);
   2191             }
   2192             addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
   2193                                            res, argL, argR, size, False));
   2194             return res;
   2195          }
   2196          case Iop_Add32Fx2: {
   2197             HReg res = newVRegD(env);
   2198             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2199             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2200             UInt size = 0;
   2201             addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
   2202                                            res, argL, argR, size, False));
   2203             return res;
   2204          }
   2205          case Iop_Recps32Fx2: {
   2206             HReg res = newVRegD(env);
   2207             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2208             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2209             UInt size = 0;
   2210             addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
   2211                                            res, argL, argR, size, False));
   2212             return res;
   2213          }
   2214          case Iop_Rsqrts32Fx2: {
   2215             HReg res = newVRegD(env);
   2216             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2217             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2218             UInt size = 0;
   2219             addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
   2220                                            res, argL, argR, size, False));
   2221             return res;
   2222          }
   2223          case Iop_InterleaveOddLanes8x8:
   2224          case Iop_InterleaveOddLanes16x4:
   2225          case Iop_InterleaveLO32x2:
   2226          case Iop_InterleaveEvenLanes8x8:
   2227          case Iop_InterleaveEvenLanes16x4:
   2228          case Iop_InterleaveHI32x2: {
   2229             HReg tmp = newVRegD(env);
   2230             HReg res = newVRegD(env);
   2231             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2232             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2233             UInt size;
   2234             UInt is_lo;
   2235             switch (e->Iex.Binop.op) {
   2236                case Iop_InterleaveOddLanes8x8: is_lo = 1; size = 0; break;
   2237                case Iop_InterleaveEvenLanes8x8: is_lo = 0; size = 0; break;
   2238                case Iop_InterleaveOddLanes16x4: is_lo = 1; size = 1; break;
   2239                case Iop_InterleaveEvenLanes16x4: is_lo = 0; size = 1; break;
   2240                case Iop_InterleaveLO32x2: is_lo = 1; size = 2; break;
   2241                case Iop_InterleaveHI32x2: is_lo = 0; size = 2; break;
   2242                default: vassert(0);
   2243             }
   2244             if (is_lo) {
   2245                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2246                                              tmp, argL, 4, False));
   2247                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2248                                              res, argR, 4, False));
   2249                addInstr(env, ARMInstr_NDual(ARMneon_TRN,
   2250                                             res, tmp, size, False));
   2251             } else {
   2252                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2253                                              tmp, argR, 4, False));
   2254                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2255                                              res, argL, 4, False));
   2256                addInstr(env, ARMInstr_NDual(ARMneon_TRN,
   2257                                             tmp, res, size, False));
   2258             }
   2259             return res;
   2260          }
   2261          case Iop_InterleaveHI8x8:
   2262          case Iop_InterleaveHI16x4:
   2263          case Iop_InterleaveLO8x8:
   2264          case Iop_InterleaveLO16x4: {
   2265             HReg tmp = newVRegD(env);
   2266             HReg res = newVRegD(env);
   2267             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2268             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2269             UInt size;
   2270             UInt is_lo;
   2271             switch (e->Iex.Binop.op) {
   2272                case Iop_InterleaveHI8x8: is_lo = 1; size = 0; break;
   2273                case Iop_InterleaveLO8x8: is_lo = 0; size = 0; break;
   2274                case Iop_InterleaveHI16x4: is_lo = 1; size = 1; break;
   2275                case Iop_InterleaveLO16x4: is_lo = 0; size = 1; break;
   2276                default: vassert(0);
   2277             }
   2278             if (is_lo) {
   2279                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2280                                              tmp, argL, 4, False));
   2281                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2282                                              res, argR, 4, False));
   2283                addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
   2284                                             res, tmp, size, False));
   2285             } else {
   2286                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2287                                              tmp, argR, 4, False));
   2288                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2289                                              res, argL, 4, False));
   2290                addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
   2291                                             tmp, res, size, False));
   2292             }
   2293             return res;
   2294          }
   2295          case Iop_CatOddLanes8x8:
   2296          case Iop_CatOddLanes16x4:
   2297          case Iop_CatEvenLanes8x8:
   2298          case Iop_CatEvenLanes16x4: {
   2299             HReg tmp = newVRegD(env);
   2300             HReg res = newVRegD(env);
   2301             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2302             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2303             UInt size;
   2304             UInt is_lo;
   2305             switch (e->Iex.Binop.op) {
   2306                case Iop_CatOddLanes8x8: is_lo = 1; size = 0; break;
   2307                case Iop_CatEvenLanes8x8: is_lo = 0; size = 0; break;
   2308                case Iop_CatOddLanes16x4: is_lo = 1; size = 1; break;
   2309                case Iop_CatEvenLanes16x4: is_lo = 0; size = 1; break;
   2310                default: vassert(0);
   2311             }
   2312             if (is_lo) {
   2313                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2314                                              tmp, argL, 4, False));
   2315                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2316                                              res, argR, 4, False));
   2317                addInstr(env, ARMInstr_NDual(ARMneon_UZP,
   2318                                             res, tmp, size, False));
   2319             } else {
   2320                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2321                                              tmp, argR, 4, False));
   2322                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   2323                                              res, argL, 4, False));
   2324                addInstr(env, ARMInstr_NDual(ARMneon_UZP,
   2325                                             tmp, res, size, False));
   2326             }
   2327             return res;
   2328          }
   2329          case Iop_QAdd8Ux8:
   2330          case Iop_QAdd16Ux4:
   2331          case Iop_QAdd32Ux2:
   2332          case Iop_QAdd64Ux1: {
   2333             HReg res = newVRegD(env);
   2334             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2335             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2336             UInt size;
   2337             switch (e->Iex.Binop.op) {
   2338                case Iop_QAdd8Ux8: size = 0; break;
   2339                case Iop_QAdd16Ux4: size = 1; break;
   2340                case Iop_QAdd32Ux2: size = 2; break;
   2341                case Iop_QAdd64Ux1: size = 3; break;
   2342                default: vassert(0);
   2343             }
   2344             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
   2345                                            res, argL, argR, size, False));
   2346             return res;
   2347          }
   2348          case Iop_QAdd8Sx8:
   2349          case Iop_QAdd16Sx4:
   2350          case Iop_QAdd32Sx2:
   2351          case Iop_QAdd64Sx1: {
   2352             HReg res = newVRegD(env);
   2353             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2354             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2355             UInt size;
   2356             switch (e->Iex.Binop.op) {
   2357                case Iop_QAdd8Sx8: size = 0; break;
   2358                case Iop_QAdd16Sx4: size = 1; break;
   2359                case Iop_QAdd32Sx2: size = 2; break;
   2360                case Iop_QAdd64Sx1: size = 3; break;
   2361                default: vassert(0);
   2362             }
   2363             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
   2364                                            res, argL, argR, size, False));
   2365             return res;
   2366          }
   2367          case Iop_Sub8x8:
   2368          case Iop_Sub16x4:
   2369          case Iop_Sub32x2:
   2370          case Iop_Sub64: {
   2371             HReg res = newVRegD(env);
   2372             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2373             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2374             UInt size;
   2375             switch (e->Iex.Binop.op) {
   2376                case Iop_Sub8x8: size = 0; break;
   2377                case Iop_Sub16x4: size = 1; break;
   2378                case Iop_Sub32x2: size = 2; break;
   2379                case Iop_Sub64: size = 3; break;
   2380                default: vassert(0);
   2381             }
   2382             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   2383                                            res, argL, argR, size, False));
   2384             return res;
   2385          }
   2386          case Iop_Sub32Fx2: {
   2387             HReg res = newVRegD(env);
   2388             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2389             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2390             UInt size = 0;
   2391             addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
   2392                                            res, argL, argR, size, False));
   2393             return res;
   2394          }
   2395          case Iop_QSub8Ux8:
   2396          case Iop_QSub16Ux4:
   2397          case Iop_QSub32Ux2:
   2398          case Iop_QSub64Ux1: {
   2399             HReg res = newVRegD(env);
   2400             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2401             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2402             UInt size;
   2403             switch (e->Iex.Binop.op) {
   2404                case Iop_QSub8Ux8: size = 0; break;
   2405                case Iop_QSub16Ux4: size = 1; break;
   2406                case Iop_QSub32Ux2: size = 2; break;
   2407                case Iop_QSub64Ux1: size = 3; break;
   2408                default: vassert(0);
   2409             }
   2410             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
   2411                                            res, argL, argR, size, False));
   2412             return res;
   2413          }
   2414          case Iop_QSub8Sx8:
   2415          case Iop_QSub16Sx4:
   2416          case Iop_QSub32Sx2:
   2417          case Iop_QSub64Sx1: {
   2418             HReg res = newVRegD(env);
   2419             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2420             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2421             UInt size;
   2422             switch (e->Iex.Binop.op) {
   2423                case Iop_QSub8Sx8: size = 0; break;
   2424                case Iop_QSub16Sx4: size = 1; break;
   2425                case Iop_QSub32Sx2: size = 2; break;
   2426                case Iop_QSub64Sx1: size = 3; break;
   2427                default: vassert(0);
   2428             }
   2429             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
   2430                                            res, argL, argR, size, False));
   2431             return res;
   2432          }
   2433          case Iop_Max8Ux8:
   2434          case Iop_Max16Ux4:
   2435          case Iop_Max32Ux2: {
   2436             HReg res = newVRegD(env);
   2437             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2438             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2439             UInt size;
   2440             switch (e->Iex.Binop.op) {
   2441                case Iop_Max8Ux8: size = 0; break;
   2442                case Iop_Max16Ux4: size = 1; break;
   2443                case Iop_Max32Ux2: size = 2; break;
   2444                default: vassert(0);
   2445             }
   2446             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
   2447                                            res, argL, argR, size, False));
   2448             return res;
   2449          }
   2450          case Iop_Max8Sx8:
   2451          case Iop_Max16Sx4:
   2452          case Iop_Max32Sx2: {
   2453             HReg res = newVRegD(env);
   2454             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2455             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2456             UInt size;
   2457             switch (e->Iex.Binop.op) {
   2458                case Iop_Max8Sx8: size = 0; break;
   2459                case Iop_Max16Sx4: size = 1; break;
   2460                case Iop_Max32Sx2: size = 2; break;
   2461                default: vassert(0);
   2462             }
   2463             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
   2464                                            res, argL, argR, size, False));
   2465             return res;
   2466          }
   2467          case Iop_Min8Ux8:
   2468          case Iop_Min16Ux4:
   2469          case Iop_Min32Ux2: {
   2470             HReg res = newVRegD(env);
   2471             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2472             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2473             UInt size;
   2474             switch (e->Iex.Binop.op) {
   2475                case Iop_Min8Ux8: size = 0; break;
   2476                case Iop_Min16Ux4: size = 1; break;
   2477                case Iop_Min32Ux2: size = 2; break;
   2478                default: vassert(0);
   2479             }
   2480             addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
   2481                                            res, argL, argR, size, False));
   2482             return res;
   2483          }
   2484          case Iop_Min8Sx8:
   2485          case Iop_Min16Sx4:
   2486          case Iop_Min32Sx2: {
   2487             HReg res = newVRegD(env);
   2488             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2489             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2490             UInt size;
   2491             switch (e->Iex.Binop.op) {
   2492                case Iop_Min8Sx8: size = 0; break;
   2493                case Iop_Min16Sx4: size = 1; break;
   2494                case Iop_Min32Sx2: size = 2; break;
   2495                default: vassert(0);
   2496             }
   2497             addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
   2498                                            res, argL, argR, size, False));
   2499             return res;
   2500          }
   2501          case Iop_Sar8x8:
   2502          case Iop_Sar16x4:
   2503          case Iop_Sar32x2: {
   2504             HReg res = newVRegD(env);
   2505             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2506             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2507             HReg argR2 = newVRegD(env);
   2508             HReg zero = newVRegD(env);
   2509             UInt size;
   2510             switch (e->Iex.Binop.op) {
   2511                case Iop_Sar8x8: size = 0; break;
   2512                case Iop_Sar16x4: size = 1; break;
   2513                case Iop_Sar32x2: size = 2; break;
   2514                case Iop_Sar64: size = 3; break;
   2515                default: vassert(0);
   2516             }
   2517             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
   2518             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   2519                                            argR2, zero, argR, size, False));
   2520             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   2521                                           res, argL, argR2, size, False));
   2522             return res;
   2523          }
   2524          case Iop_Sal8x8:
   2525          case Iop_Sal16x4:
   2526          case Iop_Sal32x2:
   2527          case Iop_Sal64x1: {
   2528             HReg res = newVRegD(env);
   2529             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2530             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2531             UInt size;
   2532             switch (e->Iex.Binop.op) {
   2533                case Iop_Sal8x8: size = 0; break;
   2534                case Iop_Sal16x4: size = 1; break;
   2535                case Iop_Sal32x2: size = 2; break;
   2536                case Iop_Sal64x1: size = 3; break;
   2537                default: vassert(0);
   2538             }
   2539             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   2540                                           res, argL, argR, size, False));
   2541             return res;
   2542          }
   2543          case Iop_Shr8x8:
   2544          case Iop_Shr16x4:
   2545          case Iop_Shr32x2: {
   2546             HReg res = newVRegD(env);
   2547             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2548             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2549             HReg argR2 = newVRegD(env);
   2550             HReg zero = newVRegD(env);
   2551             UInt size;
   2552             switch (e->Iex.Binop.op) {
   2553                case Iop_Shr8x8: size = 0; break;
   2554                case Iop_Shr16x4: size = 1; break;
   2555                case Iop_Shr32x2: size = 2; break;
   2556                default: vassert(0);
   2557             }
   2558             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
   2559             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   2560                                            argR2, zero, argR, size, False));
   2561             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   2562                                           res, argL, argR2, size, False));
   2563             return res;
   2564          }
   2565          case Iop_Shl8x8:
   2566          case Iop_Shl16x4:
   2567          case Iop_Shl32x2: {
   2568             HReg res = newVRegD(env);
   2569             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2570             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2571             UInt size;
   2572             switch (e->Iex.Binop.op) {
   2573                case Iop_Shl8x8: size = 0; break;
   2574                case Iop_Shl16x4: size = 1; break;
   2575                case Iop_Shl32x2: size = 2; break;
   2576                default: vassert(0);
   2577             }
   2578             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   2579                                           res, argL, argR, size, False));
   2580             return res;
   2581          }
   2582          case Iop_QShl8x8:
   2583          case Iop_QShl16x4:
   2584          case Iop_QShl32x2:
   2585          case Iop_QShl64x1: {
   2586             HReg res = newVRegD(env);
   2587             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2588             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2589             UInt size;
   2590             switch (e->Iex.Binop.op) {
   2591                case Iop_QShl8x8: size = 0; break;
   2592                case Iop_QShl16x4: size = 1; break;
   2593                case Iop_QShl32x2: size = 2; break;
   2594                case Iop_QShl64x1: size = 3; break;
   2595                default: vassert(0);
   2596             }
   2597             addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
   2598                                           res, argL, argR, size, False));
   2599             return res;
   2600          }
   2601          case Iop_QSal8x8:
   2602          case Iop_QSal16x4:
   2603          case Iop_QSal32x2:
   2604          case Iop_QSal64x1: {
   2605             HReg res = newVRegD(env);
   2606             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2607             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2608             UInt size;
   2609             switch (e->Iex.Binop.op) {
   2610                case Iop_QSal8x8: size = 0; break;
   2611                case Iop_QSal16x4: size = 1; break;
   2612                case Iop_QSal32x2: size = 2; break;
   2613                case Iop_QSal64x1: size = 3; break;
   2614                default: vassert(0);
   2615             }
   2616             addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
   2617                                           res, argL, argR, size, False));
   2618             return res;
   2619          }
   2620          case Iop_QShlN8x8:
   2621          case Iop_QShlN16x4:
   2622          case Iop_QShlN32x2:
   2623          case Iop_QShlN64x1: {
   2624             HReg res = newVRegD(env);
   2625             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2626             UInt size, imm;
   2627             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   2628                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   2629                vpanic("ARM taget supports Iop_QShlNAxB with constant "
   2630                       "second argument only\n");
   2631             }
   2632             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   2633             switch (e->Iex.Binop.op) {
   2634                case Iop_QShlN8x8: size = 8 | imm; break;
   2635                case Iop_QShlN16x4: size = 16 | imm; break;
   2636                case Iop_QShlN32x2: size = 32 | imm; break;
   2637                case Iop_QShlN64x1: size = 64 | imm; break;
   2638                default: vassert(0);
   2639             }
   2640             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
   2641                                           res, argL, size, False));
   2642             return res;
   2643          }
   2644          case Iop_QShlN8Sx8:
   2645          case Iop_QShlN16Sx4:
   2646          case Iop_QShlN32Sx2:
   2647          case Iop_QShlN64Sx1: {
   2648             HReg res = newVRegD(env);
   2649             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2650             UInt size, imm;
   2651             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   2652                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   2653                vpanic("ARM taget supports Iop_QShlNAxB with constant "
   2654                       "second argument only\n");
   2655             }
   2656             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   2657             switch (e->Iex.Binop.op) {
   2658                case Iop_QShlN8Sx8: size = 8 | imm; break;
   2659                case Iop_QShlN16Sx4: size = 16 | imm; break;
   2660                case Iop_QShlN32Sx2: size = 32 | imm; break;
   2661                case Iop_QShlN64Sx1: size = 64 | imm; break;
   2662                default: vassert(0);
   2663             }
   2664             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
   2665                                           res, argL, size, False));
   2666             return res;
   2667          }
   2668          case Iop_QSalN8x8:
   2669          case Iop_QSalN16x4:
   2670          case Iop_QSalN32x2:
   2671          case Iop_QSalN64x1: {
   2672             HReg res = newVRegD(env);
   2673             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2674             UInt size, imm;
   2675             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   2676                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   2677                vpanic("ARM taget supports Iop_QShlNAxB with constant "
   2678                       "second argument only\n");
   2679             }
   2680             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   2681             switch (e->Iex.Binop.op) {
   2682                case Iop_QSalN8x8: size = 8 | imm; break;
   2683                case Iop_QSalN16x4: size = 16 | imm; break;
   2684                case Iop_QSalN32x2: size = 32 | imm; break;
   2685                case Iop_QSalN64x1: size = 64 | imm; break;
   2686                default: vassert(0);
   2687             }
   2688             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
   2689                                           res, argL, size, False));
   2690             return res;
   2691          }
   2692          case Iop_ShrN8x8:
   2693          case Iop_ShrN16x4:
   2694          case Iop_ShrN32x2:
   2695          case Iop_Shr64: {
   2696             HReg res = newVRegD(env);
   2697             HReg tmp = newVRegD(env);
   2698             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2699             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2700             HReg argR2 = newVRegI(env);
   2701             UInt size;
   2702             switch (e->Iex.Binop.op) {
   2703                case Iop_ShrN8x8: size = 0; break;
   2704                case Iop_ShrN16x4: size = 1; break;
   2705                case Iop_ShrN32x2: size = 2; break;
   2706                case Iop_Shr64: size = 3; break;
   2707                default: vassert(0);
   2708             }
   2709             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
   2710             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
   2711             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   2712                                           res, argL, tmp, size, False));
   2713             return res;
   2714          }
   2715          case Iop_ShlN8x8:
   2716          case Iop_ShlN16x4:
   2717          case Iop_ShlN32x2:
   2718          case Iop_Shl64: {
   2719             HReg res = newVRegD(env);
   2720             HReg tmp = newVRegD(env);
   2721             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2722             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2723             UInt size;
   2724             switch (e->Iex.Binop.op) {
   2725                case Iop_ShlN8x8: size = 0; break;
   2726                case Iop_ShlN16x4: size = 1; break;
   2727                case Iop_ShlN32x2: size = 2; break;
   2728                case Iop_Shl64: size = 3; break;
   2729                default: vassert(0);
   2730             }
   2731             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False));
   2732             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   2733                                           res, argL, tmp, size, False));
   2734             return res;
   2735          }
   2736          case Iop_SarN8x8:
   2737          case Iop_SarN16x4:
   2738          case Iop_SarN32x2:
   2739          case Iop_Sar64: {
   2740             HReg res = newVRegD(env);
   2741             HReg tmp = newVRegD(env);
   2742             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2743             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2744             HReg argR2 = newVRegI(env);
   2745             UInt size;
   2746             switch (e->Iex.Binop.op) {
   2747                case Iop_SarN8x8: size = 0; break;
   2748                case Iop_SarN16x4: size = 1; break;
   2749                case Iop_SarN32x2: size = 2; break;
   2750                case Iop_Sar64: size = 3; break;
   2751                default: vassert(0);
   2752             }
   2753             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
   2754             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
   2755             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   2756                                           res, argL, tmp, size, False));
   2757             return res;
   2758          }
   2759          case Iop_CmpGT8Ux8:
   2760          case Iop_CmpGT16Ux4:
   2761          case Iop_CmpGT32Ux2: {
   2762             HReg res = newVRegD(env);
   2763             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2764             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2765             UInt size;
   2766             switch (e->Iex.Binop.op) {
   2767                case Iop_CmpGT8Ux8: size = 0; break;
   2768                case Iop_CmpGT16Ux4: size = 1; break;
   2769                case Iop_CmpGT32Ux2: size = 2; break;
   2770                default: vassert(0);
   2771             }
   2772             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
   2773                                            res, argL, argR, size, False));
   2774             return res;
   2775          }
   2776          case Iop_CmpGT8Sx8:
   2777          case Iop_CmpGT16Sx4:
   2778          case Iop_CmpGT32Sx2: {
   2779             HReg res = newVRegD(env);
   2780             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2781             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2782             UInt size;
   2783             switch (e->Iex.Binop.op) {
   2784                case Iop_CmpGT8Sx8: size = 0; break;
   2785                case Iop_CmpGT16Sx4: size = 1; break;
   2786                case Iop_CmpGT32Sx2: size = 2; break;
   2787                default: vassert(0);
   2788             }
   2789             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
   2790                                            res, argL, argR, size, False));
   2791             return res;
   2792          }
   2793          case Iop_CmpEQ8x8:
   2794          case Iop_CmpEQ16x4:
   2795          case Iop_CmpEQ32x2: {
   2796             HReg res = newVRegD(env);
   2797             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2798             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2799             UInt size;
   2800             switch (e->Iex.Binop.op) {
   2801                case Iop_CmpEQ8x8: size = 0; break;
   2802                case Iop_CmpEQ16x4: size = 1; break;
   2803                case Iop_CmpEQ32x2: size = 2; break;
   2804                default: vassert(0);
   2805             }
   2806             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
   2807                                            res, argL, argR, size, False));
   2808             return res;
   2809          }
   2810          case Iop_Mul8x8:
   2811          case Iop_Mul16x4:
   2812          case Iop_Mul32x2: {
   2813             HReg res = newVRegD(env);
   2814             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2815             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2816             UInt size = 0;
   2817             switch(e->Iex.Binop.op) {
   2818                case Iop_Mul8x8: size = 0; break;
   2819                case Iop_Mul16x4: size = 1; break;
   2820                case Iop_Mul32x2: size = 2; break;
   2821                default: vassert(0);
   2822             }
   2823             addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
   2824                                            res, argL, argR, size, False));
   2825             return res;
   2826          }
   2827          case Iop_Mul32Fx2: {
   2828             HReg res = newVRegD(env);
   2829             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2830             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2831             UInt size = 0;
   2832             addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
   2833                                            res, argL, argR, size, False));
   2834             return res;
   2835          }
   2836          case Iop_QDMulHi16Sx4:
   2837          case Iop_QDMulHi32Sx2: {
   2838             HReg res = newVRegD(env);
   2839             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2840             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2841             UInt size = 0;
   2842             switch(e->Iex.Binop.op) {
   2843                case Iop_QDMulHi16Sx4: size = 1; break;
   2844                case Iop_QDMulHi32Sx2: size = 2; break;
   2845                default: vassert(0);
   2846             }
   2847             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
   2848                                            res, argL, argR, size, False));
   2849             return res;
   2850          }
   2851 
   2852          case Iop_QRDMulHi16Sx4:
   2853          case Iop_QRDMulHi32Sx2: {
   2854             HReg res = newVRegD(env);
   2855             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2856             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2857             UInt size = 0;
   2858             switch(e->Iex.Binop.op) {
   2859                case Iop_QRDMulHi16Sx4: size = 1; break;
   2860                case Iop_QRDMulHi32Sx2: size = 2; break;
   2861                default: vassert(0);
   2862             }
   2863             addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
   2864                                            res, argL, argR, size, False));
   2865             return res;
   2866          }
   2867 
   2868          case Iop_PwAdd8x8:
   2869          case Iop_PwAdd16x4:
   2870          case Iop_PwAdd32x2: {
   2871             HReg res = newVRegD(env);
   2872             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2873             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2874             UInt size = 0;
   2875             switch(e->Iex.Binop.op) {
   2876                case Iop_PwAdd8x8: size = 0; break;
   2877                case Iop_PwAdd16x4: size = 1; break;
   2878                case Iop_PwAdd32x2: size = 2; break;
   2879                default: vassert(0);
   2880             }
   2881             addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
   2882                                            res, argL, argR, size, False));
   2883             return res;
   2884          }
   2885          case Iop_PwAdd32Fx2: {
   2886             HReg res = newVRegD(env);
   2887             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2888             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2889             UInt size = 0;
   2890             addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
   2891                                            res, argL, argR, size, False));
   2892             return res;
   2893          }
   2894          case Iop_PwMin8Ux8:
   2895          case Iop_PwMin16Ux4:
   2896          case Iop_PwMin32Ux2: {
   2897             HReg res = newVRegD(env);
   2898             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2899             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2900             UInt size = 0;
   2901             switch(e->Iex.Binop.op) {
   2902                case Iop_PwMin8Ux8: size = 0; break;
   2903                case Iop_PwMin16Ux4: size = 1; break;
   2904                case Iop_PwMin32Ux2: size = 2; break;
   2905                default: vassert(0);
   2906             }
   2907             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
   2908                                            res, argL, argR, size, False));
   2909             return res;
   2910          }
   2911          case Iop_PwMin8Sx8:
   2912          case Iop_PwMin16Sx4:
   2913          case Iop_PwMin32Sx2: {
   2914             HReg res = newVRegD(env);
   2915             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2916             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2917             UInt size = 0;
   2918             switch(e->Iex.Binop.op) {
   2919                case Iop_PwMin8Sx8: size = 0; break;
   2920                case Iop_PwMin16Sx4: size = 1; break;
   2921                case Iop_PwMin32Sx2: size = 2; break;
   2922                default: vassert(0);
   2923             }
   2924             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
   2925                                            res, argL, argR, size, False));
   2926             return res;
   2927          }
   2928          case Iop_PwMax8Ux8:
   2929          case Iop_PwMax16Ux4:
   2930          case Iop_PwMax32Ux2: {
   2931             HReg res = newVRegD(env);
   2932             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2933             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2934             UInt size = 0;
   2935             switch(e->Iex.Binop.op) {
   2936                case Iop_PwMax8Ux8: size = 0; break;
   2937                case Iop_PwMax16Ux4: size = 1; break;
   2938                case Iop_PwMax32Ux2: size = 2; break;
   2939                default: vassert(0);
   2940             }
   2941             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
   2942                                            res, argL, argR, size, False));
   2943             return res;
   2944          }
   2945          case Iop_PwMax8Sx8:
   2946          case Iop_PwMax16Sx4:
   2947          case Iop_PwMax32Sx2: {
   2948             HReg res = newVRegD(env);
   2949             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2950             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2951             UInt size = 0;
   2952             switch(e->Iex.Binop.op) {
   2953                case Iop_PwMax8Sx8: size = 0; break;
   2954                case Iop_PwMax16Sx4: size = 1; break;
   2955                case Iop_PwMax32Sx2: size = 2; break;
   2956                default: vassert(0);
   2957             }
   2958             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
   2959                                            res, argL, argR, size, False));
   2960             return res;
   2961          }
   2962          case Iop_Perm8x8: {
   2963             HReg res = newVRegD(env);
   2964             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2965             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2966             addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
   2967                                            res, argL, argR, 0, False));
   2968             return res;
   2969          }
   2970          case Iop_PolynomialMul8x8: {
   2971             HReg res = newVRegD(env);
   2972             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2973             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2974             UInt size = 0;
   2975             addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
   2976                                            res, argL, argR, size, False));
   2977             return res;
   2978          }
   2979          case Iop_Max32Fx2: {
   2980             HReg res = newVRegD(env);
   2981             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2982             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2983             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
   2984                                            res, argL, argR, 2, False));
   2985             return res;
   2986          }
   2987          case Iop_Min32Fx2: {
   2988             HReg res = newVRegD(env);
   2989             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2990             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2991             addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
   2992                                            res, argL, argR, 2, False));
   2993             return res;
   2994          }
   2995          case Iop_PwMax32Fx2: {
   2996             HReg res = newVRegD(env);
   2997             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2998             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2999             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
   3000                                            res, argL, argR, 2, False));
   3001             return res;
   3002          }
   3003          case Iop_PwMin32Fx2: {
   3004             HReg res = newVRegD(env);
   3005             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3006             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   3007             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
   3008                                            res, argL, argR, 2, False));
   3009             return res;
   3010          }
   3011          case Iop_CmpGT32Fx2: {
   3012             HReg res = newVRegD(env);
   3013             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3014             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   3015             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
   3016                                            res, argL, argR, 2, False));
   3017             return res;
   3018          }
   3019          case Iop_CmpGE32Fx2: {
   3020             HReg res = newVRegD(env);
   3021             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3022             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   3023             addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
   3024                                            res, argL, argR, 2, False));
   3025             return res;
   3026          }
   3027          case Iop_CmpEQ32Fx2: {
   3028             HReg res = newVRegD(env);
   3029             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3030             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   3031             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
   3032                                            res, argL, argR, 2, False));
   3033             return res;
   3034          }
   3035          case Iop_F32ToFixed32Ux2_RZ:
   3036          case Iop_F32ToFixed32Sx2_RZ:
   3037          case Iop_Fixed32UToF32x2_RN:
   3038          case Iop_Fixed32SToF32x2_RN: {
   3039             HReg res = newVRegD(env);
   3040             HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3041             ARMNeonUnOp op;
   3042             UInt imm6;
   3043             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   3044                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   3045                   vpanic("ARM supports FP <-> Fixed conversion with constant "
   3046                          "second argument less than 33 only\n");
   3047             }
   3048             imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   3049             vassert(imm6 <= 32 && imm6 > 0);
   3050             imm6 = 64 - imm6;
   3051             switch(e->Iex.Binop.op) {
   3052                case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
   3053                case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
   3054                case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
   3055                case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
   3056                default: vassert(0);
   3057             }
   3058             addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
   3059             return res;
   3060          }
   3061          /*
   3062          FIXME: is this here or not?
   3063          case Iop_VDup8x8:
   3064          case Iop_VDup16x4:
   3065          case Iop_VDup32x2: {
   3066             HReg res = newVRegD(env);
   3067             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3068             UInt index;
   3069             UInt imm4;
   3070             UInt size = 0;
   3071             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   3072                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   3073                   vpanic("ARM supports Iop_VDup with constant "
   3074                          "second argument less than 16 only\n");
   3075             }
   3076             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   3077             switch(e->Iex.Binop.op) {
   3078                case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
   3079                case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
   3080                case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
   3081                default: vassert(0);
   3082             }
   3083             if (imm4 >= 16) {
   3084                vpanic("ARM supports Iop_VDup with constant "
   3085                       "second argument less than 16 only\n");
   3086             }
   3087             addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
   3088                                           res, argL, imm4, False));
   3089             return res;
   3090          }
   3091          */
   3092          default:
   3093             break;
   3094       }
   3095    }
   3096 
   3097    /* --------- UNARY ops --------- */
   3098    if (e->tag == Iex_Unop) {
   3099       switch (e->Iex.Unop.op) {
   3100 
   3101          /* ReinterpF64asI64 */
   3102          case Iop_ReinterpF64asI64:
   3103          /* Left64(e) */
   3104          case Iop_Left64:
   3105          /* CmpwNEZ64(e) */
   3106          //case Iop_CmpwNEZ64:
   3107          case Iop_1Sto64: {
   3108             HReg rLo, rHi;
   3109             HReg res = newVRegD(env);
   3110             iselInt64Expr(&rHi, &rLo, env, e);
   3111             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   3112             return res;
   3113          }
   3114          case Iop_Not64: {
   3115             DECLARE_PATTERN(p_veqz_8x8);
   3116             DECLARE_PATTERN(p_veqz_16x4);
   3117             DECLARE_PATTERN(p_veqz_32x2);
   3118             DECLARE_PATTERN(p_vcge_8sx8);
   3119             DECLARE_PATTERN(p_vcge_16sx4);
   3120             DECLARE_PATTERN(p_vcge_32sx2);
   3121             DECLARE_PATTERN(p_vcge_8ux8);
   3122             DECLARE_PATTERN(p_vcge_16ux4);
   3123             DECLARE_PATTERN(p_vcge_32ux2);
   3124             DEFINE_PATTERN(p_veqz_8x8,
   3125                   unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
   3126             DEFINE_PATTERN(p_veqz_16x4,
   3127                   unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
   3128             DEFINE_PATTERN(p_veqz_32x2,
   3129                   unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
   3130             DEFINE_PATTERN(p_vcge_8sx8,
   3131                   unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
   3132             DEFINE_PATTERN(p_vcge_16sx4,
   3133                   unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
   3134             DEFINE_PATTERN(p_vcge_32sx2,
   3135                   unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
   3136             DEFINE_PATTERN(p_vcge_8ux8,
   3137                   unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
   3138             DEFINE_PATTERN(p_vcge_16ux4,
   3139                   unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
   3140             DEFINE_PATTERN(p_vcge_32ux2,
   3141                   unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
   3142             if (matchIRExpr(&mi, p_veqz_8x8, e)) {
   3143                HReg res = newVRegD(env);
   3144                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
   3145                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
   3146                return res;
   3147             } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
   3148                HReg res = newVRegD(env);
   3149                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
   3150                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
   3151                return res;
   3152             } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
   3153                HReg res = newVRegD(env);
   3154                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
   3155                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
   3156                return res;
   3157             } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
   3158                HReg res = newVRegD(env);
   3159                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3160                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3161                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3162                                               res, argL, argR, 0, False));
   3163                return res;
   3164             } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
   3165                HReg res = newVRegD(env);
   3166                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3167                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3168                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3169                                               res, argL, argR, 1, False));
   3170                return res;
   3171             } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
   3172                HReg res = newVRegD(env);
   3173                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3174                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3175                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3176                                               res, argL, argR, 2, False));
   3177                return res;
   3178             } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
   3179                HReg res = newVRegD(env);
   3180                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3181                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3182                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3183                                               res, argL, argR, 0, False));
   3184                return res;
   3185             } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
   3186                HReg res = newVRegD(env);
   3187                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3188                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3189                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3190                                               res, argL, argR, 1, False));
   3191                return res;
   3192             } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
   3193                HReg res = newVRegD(env);
   3194                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3195                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3196                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3197                                               res, argL, argR, 2, False));
   3198                return res;
   3199             } else {
   3200                HReg res = newVRegD(env);
   3201                HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3202                addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
   3203                return res;
   3204             }
   3205          }
   3206          case Iop_Dup8x8:
   3207          case Iop_Dup16x4:
   3208          case Iop_Dup32x2: {
   3209             HReg res, arg;
   3210             UInt size;
   3211             DECLARE_PATTERN(p_vdup_8x8);
   3212             DECLARE_PATTERN(p_vdup_16x4);
   3213             DECLARE_PATTERN(p_vdup_32x2);
   3214             DEFINE_PATTERN(p_vdup_8x8,
   3215                   unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
   3216             DEFINE_PATTERN(p_vdup_16x4,
   3217                   unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
   3218             DEFINE_PATTERN(p_vdup_32x2,
   3219                   unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
   3220             if (matchIRExpr(&mi, p_vdup_8x8, e)) {
   3221                UInt index;
   3222                UInt imm4;
   3223                if (mi.bindee[1]->tag == Iex_Const &&
   3224                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3225                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3226                   imm4 = (index << 1) + 1;
   3227                   if (index < 8) {
   3228                      res = newVRegD(env);
   3229                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3230                      addInstr(env, ARMInstr_NUnaryS(
   3231                                       ARMneon_VDUP,
   3232                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3233                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3234                                       imm4, False
   3235                              ));
   3236                      return res;
   3237                   }
   3238                }
   3239             } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
   3240                UInt index;
   3241                UInt imm4;
   3242                if (mi.bindee[1]->tag == Iex_Const &&
   3243                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3244                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3245                   imm4 = (index << 2) + 2;
   3246                   if (index < 4) {
   3247                      res = newVRegD(env);
   3248                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3249                      addInstr(env, ARMInstr_NUnaryS(
   3250                                       ARMneon_VDUP,
   3251                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3252                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3253                                       imm4, False
   3254                              ));
   3255                      return res;
   3256                   }
   3257                }
   3258             } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
   3259                UInt index;
   3260                UInt imm4;
   3261                if (mi.bindee[1]->tag == Iex_Const &&
   3262                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3263                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3264                   imm4 = (index << 3) + 4;
   3265                   if (index < 2) {
   3266                      res = newVRegD(env);
   3267                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3268                      addInstr(env, ARMInstr_NUnaryS(
   3269                                       ARMneon_VDUP,
   3270                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3271                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3272                                       imm4, False
   3273                              ));
   3274                      return res;
   3275                   }
   3276                }
   3277             }
   3278             arg = iselIntExpr_R(env, e->Iex.Unop.arg);
   3279             res = newVRegD(env);
   3280             switch (e->Iex.Unop.op) {
   3281                case Iop_Dup8x8: size = 0; break;
   3282                case Iop_Dup16x4: size = 1; break;
   3283                case Iop_Dup32x2: size = 2; break;
   3284                default: vassert(0);
   3285             }
   3286             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
   3287             return res;
   3288          }
   3289          case Iop_Abs8x8:
   3290          case Iop_Abs16x4:
   3291          case Iop_Abs32x2: {
   3292             HReg res = newVRegD(env);
   3293             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3294             UInt size = 0;
   3295             switch(e->Iex.Binop.op) {
   3296                case Iop_Abs8x8: size = 0; break;
   3297                case Iop_Abs16x4: size = 1; break;
   3298                case Iop_Abs32x2: size = 2; break;
   3299                default: vassert(0);
   3300             }
   3301             addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
   3302             return res;
   3303          }
   3304          case Iop_Reverse64_8x8:
   3305          case Iop_Reverse64_16x4:
   3306          case Iop_Reverse64_32x2: {
   3307             HReg res = newVRegD(env);
   3308             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3309             UInt size = 0;
   3310             switch(e->Iex.Binop.op) {
   3311                case Iop_Reverse64_8x8: size = 0; break;
   3312                case Iop_Reverse64_16x4: size = 1; break;
   3313                case Iop_Reverse64_32x2: size = 2; break;
   3314                default: vassert(0);
   3315             }
   3316             addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
   3317                                           res, arg, size, False));
   3318             return res;
   3319          }
   3320          case Iop_Reverse32_8x8:
   3321          case Iop_Reverse32_16x4: {
   3322             HReg res = newVRegD(env);
   3323             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3324             UInt size = 0;
   3325             switch(e->Iex.Binop.op) {
   3326                case Iop_Reverse32_8x8: size = 0; break;
   3327                case Iop_Reverse32_16x4: size = 1; break;
   3328                default: vassert(0);
   3329             }
   3330             addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
   3331                                           res, arg, size, False));
   3332             return res;
   3333          }
   3334          case Iop_Reverse16_8x8: {
   3335             HReg res = newVRegD(env);
   3336             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3337             UInt size = 0;
   3338             addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
   3339                                           res, arg, size, False));
   3340             return res;
   3341          }
   3342          case Iop_CmpwNEZ64: {
   3343             HReg x_lsh = newVRegD(env);
   3344             HReg x_rsh = newVRegD(env);
   3345             HReg lsh_amt = newVRegD(env);
   3346             HReg rsh_amt = newVRegD(env);
   3347             HReg zero = newVRegD(env);
   3348             HReg tmp = newVRegD(env);
   3349             HReg tmp2 = newVRegD(env);
   3350             HReg res = newVRegD(env);
   3351             HReg x = newVRegD(env);
   3352             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3353             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
   3354             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
   3355             addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
   3356             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
   3357             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   3358                                            rsh_amt, zero, lsh_amt, 2, False));
   3359             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   3360                                           x_lsh, x, lsh_amt, 3, False));
   3361             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   3362                                           x_rsh, x, rsh_amt, 3, False));
   3363             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   3364                                            tmp, x_lsh, x_rsh, 0, False));
   3365             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   3366                                            res, tmp, x, 0, False));
   3367             return res;
   3368          }
   3369          case Iop_CmpNEZ8x8:
   3370          case Iop_CmpNEZ16x4:
   3371          case Iop_CmpNEZ32x2: {
   3372             HReg res = newVRegD(env);
   3373             HReg tmp = newVRegD(env);
   3374             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3375             UInt size;
   3376             switch (e->Iex.Unop.op) {
   3377                case Iop_CmpNEZ8x8: size = 0; break;
   3378                case Iop_CmpNEZ16x4: size = 1; break;
   3379                case Iop_CmpNEZ32x2: size = 2; break;
   3380                default: vassert(0);
   3381             }
   3382             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
   3383             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
   3384             return res;
   3385          }
   3386          case Iop_NarrowUn16to8x8:
   3387          case Iop_NarrowUn32to16x4:
   3388          case Iop_NarrowUn64to32x2: {
   3389             HReg res = newVRegD(env);
   3390             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3391             UInt size = 0;
   3392             switch(e->Iex.Binop.op) {
   3393                case Iop_NarrowUn16to8x8:  size = 0; break;
   3394                case Iop_NarrowUn32to16x4: size = 1; break;
   3395                case Iop_NarrowUn64to32x2: size = 2; break;
   3396                default: vassert(0);
   3397             }
   3398             addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
   3399                                           res, arg, size, False));
   3400             return res;
   3401          }
   3402          case Iop_QNarrowUn16Sto8Sx8:
   3403          case Iop_QNarrowUn32Sto16Sx4:
   3404          case Iop_QNarrowUn64Sto32Sx2: {
   3405             HReg res = newVRegD(env);
   3406             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3407             UInt size = 0;
   3408             switch(e->Iex.Binop.op) {
   3409                case Iop_QNarrowUn16Sto8Sx8:  size = 0; break;
   3410                case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
   3411                case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
   3412                default: vassert(0);
   3413             }
   3414             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
   3415                                           res, arg, size, False));
   3416             return res;
   3417          }
   3418          case Iop_QNarrowUn16Sto8Ux8:
   3419          case Iop_QNarrowUn32Sto16Ux4:
   3420          case Iop_QNarrowUn64Sto32Ux2: {
   3421             HReg res = newVRegD(env);
   3422             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3423             UInt size = 0;
   3424             switch(e->Iex.Binop.op) {
   3425                case Iop_QNarrowUn16Sto8Ux8:  size = 0; break;
   3426                case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
   3427                case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
   3428                default: vassert(0);
   3429             }
   3430             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
   3431                                           res, arg, size, False));
   3432             return res;
   3433          }
   3434          case Iop_QNarrowUn16Uto8Ux8:
   3435          case Iop_QNarrowUn32Uto16Ux4:
   3436          case Iop_QNarrowUn64Uto32Ux2: {
   3437             HReg res = newVRegD(env);
   3438             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3439             UInt size = 0;
   3440             switch(e->Iex.Binop.op) {
   3441                case Iop_QNarrowUn16Uto8Ux8:  size = 0; break;
   3442                case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
   3443                case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
   3444                default: vassert(0);
   3445             }
   3446             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
   3447                                           res, arg, size, False));
   3448             return res;
   3449          }
   3450          case Iop_PwAddL8Sx8:
   3451          case Iop_PwAddL16Sx4:
   3452          case Iop_PwAddL32Sx2: {
   3453             HReg res = newVRegD(env);
   3454             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3455             UInt size = 0;
   3456             switch(e->Iex.Binop.op) {
   3457                case Iop_PwAddL8Sx8: size = 0; break;
   3458                case Iop_PwAddL16Sx4: size = 1; break;
   3459                case Iop_PwAddL32Sx2: size = 2; break;
   3460                default: vassert(0);
   3461             }
   3462             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
   3463                                           res, arg, size, False));
   3464             return res;
   3465          }
   3466          case Iop_PwAddL8Ux8:
   3467          case Iop_PwAddL16Ux4:
   3468          case Iop_PwAddL32Ux2: {
   3469             HReg res = newVRegD(env);
   3470             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3471             UInt size = 0;
   3472             switch(e->Iex.Binop.op) {
   3473                case Iop_PwAddL8Ux8: size = 0; break;
   3474                case Iop_PwAddL16Ux4: size = 1; break;
   3475                case Iop_PwAddL32Ux2: size = 2; break;
   3476                default: vassert(0);
   3477             }
   3478             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
   3479                                           res, arg, size, False));
   3480             return res;
   3481          }
   3482          case Iop_Cnt8x8: {
   3483             HReg res = newVRegD(env);
   3484             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3485             UInt size = 0;
   3486             addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
   3487                                           res, arg, size, False));
   3488             return res;
   3489          }
   3490          case Iop_Clz8Sx8:
   3491          case Iop_Clz16Sx4:
   3492          case Iop_Clz32Sx2: {
   3493             HReg res = newVRegD(env);
   3494             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3495             UInt size = 0;
   3496             switch(e->Iex.Binop.op) {
   3497                case Iop_Clz8Sx8: size = 0; break;
   3498                case Iop_Clz16Sx4: size = 1; break;
   3499                case Iop_Clz32Sx2: size = 2; break;
   3500                default: vassert(0);
   3501             }
   3502             addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
   3503                                           res, arg, size, False));
   3504             return res;
   3505          }
   3506          case Iop_Cls8Sx8:
   3507          case Iop_Cls16Sx4:
   3508          case Iop_Cls32Sx2: {
   3509             HReg res = newVRegD(env);
   3510             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3511             UInt size = 0;
   3512             switch(e->Iex.Binop.op) {
   3513                case Iop_Cls8Sx8: size = 0; break;
   3514                case Iop_Cls16Sx4: size = 1; break;
   3515                case Iop_Cls32Sx2: size = 2; break;
   3516                default: vassert(0);
   3517             }
   3518             addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
   3519                                           res, arg, size, False));
   3520             return res;
   3521          }
   3522          case Iop_FtoI32Sx2_RZ: {
   3523             HReg res = newVRegD(env);
   3524             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3525             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
   3526                                           res, arg, 2, False));
   3527             return res;
   3528          }
   3529          case Iop_FtoI32Ux2_RZ: {
   3530             HReg res = newVRegD(env);
   3531             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3532             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
   3533                                           res, arg, 2, False));
   3534             return res;
   3535          }
   3536          case Iop_I32StoFx2: {
   3537             HReg res = newVRegD(env);
   3538             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3539             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
   3540                                           res, arg, 2, False));
   3541             return res;
   3542          }
   3543          case Iop_I32UtoFx2: {
   3544             HReg res = newVRegD(env);
   3545             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3546             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
   3547                                           res, arg, 2, False));
   3548             return res;
   3549          }
   3550          case Iop_F32toF16x4: {
   3551             HReg res = newVRegD(env);
   3552             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3553             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
   3554                                           res, arg, 2, False));
   3555             return res;
   3556          }
   3557          case Iop_Recip32Fx2: {
   3558             HReg res = newVRegD(env);
   3559             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3560             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
   3561                                           res, argL, 0, False));
   3562             return res;
   3563          }
   3564          case Iop_Recip32x2: {
   3565             HReg res = newVRegD(env);
   3566             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3567             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
   3568                                           res, argL, 0, False));
   3569             return res;
   3570          }
   3571          case Iop_Abs32Fx2: {
   3572             DECLARE_PATTERN(p_vabd_32fx2);
   3573             DEFINE_PATTERN(p_vabd_32fx2,
   3574                            unop(Iop_Abs32Fx2,
   3575                                 binop(Iop_Sub32Fx2,
   3576                                       bind(0),
   3577                                       bind(1))));
   3578             if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
   3579                HReg res = newVRegD(env);
   3580                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3581                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3582                addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
   3583                                               res, argL, argR, 0, False));
   3584                return res;
   3585             } else {
   3586                HReg res = newVRegD(env);
   3587                HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3588                addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
   3589                                              res, arg, 0, False));
   3590                return res;
   3591             }
   3592          }
   3593          case Iop_Rsqrte32Fx2: {
   3594             HReg res = newVRegD(env);
   3595             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3596             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
   3597                                           res, arg, 0, False));
   3598             return res;
   3599          }
   3600          case Iop_Rsqrte32x2: {
   3601             HReg res = newVRegD(env);
   3602             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3603             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
   3604                                           res, arg, 0, False));
   3605             return res;
   3606          }
   3607          case Iop_Neg32Fx2: {
   3608             HReg res = newVRegD(env);
   3609             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3610             addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
   3611                                           res, arg, 0, False));
   3612             return res;
   3613          }
   3614          default:
   3615             break;
   3616       }
   3617    } /* if (e->tag == Iex_Unop) */
   3618 
   3619    if (e->tag == Iex_Triop) {
   3620       IRTriop *triop = e->Iex.Triop.details;
   3621 
   3622       switch (triop->op) {
   3623          case Iop_Extract64: {
   3624             HReg res = newVRegD(env);
   3625             HReg argL = iselNeon64Expr(env, triop->arg1);
   3626             HReg argR = iselNeon64Expr(env, triop->arg2);
   3627             UInt imm4;
   3628             if (triop->arg3->tag != Iex_Const ||
   3629                 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
   3630                vpanic("ARM target supports Iop_Extract64 with constant "
   3631                       "third argument less than 16 only\n");
   3632             }
   3633             imm4 = triop->arg3->Iex.Const.con->Ico.U8;
   3634             if (imm4 >= 8) {
   3635                vpanic("ARM target supports Iop_Extract64 with constant "
   3636                       "third argument less than 16 only\n");
   3637             }
   3638             addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
   3639                                            res, argL, argR, imm4, False));
   3640             return res;
   3641          }
   3642          case Iop_SetElem8x8:
   3643          case Iop_SetElem16x4:
   3644          case Iop_SetElem32x2: {
   3645             HReg res = newVRegD(env);
   3646             HReg dreg = iselNeon64Expr(env, triop->arg1);
   3647             HReg arg = iselIntExpr_R(env, triop->arg3);
   3648             UInt index, size;
   3649             if (triop->arg2->tag != Iex_Const ||
   3650                 typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
   3651                vpanic("ARM target supports SetElem with constant "
   3652                       "second argument only\n");
   3653             }
   3654             index = triop->arg2->Iex.Const.con->Ico.U8;
   3655             switch (triop->op) {
   3656                case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
   3657                case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
   3658                case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
   3659                default: vassert(0);
   3660             }
   3661             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
   3662             addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
   3663                                            mkARMNRS(ARMNRS_Scalar, res, index),
   3664                                            mkARMNRS(ARMNRS_Reg, arg, 0),
   3665                                            size, False));
   3666             return res;
   3667          }
   3668          default:
   3669             break;
   3670       }
   3671    }
   3672 
   3673    /* --------- MULTIPLEX --------- */
   3674    if (e->tag == Iex_Mux0X) {
   3675       HReg rLo, rHi;
   3676       HReg res = newVRegD(env);
   3677       iselInt64Expr(&rHi, &rLo, env, e);
   3678       addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   3679       return res;
   3680    }
   3681 
   3682    ppIRExpr(e);
   3683    vpanic("iselNeon64Expr");
   3684 }
   3685 
   3686 static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
   3687 {
   3688    HReg r = iselNeonExpr_wrk( env, e );
   3689    vassert(hregClass(r) == HRcVec128);
   3690    vassert(hregIsVirtual(r));
   3691    return r;
   3692 }
   3693 
   3694 /* DO NOT CALL THIS DIRECTLY */
   3695 static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
   3696 {
   3697    IRType ty = typeOfIRExpr(env->type_env, e);
   3698    MatchInfo mi;
   3699    vassert(e);
   3700    vassert(ty == Ity_V128);
   3701 
   3702    if (e->tag == Iex_RdTmp) {
   3703       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   3704    }
   3705 
   3706    if (e->tag == Iex_Const) {
   3707       /* At the moment there should be no 128-bit constants in IR for ARM
   3708          generated during disassemble. They are represented as Iop_64HLtoV128
   3709          binary operation and are handled among binary ops. */
   3710       /* But zero can be created by valgrind internal optimizer */
   3711       if (e->Iex.Const.con->Ico.V128 == 0) {
   3712          HReg res = newVRegV(env);
   3713          addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(0, 0)));
   3714          return res;
   3715       }
   3716       ppIRExpr(e);
   3717       vpanic("128-bit constant is not implemented");
   3718    }
   3719 
   3720    if (e->tag == Iex_Load) {
   3721       HReg res = newVRegV(env);
   3722       ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
   3723       vassert(ty == Ity_V128);
   3724       addInstr(env, ARMInstr_NLdStQ(True, res, am));
   3725       return res;
   3726    }
   3727 
   3728    if (e->tag == Iex_Get) {
   3729       HReg addr = newVRegI(env);
   3730       HReg res = newVRegV(env);
   3731       vassert(ty == Ity_V128);
   3732       addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
   3733       addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
   3734       return res;
   3735    }
   3736 
   3737    if (e->tag == Iex_Unop) {
   3738       switch (e->Iex.Unop.op) {
   3739          case Iop_NotV128: {
   3740             DECLARE_PATTERN(p_veqz_8x16);
   3741             DECLARE_PATTERN(p_veqz_16x8);
   3742             DECLARE_PATTERN(p_veqz_32x4);
   3743             DECLARE_PATTERN(p_vcge_8sx16);
   3744             DECLARE_PATTERN(p_vcge_16sx8);
   3745             DECLARE_PATTERN(p_vcge_32sx4);
   3746             DECLARE_PATTERN(p_vcge_8ux16);
   3747             DECLARE_PATTERN(p_vcge_16ux8);
   3748             DECLARE_PATTERN(p_vcge_32ux4);
   3749             DEFINE_PATTERN(p_veqz_8x16,
   3750                   unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
   3751             DEFINE_PATTERN(p_veqz_16x8,
   3752                   unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
   3753             DEFINE_PATTERN(p_veqz_32x4,
   3754                   unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
   3755             DEFINE_PATTERN(p_vcge_8sx16,
   3756                   unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
   3757             DEFINE_PATTERN(p_vcge_16sx8,
   3758                   unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
   3759             DEFINE_PATTERN(p_vcge_32sx4,
   3760                   unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
   3761             DEFINE_PATTERN(p_vcge_8ux16,
   3762                   unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
   3763             DEFINE_PATTERN(p_vcge_16ux8,
   3764                   unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
   3765             DEFINE_PATTERN(p_vcge_32ux4,
   3766                   unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
   3767             if (matchIRExpr(&mi, p_veqz_8x16, e)) {
   3768                HReg res = newVRegV(env);
   3769                HReg arg = iselNeonExpr(env, mi.bindee[0]);
   3770                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
   3771                return res;
   3772             } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
   3773                HReg res = newVRegV(env);
   3774                HReg arg = iselNeonExpr(env, mi.bindee[0]);
   3775                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
   3776                return res;
   3777             } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
   3778                HReg res = newVRegV(env);
   3779                HReg arg = iselNeonExpr(env, mi.bindee[0]);
   3780                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
   3781                return res;
   3782             } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
   3783                HReg res = newVRegV(env);
   3784                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3785                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3786                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3787                                               res, argL, argR, 0, True));
   3788                return res;
   3789             } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
   3790                HReg res = newVRegV(env);
   3791                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3792                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3793                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3794                                               res, argL, argR, 1, True));
   3795                return res;
   3796             } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
   3797                HReg res = newVRegV(env);
   3798                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3799                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3800                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3801                                               res, argL, argR, 2, True));
   3802                return res;
   3803             } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
   3804                HReg res = newVRegV(env);
   3805                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3806                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3807                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3808                                               res, argL, argR, 0, True));
   3809                return res;
   3810             } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
   3811                HReg res = newVRegV(env);
   3812                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3813                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3814                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3815                                               res, argL, argR, 1, True));
   3816                return res;
   3817             } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
   3818                HReg res = newVRegV(env);
   3819                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3820                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3821                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3822                                               res, argL, argR, 2, True));
   3823                return res;
   3824             } else {
   3825                HReg res = newVRegV(env);
   3826                HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3827                addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
   3828                return res;
   3829             }
   3830          }
   3831          case Iop_Dup8x16:
   3832          case Iop_Dup16x8:
   3833          case Iop_Dup32x4: {
   3834             HReg res, arg;
   3835             UInt size;
   3836             DECLARE_PATTERN(p_vdup_8x16);
   3837             DECLARE_PATTERN(p_vdup_16x8);
   3838             DECLARE_PATTERN(p_vdup_32x4);
   3839             DEFINE_PATTERN(p_vdup_8x16,
   3840                   unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
   3841             DEFINE_PATTERN(p_vdup_16x8,
   3842                   unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
   3843             DEFINE_PATTERN(p_vdup_32x4,
   3844                   unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
   3845             if (matchIRExpr(&mi, p_vdup_8x16, e)) {
   3846                UInt index;
   3847                UInt imm4;
   3848                if (mi.bindee[1]->tag == Iex_Const &&
   3849                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3850                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3851                   imm4 = (index << 1) + 1;
   3852                   if (index < 8) {
   3853                      res = newVRegV(env);
   3854                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3855                      addInstr(env, ARMInstr_NUnaryS(
   3856                                       ARMneon_VDUP,
   3857                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3858                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3859                                       imm4, True
   3860                              ));
   3861                      return res;
   3862                   }
   3863                }
   3864             } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
   3865                UInt index;
   3866                UInt imm4;
   3867                if (mi.bindee[1]->tag == Iex_Const &&
   3868                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3869                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3870                   imm4 = (index << 2) + 2;
   3871                   if (index < 4) {
   3872                      res = newVRegV(env);
   3873                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3874                      addInstr(env, ARMInstr_NUnaryS(
   3875                                       ARMneon_VDUP,
   3876                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3877                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3878                                       imm4, True
   3879                              ));
   3880                      return res;
   3881                   }
   3882                }
   3883             } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
   3884                UInt index;
   3885                UInt imm4;
   3886                if (mi.bindee[1]->tag == Iex_Const &&
   3887                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3888                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3889                   imm4 = (index << 3) + 4;
   3890                   if (index < 2) {
   3891                      res = newVRegV(env);
   3892                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3893                      addInstr(env, ARMInstr_NUnaryS(
   3894                                       ARMneon_VDUP,
   3895                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3896                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3897                                       imm4, True
   3898                              ));
   3899                      return res;
   3900                   }
   3901                }
   3902             }
   3903             arg = iselIntExpr_R(env, e->Iex.Unop.arg);
   3904             res = newVRegV(env);
   3905             switch (e->Iex.Unop.op) {
   3906                case Iop_Dup8x16: size = 0; break;
   3907                case Iop_Dup16x8: size = 1; break;
   3908                case Iop_Dup32x4: size = 2; break;
   3909                default: vassert(0);
   3910             }
   3911             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
   3912             return res;
   3913          }
   3914          case Iop_Abs8x16:
   3915          case Iop_Abs16x8:
   3916          case Iop_Abs32x4: {
   3917             HReg res = newVRegV(env);
   3918             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3919             UInt size = 0;
   3920             switch(e->Iex.Binop.op) {
   3921                case Iop_Abs8x16: size = 0; break;
   3922                case Iop_Abs16x8: size = 1; break;
   3923                case Iop_Abs32x4: size = 2; break;
   3924                default: vassert(0);
   3925             }
   3926             addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
   3927             return res;
   3928          }
   3929          case Iop_Reverse64_8x16:
   3930          case Iop_Reverse64_16x8:
   3931          case Iop_Reverse64_32x4: {
   3932             HReg res = newVRegV(env);
   3933             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3934             UInt size = 0;
   3935             switch(e->Iex.Binop.op) {
   3936                case Iop_Reverse64_8x16: size = 0; break;
   3937                case Iop_Reverse64_16x8: size = 1; break;
   3938                case Iop_Reverse64_32x4: size = 2; break;
   3939                default: vassert(0);
   3940             }
   3941             addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
   3942                                           res, arg, size, True));
   3943             return res;
   3944          }
   3945          case Iop_Reverse32_8x16:
   3946          case Iop_Reverse32_16x8: {
   3947             HReg res = newVRegV(env);
   3948             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3949             UInt size = 0;
   3950             switch(e->Iex.Binop.op) {
   3951                case Iop_Reverse32_8x16: size = 0; break;
   3952                case Iop_Reverse32_16x8: size = 1; break;
   3953                default: vassert(0);
   3954             }
   3955             addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
   3956                                           res, arg, size, True));
   3957             return res;
   3958          }
   3959          case Iop_Reverse16_8x16: {
   3960             HReg res = newVRegV(env);
   3961             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3962             UInt size = 0;
   3963             addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
   3964                                           res, arg, size, True));
   3965             return res;
   3966          }
   3967          case Iop_CmpNEZ64x2: {
   3968             HReg x_lsh = newVRegV(env);
   3969             HReg x_rsh = newVRegV(env);
   3970             HReg lsh_amt = newVRegV(env);
   3971             HReg rsh_amt = newVRegV(env);
   3972             HReg zero = newVRegV(env);
   3973             HReg tmp = newVRegV(env);
   3974             HReg tmp2 = newVRegV(env);
   3975             HReg res = newVRegV(env);
   3976             HReg x = newVRegV(env);
   3977             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3978             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
   3979             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
   3980             addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
   3981             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
   3982             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   3983                                            rsh_amt, zero, lsh_amt, 2, True));
   3984             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   3985                                           x_lsh, x, lsh_amt, 3, True));
   3986             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   3987                                           x_rsh, x, rsh_amt, 3, True));
   3988             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   3989                                            tmp, x_lsh, x_rsh, 0, True));
   3990             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   3991                                            res, tmp, x, 0, True));
   3992             return res;
   3993          }
   3994          case Iop_CmpNEZ8x16:
   3995          case Iop_CmpNEZ16x8:
   3996          case Iop_CmpNEZ32x4: {
   3997             HReg res = newVRegV(env);
   3998             HReg tmp = newVRegV(env);
   3999             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4000             UInt size;
   4001             switch (e->Iex.Unop.op) {
   4002                case Iop_CmpNEZ8x16: size = 0; break;
   4003                case Iop_CmpNEZ16x8: size = 1; break;
   4004                case Iop_CmpNEZ32x4: size = 2; break;
   4005                default: vassert(0);
   4006             }
   4007             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
   4008             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
   4009             return res;
   4010          }
   4011          case Iop_Widen8Uto16x8:
   4012          case Iop_Widen16Uto32x4:
   4013          case Iop_Widen32Uto64x2: {
   4014             HReg res = newVRegV(env);
   4015             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   4016             UInt size;
   4017             switch (e->Iex.Unop.op) {
   4018                case Iop_Widen8Uto16x8:  size = 0; break;
   4019                case Iop_Widen16Uto32x4: size = 1; break;
   4020                case Iop_Widen32Uto64x2: size = 2; break;
   4021                default: vassert(0);
   4022             }
   4023             addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
   4024                                           res, arg, size, True));
   4025             return res;
   4026          }
   4027          case Iop_Widen8Sto16x8:
   4028          case Iop_Widen16Sto32x4:
   4029          case Iop_Widen32Sto64x2: {
   4030             HReg res = newVRegV(env);
   4031             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   4032             UInt size;
   4033             switch (e->Iex.Unop.op) {
   4034                case Iop_Widen8Sto16x8:  size = 0; break;
   4035                case Iop_Widen16Sto32x4: size = 1; break;
   4036                case Iop_Widen32Sto64x2: size = 2; break;
   4037                default: vassert(0);
   4038             }
   4039             addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
   4040                                           res, arg, size, True));
   4041             return res;
   4042          }
   4043          case Iop_PwAddL8Sx16:
   4044          case Iop_PwAddL16Sx8:
   4045          case Iop_PwAddL32Sx4: {
   4046             HReg res = newVRegV(env);
   4047             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4048             UInt size = 0;
   4049             switch(e->Iex.Binop.op) {
   4050                case Iop_PwAddL8Sx16: size = 0; break;
   4051                case Iop_PwAddL16Sx8: size = 1; break;
   4052                case Iop_PwAddL32Sx4: size = 2; break;
   4053                default: vassert(0);
   4054             }
   4055             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
   4056                                           res, arg, size, True));
   4057             return res;
   4058          }
   4059          case Iop_PwAddL8Ux16:
   4060          case Iop_PwAddL16Ux8:
   4061          case Iop_PwAddL32Ux4: {
   4062             HReg res = newVRegV(env);
   4063             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4064             UInt size = 0;
   4065             switch(e->Iex.Binop.op) {
   4066                case Iop_PwAddL8Ux16: size = 0; break;
   4067                case Iop_PwAddL16Ux8: size = 1; break;
   4068                case Iop_PwAddL32Ux4: size = 2; break;
   4069                default: vassert(0);
   4070             }
   4071             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
   4072                                           res, arg, size, True));
   4073             return res;
   4074          }
   4075          case Iop_Cnt8x16: {
   4076             HReg res = newVRegV(env);
   4077             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4078             UInt size = 0;
   4079             addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
   4080             return res;
   4081          }
   4082          case Iop_Clz8Sx16:
   4083          case Iop_Clz16Sx8:
   4084          case Iop_Clz32Sx4: {
   4085             HReg res = newVRegV(env);
   4086             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4087             UInt size = 0;
   4088             switch(e->Iex.Binop.op) {
   4089                case Iop_Clz8Sx16: size = 0; break;
   4090                case Iop_Clz16Sx8: size = 1; break;
   4091                case Iop_Clz32Sx4: size = 2; break;
   4092                default: vassert(0);
   4093             }
   4094             addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
   4095             return res;
   4096          }
   4097          case Iop_Cls8Sx16:
   4098          case Iop_Cls16Sx8:
   4099          case Iop_Cls32Sx4: {
   4100             HReg res = newVRegV(env);
   4101             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4102             UInt size = 0;
   4103             switch(e->Iex.Binop.op) {
   4104                case Iop_Cls8Sx16: size = 0; break;
   4105                case Iop_Cls16Sx8: size = 1; break;
   4106                case Iop_Cls32Sx4: size = 2; break;
   4107                default: vassert(0);
   4108             }
   4109             addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
   4110             return res;
   4111          }
   4112          case Iop_FtoI32Sx4_RZ: {
   4113             HReg res = newVRegV(env);
   4114             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4115             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
   4116                                           res, arg, 2, True));
   4117             return res;
   4118          }
   4119          case Iop_FtoI32Ux4_RZ: {
   4120             HReg res = newVRegV(env);
   4121             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4122             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
   4123                                           res, arg, 2, True));
   4124             return res;
   4125          }
   4126          case Iop_I32StoFx4: {
   4127             HReg res = newVRegV(env);
   4128             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4129             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
   4130                                           res, arg, 2, True));
   4131             return res;
   4132          }
   4133          case Iop_I32UtoFx4: {
   4134             HReg res = newVRegV(env);
   4135             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4136             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
   4137                                           res, arg, 2, True));
   4138             return res;
   4139          }
   4140          case Iop_F16toF32x4: {
   4141             HReg res = newVRegV(env);
   4142             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   4143             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
   4144                                           res, arg, 2, True));
   4145             return res;
   4146          }
   4147          case Iop_Recip32Fx4: {
   4148             HReg res = newVRegV(env);
   4149             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
   4150             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
   4151                                           res, argL, 0, True));
   4152             return res;
   4153          }
   4154          case Iop_Recip32x4: {
   4155             HReg res = newVRegV(env);
   4156             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
   4157             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
   4158                                           res, argL, 0, True));
   4159             return res;
   4160          }
   4161          case Iop_Abs32Fx4: {
   4162             DECLARE_PATTERN(p_vabd_32fx4);
   4163             DEFINE_PATTERN(p_vabd_32fx4,
   4164                            unop(Iop_Abs32Fx4,
   4165                                 binop(Iop_Sub32Fx4,
   4166                                       bind(0),
   4167                                       bind(1))));
   4168             if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
   4169                HReg res = newVRegV(env);
   4170                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   4171                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   4172                addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
   4173                                               res, argL, argR, 0, True));
   4174                return res;
   4175             } else {
   4176                HReg res = newVRegV(env);
   4177                HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
   4178                addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
   4179                                              res, argL, 0, True));
   4180                return res;
   4181             }
   4182          }
   4183          case Iop_Rsqrte32Fx4: {
   4184             HReg res = newVRegV(env);
   4185             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
   4186             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
   4187                                           res, argL, 0, True));
   4188             return res;
   4189          }
   4190          case Iop_Rsqrte32x4: {
   4191             HReg res = newVRegV(env);
   4192             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
   4193             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
   4194                                           res, argL, 0, True));
   4195             return res;
   4196          }
   4197          case Iop_Neg32Fx4: {
   4198             HReg res = newVRegV(env);
   4199             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4200             addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
   4201                                           res, arg, 0, True));
   4202             return res;
   4203          }
   4204          /* ... */
   4205          default:
   4206             break;
   4207       }
   4208    }
   4209 
   4210    if (e->tag == Iex_Binop) {
   4211       switch (e->Iex.Binop.op) {
   4212          case Iop_64HLtoV128:
   4213             /* Try to match this into a single "VMOV reg, imm" instruction */
   4214             if (e->Iex.Binop.arg1->tag == Iex_Const &&
   4215                 e->Iex.Binop.arg2->tag == Iex_Const &&
   4216                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
   4217                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
   4218                 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
   4219                            e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
   4220                ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
   4221                ARMNImm *imm = Imm64_to_ARMNImm(imm64);
   4222                if (imm) {
   4223                   HReg res = newVRegV(env);
   4224                   addInstr(env, ARMInstr_NeonImm(res, imm));
   4225                   return res;
   4226                }
   4227                if ((imm64 >> 32) == 0LL &&
   4228                    (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
   4229                   HReg tmp1 = newVRegV(env);
   4230                   HReg tmp2 = newVRegV(env);
   4231                   HReg res = newVRegV(env);
   4232                   if (imm->type < 10) {
   4233                      addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
   4234                      addInstr(env, ARMInstr_NeonImm(tmp2, imm));
   4235                      addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
   4236                                                     res, tmp1, tmp2, 4, True));
   4237                      return res;
   4238                   }
   4239                }
   4240                if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
   4241                    (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
   4242                   HReg tmp1 = newVRegV(env);
   4243                   HReg tmp2 = newVRegV(env);
   4244                   HReg res = newVRegV(env);
   4245                   if (imm->type < 10) {
   4246                      addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
   4247                      addInstr(env, ARMInstr_NeonImm(tmp2, imm));
   4248                      addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
   4249                                                     res, tmp1, tmp2, 4, True));
   4250                      return res;
   4251                   }
   4252                }
   4253             }
   4254             /* Does not match "VMOV Reg, Imm" form.  We'll have to do
   4255                it the slow way. */
   4256             {
   4257                /* local scope */
   4258                /* Done via the stack for ease of use. */
   4259                /* FIXME: assumes little endian host */
   4260                HReg       w3, w2, w1, w0;
   4261                HReg       res  = newVRegV(env);
   4262                ARMAMode1* sp_0  = ARMAMode1_RI(hregARM_R13(), 0);
   4263                ARMAMode1* sp_4  = ARMAMode1_RI(hregARM_R13(), 4);
   4264                ARMAMode1* sp_8  = ARMAMode1_RI(hregARM_R13(), 8);
   4265                ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
   4266                ARMRI84*   c_16  = ARMRI84_I84(16,0);
   4267                /* Reserve 16 bytes on the stack by lowering SP */
   4268                addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
   4269                                                       hregARM_R13(), c_16));
   4270 
   4271                /* Store the less significant 64 bits */
   4272                iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
   4273                addInstr(env, ARMInstr_LdSt32(False/*store*/, w0, sp_0));
   4274                addInstr(env, ARMInstr_LdSt32(False/*store*/, w1, sp_4));
   4275 
   4276                /* Store the more significant 64 bits */
   4277                iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
   4278                addInstr(env, ARMInstr_LdSt32(False/*store*/, w2, sp_8));
   4279                addInstr(env, ARMInstr_LdSt32(False/*store*/, w3, sp_12));
   4280 
   4281                 /* Load result back from stack. */
   4282                 addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
   4283                                               mkARMAModeN_R(hregARM_R13())));
   4284 
   4285                 /* Restore SP */
   4286                 addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
   4287                                            hregARM_R13(), c_16));
   4288                 return res;
   4289             } /* local scope */
   4290             goto neon_expr_bad;
   4291          case Iop_AndV128: {
   4292             HReg res = newVRegV(env);
   4293             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4294             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4295             addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
   4296                                            res, argL, argR, 4, True));
   4297             return res;
   4298          }
   4299          case Iop_OrV128: {
   4300             HReg res = newVRegV(env);
   4301             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4302             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4303             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   4304                                            res, argL, argR, 4, True));
   4305             return res;
   4306          }
   4307          case Iop_XorV128: {
   4308             HReg res = newVRegV(env);
   4309             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4310             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4311             addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
   4312                                            res, argL, argR, 4, True));
   4313             return res;
   4314          }
   4315          case Iop_Add8x16:
   4316          case Iop_Add16x8:
   4317          case Iop_Add32x4:
   4318          case Iop_Add64x2: {
   4319             /*
   4320             FIXME: remove this if not used
   4321             DECLARE_PATTERN(p_vrhadd_32sx4);
   4322             ULong one = (1LL << 32) | 1LL;
   4323             DEFINE_PATTERN(p_vrhadd_32sx4,
   4324                   binop(Iop_Add32x4,
   4325                         binop(Iop_Add32x4,
   4326                               binop(Iop_SarN32x4,
   4327                                     bind(0),
   4328                                     mkU8(1)),
   4329                               binop(Iop_SarN32x4,
   4330                                     bind(1),
   4331                                     mkU8(1))),
   4332                         binop(Iop_SarN32x4,
   4333                               binop(Iop_Add32x4,
   4334                                     binop(Iop_Add32x4,
   4335                                           binop(Iop_AndV128,
   4336                                                 bind(0),
   4337                                                 mkU128(one)),
   4338                                           binop(Iop_AndV128,
   4339                                                 bind(1),
   4340                                                 mkU128(one))),
   4341                                     mkU128(one)),
   4342                               mkU8(1))));
   4343             */
   4344             HReg res = newVRegV(env);
   4345             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4346             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4347             UInt size;
   4348             switch (e->Iex.Binop.op) {
   4349                case Iop_Add8x16: size = 0; break;
   4350                case Iop_Add16x8: size = 1; break;
   4351                case Iop_Add32x4: size = 2; break;
   4352                case Iop_Add64x2: size = 3; break;
   4353                default:
   4354                   ppIROp(e->Iex.Binop.op);
   4355                   vpanic("Illegal element size in VADD");
   4356             }
   4357             addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
   4358                                            res, argL, argR, size, True));
   4359             return res;
   4360          }
   4361          case Iop_Add32Fx4: {
   4362             HReg res = newVRegV(env);
   4363             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4364             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4365             UInt size = 0;
   4366             addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
   4367                                            res, argL, argR, size, True));
   4368             return res;
   4369          }
   4370          case Iop_Recps32Fx4: {
   4371             HReg res = newVRegV(env);
   4372             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4373             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4374             UInt size = 0;
   4375             addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
   4376                                            res, argL, argR, size, True));
   4377             return res;
   4378          }
   4379          case Iop_Rsqrts32Fx4: {
   4380             HReg res = newVRegV(env);
   4381             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4382             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4383             UInt size = 0;
   4384             addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
   4385                                            res, argL, argR, size, True));
   4386             return res;
   4387          }
   4388          case Iop_InterleaveEvenLanes8x16:
   4389          case Iop_InterleaveEvenLanes16x8:
   4390          case Iop_InterleaveEvenLanes32x4:
   4391          case Iop_InterleaveOddLanes8x16:
   4392          case Iop_InterleaveOddLanes16x8:
   4393          case Iop_InterleaveOddLanes32x4: {
   4394             HReg tmp = newVRegV(env);
   4395             HReg res = newVRegV(env);
   4396             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4397             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4398             UInt size;
   4399             UInt is_lo;
   4400             switch (e->Iex.Binop.op) {
   4401                case Iop_InterleaveEvenLanes8x16: is_lo = 0; size = 0; break;
   4402                case Iop_InterleaveOddLanes8x16: is_lo = 1; size = 0; break;
   4403                case Iop_InterleaveEvenLanes16x8: is_lo = 0; size = 1; break;
   4404                case Iop_InterleaveOddLanes16x8: is_lo = 1; size = 1; break;
   4405                case Iop_InterleaveEvenLanes32x4: is_lo = 0; size = 2; break;
   4406                case Iop_InterleaveOddLanes32x4: is_lo = 1; size = 2; break;
   4407                default:
   4408                   ppIROp(e->Iex.Binop.op);
   4409                   vpanic("Illegal element size in VTRN");
   4410             }
   4411             if (is_lo) {
   4412                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4413                                              tmp, argL, 4, True));
   4414                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4415                                              res, argR, 4, True));
   4416                addInstr(env, ARMInstr_NDual(ARMneon_TRN,
   4417                                             res, tmp, size, True));
   4418             } else {
   4419                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4420                                              tmp, argR, 4, True));
   4421                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4422                                              res, argL, 4, True));
   4423                addInstr(env, ARMInstr_NDual(ARMneon_TRN,
   4424                                             tmp, res, size, True));
   4425             }
   4426             return res;
   4427          }
   4428          case Iop_InterleaveHI8x16:
   4429          case Iop_InterleaveHI16x8:
   4430          case Iop_InterleaveHI32x4:
   4431          case Iop_InterleaveLO8x16:
   4432          case Iop_InterleaveLO16x8:
   4433          case Iop_InterleaveLO32x4: {
   4434             HReg tmp = newVRegV(env);
   4435             HReg res = newVRegV(env);
   4436             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4437             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4438             UInt size;
   4439             UInt is_lo;
   4440             switch (e->Iex.Binop.op) {
   4441                case Iop_InterleaveHI8x16: is_lo = 1; size = 0; break;
   4442                case Iop_InterleaveLO8x16: is_lo = 0; size = 0; break;
   4443                case Iop_InterleaveHI16x8: is_lo = 1; size = 1; break;
   4444                case Iop_InterleaveLO16x8: is_lo = 0; size = 1; break;
   4445                case Iop_InterleaveHI32x4: is_lo = 1; size = 2; break;
   4446                case Iop_InterleaveLO32x4: is_lo = 0; size = 2; break;
   4447                default:
   4448                   ppIROp(e->Iex.Binop.op);
   4449                   vpanic("Illegal element size in VZIP");
   4450             }
   4451             if (is_lo) {
   4452                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4453                                              tmp, argL, 4, True));
   4454                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4455                                              res, argR, 4, True));
   4456                addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
   4457                                             res, tmp, size, True));
   4458             } else {
   4459                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4460                                              tmp, argR, 4, True));
   4461                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4462                                              res, argL, 4, True));
   4463                addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
   4464                                             tmp, res, size, True));
   4465             }
   4466             return res;
   4467          }
   4468          case Iop_CatOddLanes8x16:
   4469          case Iop_CatOddLanes16x8:
   4470          case Iop_CatOddLanes32x4:
   4471          case Iop_CatEvenLanes8x16:
   4472          case Iop_CatEvenLanes16x8:
   4473          case Iop_CatEvenLanes32x4: {
   4474             HReg tmp = newVRegV(env);
   4475             HReg res = newVRegV(env);
   4476             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4477             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4478             UInt size;
   4479             UInt is_lo;
   4480             switch (e->Iex.Binop.op) {
   4481                case Iop_CatOddLanes8x16: is_lo = 1; size = 0; break;
   4482                case Iop_CatEvenLanes8x16: is_lo = 0; size = 0; break;
   4483                case Iop_CatOddLanes16x8: is_lo = 1; size = 1; break;
   4484                case Iop_CatEvenLanes16x8: is_lo = 0; size = 1; break;
   4485                case Iop_CatOddLanes32x4: is_lo = 1; size = 2; break;
   4486                case Iop_CatEvenLanes32x4: is_lo = 0; size = 2; break;
   4487                default:
   4488                   ppIROp(e->Iex.Binop.op);
   4489                   vpanic("Illegal element size in VUZP");
   4490             }
   4491             if (is_lo) {
   4492                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4493                                              tmp, argL, 4, True));
   4494                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4495                                              res, argR, 4, True));
   4496                addInstr(env, ARMInstr_NDual(ARMneon_UZP,
   4497                                             res, tmp, size, True));
   4498             } else {
   4499                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4500                                              tmp, argR, 4, True));
   4501                addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
   4502                                              res, argL, 4, True));
   4503                addInstr(env, ARMInstr_NDual(ARMneon_UZP,
   4504                                             tmp, res, size, True));
   4505             }
   4506             return res;
   4507          }
   4508          case Iop_QAdd8Ux16:
   4509          case Iop_QAdd16Ux8:
   4510          case Iop_QAdd32Ux4:
   4511          case Iop_QAdd64Ux2: {
   4512             HReg res = newVRegV(env);
   4513             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4514             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4515             UInt size;
   4516             switch (e->Iex.Binop.op) {
   4517                case Iop_QAdd8Ux16: size = 0; break;
   4518                case Iop_QAdd16Ux8: size = 1; break;
   4519                case Iop_QAdd32Ux4: size = 2; break;
   4520                case Iop_QAdd64Ux2: size = 3; break;
   4521                default:
   4522                   ppIROp(e->Iex.Binop.op);
   4523                   vpanic("Illegal element size in VQADDU");
   4524             }
   4525             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
   4526                                            res, argL, argR, size, True));
   4527             return res;
   4528          }
   4529          case Iop_QAdd8Sx16:
   4530          case Iop_QAdd16Sx8:
   4531          case Iop_QAdd32Sx4:
   4532          case Iop_QAdd64Sx2: {
   4533             HReg res = newVRegV(env);
   4534             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4535             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4536             UInt size;
   4537             switch (e->Iex.Binop.op) {
   4538                case Iop_QAdd8Sx16: size = 0; break;
   4539                case Iop_QAdd16Sx8: size = 1; break;
   4540                case Iop_QAdd32Sx4: size = 2; break;
   4541                case Iop_QAdd64Sx2: size = 3; break;
   4542                default:
   4543                   ppIROp(e->Iex.Binop.op);
   4544                   vpanic("Illegal element size in VQADDS");
   4545             }
   4546             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
   4547                                            res, argL, argR, size, True));
   4548             return res;
   4549          }
   4550          case Iop_Sub8x16:
   4551          case Iop_Sub16x8:
   4552          case Iop_Sub32x4:
   4553          case Iop_Sub64x2: {
   4554             HReg res = newVRegV(env);
   4555             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4556             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4557             UInt size;
   4558             switch (e->Iex.Binop.op) {
   4559                case Iop_Sub8x16: size = 0; break;
   4560                case Iop_Sub16x8: size = 1; break;
   4561                case Iop_Sub32x4: size = 2; break;
   4562                case Iop_Sub64x2: size = 3; break;
   4563                default:
   4564                   ppIROp(e->Iex.Binop.op);
   4565                   vpanic("Illegal element size in VSUB");
   4566             }
   4567             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   4568                                            res, argL, argR, size, True));
   4569             return res;
   4570          }
   4571          case Iop_Sub32Fx4: {
   4572             HReg res = newVRegV(env);
   4573             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4574             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4575             UInt size = 0;
   4576             addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
   4577                                            res, argL, argR, size, True));
   4578             return res;
   4579          }
   4580          case Iop_QSub8Ux16:
   4581          case Iop_QSub16Ux8:
   4582          case Iop_QSub32Ux4:
   4583          case Iop_QSub64Ux2: {
   4584             HReg res = newVRegV(env);
   4585             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4586             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4587             UInt size;
   4588             switch (e->Iex.Binop.op) {
   4589                case Iop_QSub8Ux16: size = 0; break;
   4590                case Iop_QSub16Ux8: size = 1; break;
   4591                case Iop_QSub32Ux4: size = 2; break;
   4592                case Iop_QSub64Ux2: size = 3; break;
   4593                default:
   4594                   ppIROp(e->Iex.Binop.op);
   4595                   vpanic("Illegal element size in VQSUBU");
   4596             }
   4597             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
   4598                                            res, argL, argR, size, True));
   4599             return res;
   4600          }
   4601          case Iop_QSub8Sx16:
   4602          case Iop_QSub16Sx8:
   4603          case Iop_QSub32Sx4:
   4604          case Iop_QSub64Sx2: {
   4605             HReg res = newVRegV(env);
   4606             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4607             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4608             UInt size;
   4609             switch (e->Iex.Binop.op) {
   4610                case Iop_QSub8Sx16: size = 0; break;
   4611                case Iop_QSub16Sx8: size = 1; break;
   4612                case Iop_QSub32Sx4: size = 2; break;
   4613                case Iop_QSub64Sx2: size = 3; break;
   4614                default:
   4615                   ppIROp(e->Iex.Binop.op);
   4616                   vpanic("Illegal element size in VQSUBS");
   4617             }
   4618             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
   4619                                            res, argL, argR, size, True));
   4620             return res;
   4621          }
   4622          case Iop_Max8Ux16:
   4623          case Iop_Max16Ux8:
   4624          case Iop_Max32Ux4: {
   4625             HReg res = newVRegV(env);
   4626             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4627             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4628             UInt size;
   4629             switch (e->Iex.Binop.op) {
   4630                case Iop_Max8Ux16: size = 0; break;
   4631                case Iop_Max16Ux8: size = 1; break;
   4632                case Iop_Max32Ux4: size = 2; break;
   4633                default: vpanic("Illegal element size in VMAXU");
   4634             }
   4635             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
   4636                                            res, argL, argR, size, True));
   4637             return res;
   4638          }
   4639          case Iop_Max8Sx16:
   4640          case Iop_Max16Sx8:
   4641          case Iop_Max32Sx4: {
   4642             HReg res = newVRegV(env);
   4643             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4644             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4645             UInt size;
   4646             switch (e->Iex.Binop.op) {
   4647                case Iop_Max8Sx16: size = 0; break;
   4648                case Iop_Max16Sx8: size = 1; break;
   4649                case Iop_Max32Sx4: size = 2; break;
   4650                default: vpanic("Illegal element size in VMAXU");
   4651             }
   4652             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
   4653                                            res, argL, argR, size, True));
   4654             return res;
   4655          }
   4656          case Iop_Min8Ux16:
   4657          case Iop_Min16Ux8:
   4658          case Iop_Min32Ux4: {
   4659             HReg res = newVRegV(env);
   4660             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4661             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4662             UInt size;
   4663             switch (e->Iex.Binop.op) {
   4664                case Iop_Min8Ux16: size = 0; break;
   4665                case Iop_Min16Ux8: size = 1; break;
   4666                case Iop_Min32Ux4: size = 2; break;
   4667                default: vpanic("Illegal element size in VMAXU");
   4668             }
   4669             addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
   4670                                            res, argL, argR, size, True));
   4671             return res;
   4672          }
   4673          case Iop_Min8Sx16:
   4674          case Iop_Min16Sx8:
   4675          case Iop_Min32Sx4: {
   4676             HReg res = newVRegV(env);
   4677             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4678             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4679             UInt size;
   4680             switch (e->Iex.Binop.op) {
   4681                case Iop_Min8Sx16: size = 0; break;
   4682                case Iop_Min16Sx8: size = 1; break;
   4683                case Iop_Min32Sx4: size = 2; break;
   4684                default: vpanic("Illegal element size in VMAXU");
   4685             }
   4686             addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
   4687                                            res, argL, argR, size, True));
   4688             return res;
   4689          }
   4690          case Iop_Sar8x16:
   4691          case Iop_Sar16x8:
   4692          case Iop_Sar32x4:
   4693          case Iop_Sar64x2: {
   4694             HReg res = newVRegV(env);
   4695             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4696             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4697             HReg argR2 = newVRegV(env);
   4698             HReg zero = newVRegV(env);
   4699             UInt size;
   4700             switch (e->Iex.Binop.op) {
   4701                case Iop_Sar8x16: size = 0; break;
   4702                case Iop_Sar16x8: size = 1; break;
   4703                case Iop_Sar32x4: size = 2; break;
   4704                case Iop_Sar64x2: size = 3; break;
   4705                default: vassert(0);
   4706             }
   4707             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
   4708             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   4709                                            argR2, zero, argR, size, True));
   4710             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   4711                                           res, argL, argR2, size, True));
   4712             return res;
   4713          }
   4714          case Iop_Sal8x16:
   4715          case Iop_Sal16x8:
   4716          case Iop_Sal32x4:
   4717          case Iop_Sal64x2: {
   4718             HReg res = newVRegV(env);
   4719             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4720             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4721             UInt size;
   4722             switch (e->Iex.Binop.op) {
   4723                case Iop_Sal8x16: size = 0; break;
   4724                case Iop_Sal16x8: size = 1; break;
   4725                case Iop_Sal32x4: size = 2; break;
   4726                case Iop_Sal64x2: size = 3; break;
   4727                default: vassert(0);
   4728             }
   4729             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   4730                                           res, argL, argR, size, True));
   4731             return res;
   4732          }
   4733          case Iop_Shr8x16:
   4734          case Iop_Shr16x8:
   4735          case Iop_Shr32x4:
   4736          case Iop_Shr64x2: {
   4737             HReg res = newVRegV(env);
   4738             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4739             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4740             HReg argR2 = newVRegV(env);
   4741             HReg zero = newVRegV(env);
   4742             UInt size;
   4743             switch (e->Iex.Binop.op) {
   4744                case Iop_Shr8x16: size = 0; break;
   4745                case Iop_Shr16x8: size = 1; break;
   4746                case Iop_Shr32x4: size = 2; break;
   4747                case Iop_Shr64x2: size = 3; break;
   4748                default: vassert(0);
   4749             }
   4750             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
   4751             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   4752                                            argR2, zero, argR, size, True));
   4753             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   4754                                           res, argL, argR2, size, True));
   4755             return res;
   4756          }
   4757          case Iop_Shl8x16:
   4758          case Iop_Shl16x8:
   4759          case Iop_Shl32x4:
   4760          case Iop_Shl64x2: {
   4761             HReg res = newVRegV(env);
   4762             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4763             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4764             UInt size;
   4765             switch (e->Iex.Binop.op) {
   4766                case Iop_Shl8x16: size = 0; break;
   4767                case Iop_Shl16x8: size = 1; break;
   4768                case Iop_Shl32x4: size = 2; break;
   4769                case Iop_Shl64x2: size = 3; break;
   4770                default: vassert(0);
   4771             }
   4772             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   4773                                           res, argL, argR, size, True));
   4774             return res;
   4775          }
   4776          case Iop_QShl8x16:
   4777          case Iop_QShl16x8:
   4778          case Iop_QShl32x4:
   4779          case Iop_QShl64x2: {
   4780             HReg res = newVRegV(env);
   4781             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4782             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4783             UInt size;
   4784             switch (e->Iex.Binop.op) {
   4785                case Iop_QShl8x16: size = 0; break;
   4786                case Iop_QShl16x8: size = 1; break;
   4787                case Iop_QShl32x4: size = 2; break;
   4788                case Iop_QShl64x2: size = 3; break;
   4789                default: vassert(0);
   4790             }
   4791             addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
   4792                                           res, argL, argR, size, True));
   4793             return res;
   4794          }
   4795          case Iop_QSal8x16:
   4796          case Iop_QSal16x8:
   4797          case Iop_QSal32x4:
   4798          case Iop_QSal64x2: {
   4799             HReg res = newVRegV(env);
   4800             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4801             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4802             UInt size;
   4803             switch (e->Iex.Binop.op) {
   4804                case Iop_QSal8x16: size = 0; break;
   4805                case Iop_QSal16x8: size = 1; break;
   4806                case Iop_QSal32x4: size = 2; break;
   4807                case Iop_QSal64x2: size = 3; break;
   4808                default: vassert(0);
   4809             }
   4810             addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
   4811                                           res, argL, argR, size, True));
   4812             return res;
   4813          }
   4814          case Iop_QShlN8x16:
   4815          case Iop_QShlN16x8:
   4816          case Iop_QShlN32x4:
   4817          case Iop_QShlN64x2: {
   4818             HReg res = newVRegV(env);
   4819             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4820             UInt size, imm;
   4821             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   4822                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   4823                vpanic("ARM taget supports Iop_QShlNAxB with constant "
   4824                       "second argument only\n");
   4825             }
   4826             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   4827             switch (e->Iex.Binop.op) {
   4828                case Iop_QShlN8x16: size = 8 | imm; break;
   4829                case Iop_QShlN16x8: size = 16 | imm; break;
   4830                case Iop_QShlN32x4: size = 32 | imm; break;
   4831                case Iop_QShlN64x2: size = 64 | imm; break;
   4832                default: vassert(0);
   4833             }
   4834             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
   4835                                           res, argL, size, True));
   4836             return res;
   4837          }
   4838          case Iop_QShlN8Sx16:
   4839          case Iop_QShlN16Sx8:
   4840          case Iop_QShlN32Sx4:
   4841          case Iop_QShlN64Sx2: {
   4842             HReg res = newVRegV(env);
   4843             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4844             UInt size, imm;
   4845             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   4846                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   4847                vpanic("ARM taget supports Iop_QShlNASxB with constant "
   4848                       "second argument only\n");
   4849             }
   4850             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   4851             switch (e->Iex.Binop.op) {
   4852                case Iop_QShlN8Sx16: size = 8 | imm; break;
   4853                case Iop_QShlN16Sx8: size = 16 | imm; break;
   4854                case Iop_QShlN32Sx4: size = 32 | imm; break;
   4855                case Iop_QShlN64Sx2: size = 64 | imm; break;
   4856                default: vassert(0);
   4857             }
   4858             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
   4859                                           res, argL, size, True));
   4860             return res;
   4861          }
   4862          case Iop_QSalN8x16:
   4863          case Iop_QSalN16x8:
   4864          case Iop_QSalN32x4:
   4865          case Iop_QSalN64x2: {
   4866             HReg res = newVRegV(env);
   4867             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4868             UInt size, imm;
   4869             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   4870                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   4871                vpanic("ARM taget supports Iop_QShlNAxB with constant "
   4872                       "second argument only\n");
   4873             }
   4874             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   4875             switch (e->Iex.Binop.op) {
   4876                case Iop_QSalN8x16: size = 8 | imm; break;
   4877                case Iop_QSalN16x8: size = 16 | imm; break;
   4878                case Iop_QSalN32x4: size = 32 | imm; break;
   4879                case Iop_QSalN64x2: size = 64 | imm; break;
   4880                default: vassert(0);
   4881             }
   4882             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
   4883                                           res, argL, size, True));
   4884             return res;
   4885          }
   4886          case Iop_ShrN8x16:
   4887          case Iop_ShrN16x8:
   4888          case Iop_ShrN32x4:
   4889          case Iop_ShrN64x2: {
   4890             HReg res = newVRegV(env);
   4891             HReg tmp = newVRegV(env);
   4892             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4893             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   4894             HReg argR2 = newVRegI(env);
   4895             UInt size;
   4896             switch (e->Iex.Binop.op) {
   4897                case Iop_ShrN8x16: size = 0; break;
   4898                case Iop_ShrN16x8: size = 1; break;
   4899                case Iop_ShrN32x4: size = 2; break;
   4900                case Iop_ShrN64x2: size = 3; break;
   4901                default: vassert(0);
   4902             }
   4903             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
   4904             addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
   4905                                           tmp, argR2, 0, True));
   4906             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   4907                                           res, argL, tmp, size, True));
   4908             return res;
   4909          }
   4910          case Iop_ShlN8x16:
   4911          case Iop_ShlN16x8:
   4912          case Iop_ShlN32x4:
   4913          case Iop_ShlN64x2: {
   4914             HReg res = newVRegV(env);
   4915             HReg tmp = newVRegV(env);
   4916             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4917             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   4918             UInt size;
   4919             switch (e->Iex.Binop.op) {
   4920                case Iop_ShlN8x16: size = 0; break;
   4921                case Iop_ShlN16x8: size = 1; break;
   4922                case Iop_ShlN32x4: size = 2; break;
   4923                case Iop_ShlN64x2: size = 3; break;
   4924                default: vassert(0);
   4925             }
   4926             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
   4927             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   4928                                           res, argL, tmp, size, True));
   4929             return res;
   4930          }
   4931          case Iop_SarN8x16:
   4932          case Iop_SarN16x8:
   4933          case Iop_SarN32x4:
   4934          case Iop_SarN64x2: {
   4935             HReg res = newVRegV(env);
   4936             HReg tmp = newVRegV(env);
   4937             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4938             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   4939             HReg argR2 = newVRegI(env);
   4940             UInt size;
   4941             switch (e->Iex.Binop.op) {
   4942                case Iop_SarN8x16: size = 0; break;
   4943                case Iop_SarN16x8: size = 1; break;
   4944                case Iop_SarN32x4: size = 2; break;
   4945                case Iop_SarN64x2: size = 3; break;
   4946                default: vassert(0);
   4947             }
   4948             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
   4949             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
   4950             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   4951                                           res, argL, tmp, size, True));
   4952             return res;
   4953          }
   4954          case Iop_CmpGT8Ux16:
   4955          case Iop_CmpGT16Ux8:
   4956          case Iop_CmpGT32Ux4: {
   4957             HReg res = newVRegV(env);
   4958             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4959             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4960             UInt size;
   4961             switch (e->Iex.Binop.op) {
   4962                case Iop_CmpGT8Ux16: size = 0; break;
   4963                case Iop_CmpGT16Ux8: size = 1; break;
   4964                case Iop_CmpGT32Ux4: size = 2; break;
   4965                default: vassert(0);
   4966             }
   4967             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
   4968                                            res, argL, argR, size, True));
   4969             return res;
   4970          }
   4971          case Iop_CmpGT8Sx16:
   4972          case Iop_CmpGT16Sx8:
   4973          case Iop_CmpGT32Sx4: {
   4974             HReg res = newVRegV(env);
   4975             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4976             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4977             UInt size;
   4978             switch (e->Iex.Binop.op) {
   4979                case Iop_CmpGT8Sx16: size = 0; break;
   4980                case Iop_CmpGT16Sx8: size = 1; break;
   4981                case Iop_CmpGT32Sx4: size = 2; break;
   4982                default: vassert(0);
   4983             }
   4984             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
   4985                                            res, argL, argR, size, True));
   4986             return res;
   4987          }
   4988          case Iop_CmpEQ8x16:
   4989          case Iop_CmpEQ16x8:
   4990          case Iop_CmpEQ32x4: {
   4991             HReg res = newVRegV(env);
   4992             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4993             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4994             UInt size;
   4995             switch (e->Iex.Binop.op) {
   4996                case Iop_CmpEQ8x16: size = 0; break;
   4997                case Iop_CmpEQ16x8: size = 1; break;
   4998                case Iop_CmpEQ32x4: size = 2; break;
   4999                default: vassert(0);
   5000             }
   5001             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
   5002                                            res, argL, argR, size, True));
   5003             return res;
   5004          }
   5005          case Iop_Mul8x16:
   5006          case Iop_Mul16x8:
   5007          case Iop_Mul32x4: {
   5008             HReg res = newVRegV(env);
   5009             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5010             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5011             UInt size = 0;
   5012             switch(e->Iex.Binop.op) {
   5013                case Iop_Mul8x16: size = 0; break;
   5014                case Iop_Mul16x8: size = 1; break;
   5015                case Iop_Mul32x4: size = 2; break;
   5016                default: vassert(0);
   5017             }
   5018             addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
   5019                                            res, argL, argR, size, True));
   5020             return res;
   5021          }
   5022          case Iop_Mul32Fx4: {
   5023             HReg res = newVRegV(env);
   5024             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5025             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5026             UInt size = 0;
   5027             addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
   5028                                            res, argL, argR, size, True));
   5029             return res;
   5030          }
   5031          case Iop_Mull8Ux8:
   5032          case Iop_Mull16Ux4:
   5033          case Iop_Mull32Ux2: {
   5034             HReg res = newVRegV(env);
   5035             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   5036             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   5037             UInt size = 0;
   5038             switch(e->Iex.Binop.op) {
   5039                case Iop_Mull8Ux8: size = 0; break;
   5040                case Iop_Mull16Ux4: size = 1; break;
   5041                case Iop_Mull32Ux2: size = 2; break;
   5042                default: vassert(0);
   5043             }
   5044             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
   5045                                            res, argL, argR, size, True));
   5046             return res;
   5047          }
   5048 
   5049          case Iop_Mull8Sx8:
   5050          case Iop_Mull16Sx4:
   5051          case Iop_Mull32Sx2: {
   5052             HReg res = newVRegV(env);
   5053             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   5054             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   5055             UInt size = 0;
   5056             switch(e->Iex.Binop.op) {
   5057                case Iop_Mull8Sx8: size = 0; break;
   5058                case Iop_Mull16Sx4: size = 1; break;
   5059                case Iop_Mull32Sx2: size = 2; break;
   5060                default: vassert(0);
   5061             }
   5062             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
   5063                                            res, argL, argR, size, True));
   5064             return res;
   5065          }
   5066 
   5067          case Iop_QDMulHi16Sx8:
   5068          case Iop_QDMulHi32Sx4: {
   5069             HReg res = newVRegV(env);
   5070             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5071             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5072             UInt size = 0;
   5073             switch(e->Iex.Binop.op) {
   5074                case Iop_QDMulHi16Sx8: size = 1; break;
   5075                case Iop_QDMulHi32Sx4: size = 2; break;
   5076                default: vassert(0);
   5077             }
   5078             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
   5079                                            res, argL, argR, size, True));
   5080             return res;
   5081          }
   5082 
   5083          case Iop_QRDMulHi16Sx8:
   5084          case Iop_QRDMulHi32Sx4: {
   5085             HReg res = newVRegV(env);
   5086             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5087             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5088             UInt size = 0;
   5089             switch(e->Iex.Binop.op) {
   5090                case Iop_QRDMulHi16Sx8: size = 1; break;
   5091                case Iop_QRDMulHi32Sx4: size = 2; break;
   5092                default: vassert(0);
   5093             }
   5094             addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
   5095                                            res, argL, argR, size, True));
   5096             return res;
   5097          }
   5098 
   5099          case Iop_QDMulLong16Sx4:
   5100          case Iop_QDMulLong32Sx2: {
   5101             HReg res = newVRegV(env);
   5102             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   5103             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   5104             UInt size = 0;
   5105             switch(e->Iex.Binop.op) {
   5106                case Iop_QDMulLong16Sx4: size = 1; break;
   5107                case Iop_QDMulLong32Sx2: size = 2; break;
   5108                default: vassert(0);
   5109             }
   5110             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
   5111                                            res, argL, argR, size, True));
   5112             return res;
   5113          }
   5114          case Iop_PolynomialMul8x16: {
   5115             HReg res = newVRegV(env);
   5116             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5117             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5118             UInt size = 0;
   5119             addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
   5120                                            res, argL, argR, size, True));
   5121             return res;
   5122          }
   5123          case Iop_Max32Fx4: {
   5124             HReg res = newVRegV(env);
   5125             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5126             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5127             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
   5128                                            res, argL, argR, 2, True));
   5129             return res;
   5130          }
   5131          case Iop_Min32Fx4: {
   5132             HReg res = newVRegV(env);
   5133             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5134             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5135             addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
   5136                                            res, argL, argR, 2, True));
   5137             return res;
   5138          }
   5139          case Iop_PwMax32Fx4: {
   5140             HReg res = newVRegV(env);
   5141             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5142             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5143             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
   5144                                            res, argL, argR, 2, True));
   5145             return res;
   5146          }
   5147          case Iop_PwMin32Fx4: {
   5148             HReg res = newVRegV(env);
   5149             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5150             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5151             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
   5152                                            res, argL, argR, 2, True));
   5153             return res;
   5154          }
   5155          case Iop_CmpGT32Fx4: {
   5156             HReg res = newVRegV(env);
   5157             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5158             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5159             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
   5160                                            res, argL, argR, 2, True));
   5161             return res;
   5162          }
   5163          case Iop_CmpGE32Fx4: {
   5164             HReg res = newVRegV(env);
   5165             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5166             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5167             addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
   5168                                            res, argL, argR, 2, True));
   5169             return res;
   5170          }
   5171          case Iop_CmpEQ32Fx4: {
   5172             HReg res = newVRegV(env);
   5173             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5174             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5175             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
   5176                                            res, argL, argR, 2, True));
   5177             return res;
   5178          }
   5179 
   5180          case Iop_PolynomialMull8x8: {
   5181             HReg res = newVRegV(env);
   5182             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   5183             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   5184             UInt size = 0;
   5185             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
   5186                                            res, argL, argR, size, True));
   5187             return res;
   5188          }
   5189          case Iop_F32ToFixed32Ux4_RZ:
   5190          case Iop_F32ToFixed32Sx4_RZ:
   5191          case Iop_Fixed32UToF32x4_RN:
   5192          case Iop_Fixed32SToF32x4_RN: {
   5193             HReg res = newVRegV(env);
   5194             HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
   5195             ARMNeonUnOp op;
   5196             UInt imm6;
   5197             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   5198                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   5199                   vpanic("ARM supports FP <-> Fixed conversion with constant "
   5200                          "second argument less than 33 only\n");
   5201             }
   5202             imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   5203             vassert(imm6 <= 32 && imm6 > 0);
   5204             imm6 = 64 - imm6;
   5205             switch(e->Iex.Binop.op) {
   5206                case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
   5207                case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
   5208                case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
   5209                case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
   5210                default: vassert(0);
   5211             }
   5212             addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
   5213             return res;
   5214          }
   5215          /*
   5216          FIXME remove if not used
   5217          case Iop_VDup8x16:
   5218          case Iop_VDup16x8:
   5219          case Iop_VDup32x4: {
   5220             HReg res = newVRegV(env);
   5221             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   5222             UInt imm4;
   5223             UInt index;
   5224             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   5225                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   5226                   vpanic("ARM supports Iop_VDup with constant "
   5227                          "second argument less than 16 only\n");
   5228             }
   5229             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   5230             switch(e->Iex.Binop.op) {
   5231                case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
   5232                case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
   5233                case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
   5234                default: vassert(0);
   5235             }
   5236             if (imm4 >= 16) {
   5237                vpanic("ARM supports Iop_VDup with constant "
   5238                       "second argument less than 16 only\n");
   5239             }
   5240             addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
   5241                                           res, argL, imm4, True));
   5242             return res;
   5243          }
   5244          */
   5245          case Iop_PwAdd8x16:
   5246          case Iop_PwAdd16x8:
   5247          case Iop_PwAdd32x4: {
   5248             HReg res = newVRegV(env);
   5249             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5250             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5251             UInt size = 0;
   5252             switch(e->Iex.Binop.op) {
   5253                case Iop_PwAdd8x16: size = 0; break;
   5254                case Iop_PwAdd16x8: size = 1; break;
   5255                case Iop_PwAdd32x4: size = 2; break;
   5256                default: vassert(0);
   5257             }
   5258             addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
   5259                                            res, argL, argR, size, True));
   5260             return res;
   5261          }
   5262          /* ... */
   5263          default:
   5264             break;
   5265       }
   5266    }
   5267 
   5268    if (e->tag == Iex_Triop) {
   5269       IRTriop *triop = e->Iex.Triop.details;
   5270 
   5271       switch (triop->op) {
   5272          case Iop_ExtractV128: {
   5273             HReg res = newVRegV(env);
   5274             HReg argL = iselNeonExpr(env, triop->arg1);
   5275             HReg argR = iselNeonExpr(env, triop->arg2);
   5276             UInt imm4;
   5277             if (triop->arg3->tag != Iex_Const ||
   5278                 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
   5279                vpanic("ARM target supports Iop_ExtractV128 with constant "
   5280                       "third argument less than 16 only\n");
   5281             }
   5282             imm4 = triop->arg3->Iex.Const.con->Ico.U8;
   5283             if (imm4 >= 16) {
   5284                vpanic("ARM target supports Iop_ExtractV128 with constant "
   5285                       "third argument less than 16 only\n");
   5286             }
   5287             addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
   5288                                            res, argL, argR, imm4, True));
   5289             return res;
   5290          }
   5291          default:
   5292             break;
   5293       }
   5294    }
   5295 
   5296    if (e->tag == Iex_Mux0X) {
   5297       HReg r8;
   5298       HReg rX  = iselNeonExpr(env, e->Iex.Mux0X.exprX);
   5299       HReg r0  = iselNeonExpr(env, e->Iex.Mux0X.expr0);
   5300       HReg dst = newVRegV(env);
   5301       addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, rX, 4, True));
   5302       r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
   5303       addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
   5304                                       ARMRI84_I84(0xFF,0)));
   5305       addInstr(env, ARMInstr_NCMovQ(ARMcc_EQ, dst, r0));
   5306       return dst;
   5307    }
   5308 
   5309   neon_expr_bad:
   5310    ppIRExpr(e);
   5311    vpanic("iselNeonExpr_wrk");
   5312 }
   5313 
   5314 /*---------------------------------------------------------*/
   5315 /*--- ISEL: Floating point expressions (64 bit)         ---*/
   5316 /*---------------------------------------------------------*/
   5317 
   5318 /* Compute a 64-bit floating point value into a register, the identity
   5319    of which is returned.  As with iselIntExpr_R, the reg may be either
   5320    real or virtual; in any case it must not be changed by subsequent
   5321    code emitted by the caller.  */
   5322 
   5323 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
   5324 {
   5325    HReg r = iselDblExpr_wrk( env, e );
   5326 #  if 0
   5327    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   5328 #  endif
   5329    vassert(hregClass(r) == HRcFlt64);
   5330    vassert(hregIsVirtual(r));
   5331    return r;
   5332 }
   5333 
   5334 /* DO NOT CALL THIS DIRECTLY */
   5335 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
   5336 {
   5337    IRType ty = typeOfIRExpr(env->type_env,e);
   5338    vassert(e);
   5339    vassert(ty == Ity_F64);
   5340 
   5341    if (e->tag == Iex_RdTmp) {
   5342       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   5343    }
   5344 
   5345    if (e->tag == Iex_Const) {
   5346       /* Just handle the zero case. */
   5347       IRConst* con = e->Iex.Const.con;
   5348       if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
   5349          HReg z32 = newVRegI(env);
   5350          HReg dst = newVRegD(env);
   5351          addInstr(env, ARMInstr_Imm32(z32, 0));
   5352          addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
   5353          return dst;
   5354       }
   5355    }
   5356 
   5357    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   5358       ARMAModeV* am;
   5359       HReg res = newVRegD(env);
   5360       vassert(e->Iex.Load.ty == Ity_F64);
   5361       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
   5362       addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
   5363       return res;
   5364    }
   5365 
   5366    if (e->tag == Iex_Get) {
   5367       // XXX This won't work if offset > 1020 or is not 0 % 4.
   5368       // In which case we'll have to generate more longwinded code.
   5369       ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
   5370       HReg       res = newVRegD(env);
   5371       addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
   5372       return res;
   5373    }
   5374 
   5375    if (e->tag == Iex_Unop) {
   5376       switch (e->Iex.Unop.op) {
   5377          case Iop_ReinterpI64asF64: {
   5378             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   5379                return iselNeon64Expr(env, e->Iex.Unop.arg);
   5380             } else {
   5381                HReg srcHi, srcLo;
   5382                HReg dst = newVRegD(env);
   5383                iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
   5384                addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
   5385                return dst;
   5386             }
   5387          }
   5388          case Iop_NegF64: {
   5389             HReg src = iselDblExpr(env, e->Iex.Unop.arg);
   5390             HReg dst = newVRegD(env);
   5391             addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
   5392             return dst;
   5393          }
   5394          case Iop_AbsF64: {
   5395             HReg src = iselDblExpr(env, e->Iex.Unop.arg);
   5396             HReg dst = newVRegD(env);
   5397             addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
   5398             return dst;
   5399          }
   5400          case Iop_F32toF64: {
   5401             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
   5402             HReg dst = newVRegD(env);
   5403             addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
   5404             return dst;
   5405          }
   5406          case Iop_I32UtoF64:
   5407          case Iop_I32StoF64: {
   5408             HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
   5409             HReg f32   = newVRegF(env);
   5410             HReg dst   = newVRegD(env);
   5411             Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
   5412             /* VMOV f32, src */
   5413             addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
   5414             /* FSITOD dst, f32 */
   5415             addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
   5416                                           dst, f32));
   5417             return dst;
   5418          }
   5419          default:
   5420             break;
   5421       }
   5422    }
   5423 
   5424    if (e->tag == Iex_Binop) {
   5425       switch (e->Iex.Binop.op) {
   5426          case Iop_SqrtF64: {
   5427             /* first arg is rounding mode; we ignore it. */
   5428             HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
   5429             HReg dst = newVRegD(env);
   5430             addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
   5431             return dst;
   5432          }
   5433          default:
   5434             break;
   5435       }
   5436    }
   5437 
   5438    if (e->tag == Iex_Triop) {
   5439       IRTriop *triop = e->Iex.Triop.details;
   5440 
   5441       switch (triop->op) {
   5442          case Iop_DivF64:
   5443          case Iop_MulF64:
   5444          case Iop_AddF64:
   5445          case Iop_SubF64: {
   5446             ARMVfpOp op = 0; /*INVALID*/
   5447             HReg argL = iselDblExpr(env, triop->arg2);
   5448             HReg argR = iselDblExpr(env, triop->arg3);
   5449             HReg dst  = newVRegD(env);
   5450             switch (triop->op) {
   5451                case Iop_DivF64: op = ARMvfp_DIV; break;
   5452                case Iop_MulF64: op = ARMvfp_MUL; break;
   5453                case Iop_AddF64: op = ARMvfp_ADD; break;
   5454                case Iop_SubF64: op = ARMvfp_SUB; break;
   5455                default: vassert(0);
   5456             }
   5457             addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
   5458             return dst;
   5459          }
   5460          default:
   5461             break;
   5462       }
   5463    }
   5464 
   5465    if (e->tag == Iex_Mux0X) {
   5466       if (ty == Ity_F64
   5467           && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
   5468          HReg r8;
   5469          HReg rX  = iselDblExpr(env, e->Iex.Mux0X.exprX);
   5470          HReg r0  = iselDblExpr(env, e->Iex.Mux0X.expr0);
   5471          HReg dst = newVRegD(env);
   5472          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
   5473          r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
   5474          addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
   5475                                          ARMRI84_I84(0xFF,0)));
   5476          addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
   5477          return dst;
   5478       }
   5479    }
   5480 
   5481    ppIRExpr(e);
   5482    vpanic("iselDblExpr_wrk");
   5483 }
   5484 
   5485 
   5486 /*---------------------------------------------------------*/
   5487 /*--- ISEL: Floating point expressions (32 bit)         ---*/
   5488 /*---------------------------------------------------------*/
   5489 
   5490 /* Compute a 64-bit floating point value into a register, the identity
   5491    of which is returned.  As with iselIntExpr_R, the reg may be either
   5492    real or virtual; in any case it must not be changed by subsequent
   5493    code emitted by the caller.  */
   5494 
   5495 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
   5496 {
   5497    HReg r = iselFltExpr_wrk( env, e );
   5498 #  if 0
   5499    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   5500 #  endif
   5501    vassert(hregClass(r) == HRcFlt32);
   5502    vassert(hregIsVirtual(r));
   5503    return r;
   5504 }
   5505 
   5506 /* DO NOT CALL THIS DIRECTLY */
   5507 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
   5508 {
   5509    IRType ty = typeOfIRExpr(env->type_env,e);
   5510    vassert(e);
   5511    vassert(ty == Ity_F32);
   5512 
   5513    if (e->tag == Iex_RdTmp) {
   5514       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   5515    }
   5516 
   5517    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   5518       ARMAModeV* am;
   5519       HReg res = newVRegF(env);
   5520       vassert(e->Iex.Load.ty == Ity_F32);
   5521       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
   5522       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
   5523       return res;
   5524    }
   5525 
   5526    if (e->tag == Iex_Get) {
   5527       // XXX This won't work if offset > 1020 or is not 0 % 4.
   5528       // In which case we'll have to generate more longwinded code.
   5529       ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
   5530       HReg       res = newVRegF(env);
   5531       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
   5532       return res;
   5533    }
   5534 
   5535    if (e->tag == Iex_Unop) {
   5536       switch (e->Iex.Unop.op) {
   5537          case Iop_ReinterpI32asF32: {
   5538             HReg dst = newVRegF(env);
   5539             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   5540             addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
   5541             return dst;
   5542          }
   5543          case Iop_NegF32: {
   5544             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
   5545             HReg dst = newVRegF(env);
   5546             addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
   5547             return dst;
   5548          }
   5549          case Iop_AbsF32: {
   5550             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
   5551             HReg dst = newVRegF(env);
   5552             addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
   5553             return dst;
   5554          }
   5555          default:
   5556             break;
   5557       }
   5558    }
   5559 
   5560    if (e->tag == Iex_Binop) {
   5561       switch (e->Iex.Binop.op) {
   5562          case Iop_SqrtF32: {
   5563             /* first arg is rounding mode; we ignore it. */
   5564             HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
   5565             HReg dst = newVRegF(env);
   5566             addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
   5567             return dst;
   5568          }
   5569          case Iop_F64toF32: {
   5570             HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
   5571             set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
   5572             HReg valS = newVRegF(env);
   5573             /* FCVTSD valS, valD */
   5574             addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
   5575             set_VFP_rounding_default(env);
   5576             return valS;
   5577          }
   5578          default:
   5579             break;
   5580       }
   5581    }
   5582 
   5583    if (e->tag == Iex_Triop) {
   5584       IRTriop *triop = e->Iex.Triop.details;
   5585 
   5586       switch (triop->op) {
   5587          case Iop_DivF32:
   5588          case Iop_MulF32:
   5589          case Iop_AddF32:
   5590          case Iop_SubF32: {
   5591             ARMVfpOp op = 0; /*INVALID*/
   5592             HReg argL = iselFltExpr(env, triop->arg2);
   5593             HReg argR = iselFltExpr(env, triop->arg3);
   5594             HReg dst  = newVRegF(env);
   5595             switch (triop->op) {
   5596                case Iop_DivF32: op = ARMvfp_DIV; break;
   5597                case Iop_MulF32: op = ARMvfp_MUL; break;
   5598                case Iop_AddF32: op = ARMvfp_ADD; break;
   5599                case Iop_SubF32: op = ARMvfp_SUB; break;
   5600                default: vassert(0);
   5601             }
   5602             addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
   5603             return dst;
   5604          }
   5605          default:
   5606             break;
   5607       }
   5608    }
   5609 
   5610    if (e->tag == Iex_Mux0X) {
   5611       if (ty == Ity_F32
   5612           && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
   5613          HReg r8;
   5614          HReg rX  = iselFltExpr(env, e->Iex.Mux0X.exprX);
   5615          HReg r0  = iselFltExpr(env, e->Iex.Mux0X.expr0);
   5616          HReg dst = newVRegF(env);
   5617          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, rX));
   5618          r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
   5619          addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
   5620                                          ARMRI84_I84(0xFF,0)));
   5621          addInstr(env, ARMInstr_VCMovS(ARMcc_EQ, dst, r0));
   5622          return dst;
   5623       }
   5624    }
   5625 
   5626    ppIRExpr(e);
   5627    vpanic("iselFltExpr_wrk");
   5628 }
   5629 
   5630 
   5631 /*---------------------------------------------------------*/
   5632 /*--- ISEL: Statements                                  ---*/
   5633 /*---------------------------------------------------------*/
   5634 
   5635 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
   5636 {
   5637    if (vex_traceflags & VEX_TRACE_VCODE) {
   5638       vex_printf("\n-- ");
   5639       ppIRStmt(stmt);
   5640       vex_printf("\n");
   5641    }
   5642    switch (stmt->tag) {
   5643 
   5644    /* --------- STORE --------- */
   5645    /* little-endian write to memory */
   5646    case Ist_Store: {
   5647       IRType    tya  = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
   5648       IRType    tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
   5649       IREndness end  = stmt->Ist.Store.end;
   5650 
   5651       if (tya != Ity_I32 || end != Iend_LE)
   5652          goto stmt_fail;
   5653 
   5654       if (tyd == Ity_I32) {
   5655          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
   5656          ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
   5657          addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
   5658          return;
   5659       }
   5660       if (tyd == Ity_I16) {
   5661          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
   5662          ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
   5663          addInstr(env, ARMInstr_LdSt16(False/*!isLoad*/,
   5664                                        False/*!isSignedLoad*/, rD, am));
   5665          return;
   5666       }
   5667       if (tyd == Ity_I8) {
   5668          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
   5669          ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
   5670          addInstr(env, ARMInstr_LdSt8U(False/*!isLoad*/, rD, am));
   5671          return;
   5672       }
   5673       if (tyd == Ity_I64) {
   5674          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   5675             HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
   5676             ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
   5677             addInstr(env, ARMInstr_NLdStD(False, dD, am));
   5678          } else {
   5679             HReg rDhi, rDlo, rA;
   5680             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
   5681             rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
   5682             addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDhi,
   5683                                           ARMAMode1_RI(rA,4)));
   5684             addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDlo,
   5685                                           ARMAMode1_RI(rA,0)));
   5686          }
   5687          return;
   5688       }
   5689       if (tyd == Ity_F64) {
   5690          HReg       dD = iselDblExpr(env, stmt->Ist.Store.data);
   5691          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
   5692          addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
   5693          return;
   5694       }
   5695       if (tyd == Ity_F32) {
   5696          HReg       fD = iselFltExpr(env, stmt->Ist.Store.data);
   5697          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
   5698          addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
   5699          return;
   5700       }
   5701       if (tyd == Ity_V128) {
   5702          HReg       qD = iselNeonExpr(env, stmt->Ist.Store.data);
   5703          ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
   5704          addInstr(env, ARMInstr_NLdStQ(False, qD, am));
   5705          return;
   5706       }
   5707 
   5708       break;
   5709    }
   5710 
   5711    /* --------- PUT --------- */
   5712    /* write guest state, fixed offset */
   5713    case Ist_Put: {
   5714        IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
   5715 
   5716        if (tyd == Ity_I32) {
   5717            HReg       rD = iselIntExpr_R(env, stmt->Ist.Put.data);
   5718            ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
   5719            addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
   5720            return;
   5721        }
   5722        if (tyd == Ity_I64) {
   5723           if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   5724              HReg addr = newVRegI(env);
   5725              HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
   5726              addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
   5727                                                 stmt->Ist.Put.offset));
   5728              addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
   5729           } else {
   5730              HReg rDhi, rDlo;
   5731              ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
   5732                                            stmt->Ist.Put.offset + 0);
   5733              ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
   5734                                            stmt->Ist.Put.offset + 4);
   5735              iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
   5736              addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDhi, am4));
   5737              addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDlo, am0));
   5738           }
   5739           return;
   5740        }
   5741        if (tyd == Ity_F64) {
   5742           // XXX This won't work if offset > 1020 or is not 0 % 4.
   5743           // In which case we'll have to generate more longwinded code.
   5744           ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
   5745           HReg       rD = iselDblExpr(env, stmt->Ist.Put.data);
   5746           addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
   5747           return;
   5748        }
   5749        if (tyd == Ity_F32) {
   5750           // XXX This won't work if offset > 1020 or is not 0 % 4.
   5751           // In which case we'll have to generate more longwinded code.
   5752           ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
   5753           HReg       rD = iselFltExpr(env, stmt->Ist.Put.data);
   5754           addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
   5755           return;
   5756        }
   5757        if (tyd == Ity_V128) {
   5758           HReg addr = newVRegI(env);
   5759           HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
   5760           addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
   5761                                        stmt->Ist.Put.offset));
   5762           addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
   5763           return;
   5764        }
   5765        break;
   5766    }
   5767 
   5768 //zz   /* --------- Indexed PUT --------- */
   5769 //zz   /* write guest state, run-time offset */
   5770 //zz   case Ist_PutI: {
   5771 //zz      ARMAMode2* am2
   5772 //zz           = genGuestArrayOffset(
   5773 //zz               env, stmt->Ist.PutI.descr,
   5774 //zz               stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
   5775 //zz
   5776 //zz       IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
   5777 //zz
   5778 //zz       if (tyd == Ity_I8) {
   5779 //zz           HReg reg = iselIntExpr_R(env, stmt->Ist.PutI.data);
   5780 //zz           addInstr(env, ARMInstr_StoreB(reg, am2));
   5781 //zz           return;
   5782 //zz       }
   5783 //zz// CAB: Ity_I32, Ity_I16 ?
   5784 //zz       break;
   5785 //zz   }
   5786 
   5787    /* --------- TMP --------- */
   5788    /* assign value to temporary */
   5789    case Ist_WrTmp: {
   5790       IRTemp tmp = stmt->Ist.WrTmp.tmp;
   5791       IRType ty = typeOfIRTemp(env->type_env, tmp);
   5792 
   5793       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
   5794          ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
   5795                                           env, stmt->Ist.WrTmp.data);
   5796          HReg     dst  = lookupIRTemp(env, tmp);
   5797          addInstr(env, ARMInstr_Mov(dst,ri84));
   5798          return;
   5799       }
   5800       if (ty == Ity_I1) {
   5801          HReg        dst  = lookupIRTemp(env, tmp);
   5802          ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
   5803          addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
   5804          addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
   5805          return;
   5806       }
   5807       if (ty == Ity_I64) {
   5808          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   5809             HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
   5810             HReg dst = lookupIRTemp(env, tmp);
   5811             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
   5812          } else {
   5813             HReg rHi, rLo, dstHi, dstLo;
   5814             iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
   5815             lookupIRTemp64( &dstHi, &dstLo, env, tmp);
   5816             addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
   5817             addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
   5818          }
   5819          return;
   5820       }
   5821       if (ty == Ity_F64) {
   5822          HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
   5823          HReg dst = lookupIRTemp(env, tmp);
   5824          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
   5825          return;
   5826       }
   5827       if (ty == Ity_F32) {
   5828          HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
   5829          HReg dst = lookupIRTemp(env, tmp);
   5830          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
   5831          return;
   5832       }
   5833       if (ty == Ity_V128) {
   5834          HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
   5835          HReg dst = lookupIRTemp(env, tmp);
   5836          addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
   5837          return;
   5838       }
   5839       break;
   5840    }
   5841 
   5842    /* --------- Call to DIRTY helper --------- */
   5843    /* call complex ("dirty") helper function */
   5844    case Ist_Dirty: {
   5845       IRType   retty;
   5846       IRDirty* d = stmt->Ist.Dirty.details;
   5847       Bool     passBBP = False;
   5848 
   5849       if (d->nFxState == 0)
   5850          vassert(!d->needsBBP);
   5851 
   5852       passBBP = toBool(d->nFxState > 0 && d->needsBBP);
   5853 
   5854       /* Marshal args, do the call, clear stack. */
   5855       Bool ok = doHelperCall( env, passBBP, d->guard, d->cee, d->args );
   5856       if (!ok)
   5857          break; /* will go to stmt_fail: */
   5858 
   5859       /* Now figure out what to do with the returned value, if any. */
   5860       if (d->tmp == IRTemp_INVALID)
   5861          /* No return value.  Nothing to do. */
   5862          return;
   5863 
   5864       retty = typeOfIRTemp(env->type_env, d->tmp);
   5865 
   5866       if (retty == Ity_I64) {
   5867          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   5868             HReg tmp = lookupIRTemp(env, d->tmp);
   5869             addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
   5870                                                      hregARM_R0()));
   5871          } else {
   5872             HReg dstHi, dstLo;
   5873             /* The returned value is in r1:r0.  Park it in the
   5874                register-pair associated with tmp. */
   5875             lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
   5876             addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
   5877             addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
   5878          }
   5879          return;
   5880       }
   5881       if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
   5882          /* The returned value is in r0.  Park it in the register
   5883             associated with tmp. */
   5884          HReg dst = lookupIRTemp(env, d->tmp);
   5885          addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
   5886          return;
   5887       }
   5888 
   5889       break;
   5890    }
   5891 
   5892    /* --------- Load Linked and Store Conditional --------- */
   5893    case Ist_LLSC: {
   5894       if (stmt->Ist.LLSC.storedata == NULL) {
   5895          /* LL */
   5896          IRTemp res = stmt->Ist.LLSC.result;
   5897          IRType ty  = typeOfIRTemp(env->type_env, res);
   5898          if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
   5899             Int  szB   = 0;
   5900             HReg r_dst = lookupIRTemp(env, res);
   5901             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
   5902             switch (ty) {
   5903                case Ity_I8:  szB = 1; break;
   5904                case Ity_I16: szB = 2; break;
   5905                case Ity_I32: szB = 4; break;
   5906                default:      vassert(0);
   5907             }
   5908             addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
   5909             addInstr(env, ARMInstr_LdrEX(szB));
   5910             addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
   5911             return;
   5912          }
   5913          if (ty == Ity_I64) {
   5914             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
   5915             addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
   5916             addInstr(env, ARMInstr_LdrEX(8));
   5917             /* Result is in r3:r2.  On a non-NEON capable CPU, we must
   5918                move it into a result register pair.  On a NEON capable
   5919                CPU, the result register will be a 64 bit NEON
   5920                register, so we must move it there instead. */
   5921             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   5922                HReg dst = lookupIRTemp(env, res);
   5923                addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
   5924                                                         hregARM_R2()));
   5925             } else {
   5926                HReg r_dst_hi, r_dst_lo;
   5927                lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
   5928                addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
   5929                addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
   5930             }
   5931             return;
   5932          }
   5933          /*NOTREACHED*/
   5934          vassert(0);
   5935       } else {
   5936          /* SC */
   5937          IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
   5938          if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
   5939             Int  szB = 0;
   5940             HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
   5941             HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
   5942             switch (tyd) {
   5943                case Ity_I8:  szB = 1; break;
   5944                case Ity_I16: szB = 2; break;
   5945                case Ity_I32: szB = 4; break;
   5946                default:      vassert(0);
   5947             }
   5948             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
   5949             addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
   5950             addInstr(env, ARMInstr_StrEX(szB));
   5951          } else {
   5952             vassert(tyd == Ity_I64);
   5953             /* This is really ugly.  There is no is/is-not NEON
   5954                decision akin to the case for LL, because iselInt64Expr
   5955                fudges this for us, and always gets the result into two
   5956                GPRs even if this means moving it from a NEON
   5957                register. */
   5958             HReg rDhi, rDlo;
   5959             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
   5960             HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
   5961             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
   5962             addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
   5963             addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
   5964             addInstr(env, ARMInstr_StrEX(8));
   5965          }
   5966          /* now r0 is 1 if failed, 0 if success.  Change to IR
   5967             conventions (0 is fail, 1 is success).  Also transfer
   5968             result to r_res. */
   5969          IRTemp   res   = stmt->Ist.LLSC.result;
   5970          IRType   ty    = typeOfIRTemp(env->type_env, res);
   5971          HReg     r_res = lookupIRTemp(env, res);
   5972          ARMRI84* one   = ARMRI84_I84(1,0);
   5973          vassert(ty == Ity_I1);
   5974          addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
   5975          /* And be conservative -- mask off all but the lowest bit */
   5976          addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
   5977          return;
   5978       }
   5979       break;
   5980    }
   5981 
   5982    /* --------- MEM FENCE --------- */
   5983    case Ist_MBE:
   5984       switch (stmt->Ist.MBE.event) {
   5985          case Imbe_Fence:
   5986             addInstr(env, ARMInstr_MFence());
   5987             return;
   5988          case Imbe_CancelReservation:
   5989             addInstr(env, ARMInstr_CLREX());
   5990             return;
   5991          default:
   5992             break;
   5993       }
   5994       break;
   5995 
   5996    /* --------- INSTR MARK --------- */
   5997    /* Doesn't generate any executable code ... */
   5998    case Ist_IMark:
   5999        return;
   6000 
   6001    /* --------- NO-OP --------- */
   6002    case Ist_NoOp:
   6003        return;
   6004 
   6005    /* --------- EXIT --------- */
   6006    case Ist_Exit: {
   6007       if (stmt->Ist.Exit.dst->tag != Ico_U32)
   6008          vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
   6009 
   6010       ARMCondCode cc     = iselCondCode(env, stmt->Ist.Exit.guard);
   6011       ARMAMode1*  amR15T = ARMAMode1_RI(hregARM_R8(),
   6012                                         stmt->Ist.Exit.offsIP);
   6013 
   6014       /* Case: boring transfer to known address */
   6015       if (stmt->Ist.Exit.jk == Ijk_Boring
   6016           || stmt->Ist.Exit.jk == Ijk_Call
   6017           || stmt->Ist.Exit.jk == Ijk_Ret) {
   6018          if (env->chainingAllowed) {
   6019             /* .. almost always true .. */
   6020             /* Skip the event check at the dst if this is a forwards
   6021                edge. */
   6022             Bool toFastEP
   6023                = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
   6024             if (0) vex_printf("%s", toFastEP ? "Y" : ",");
   6025             addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
   6026                                            amR15T, cc, toFastEP));
   6027          } else {
   6028             /* .. very occasionally .. */
   6029             /* We can't use chaining, so ask for an assisted transfer,
   6030                as that's the only alternative that is allowable. */
   6031             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
   6032             addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
   6033          }
   6034          return;
   6035       }
   6036 
   6037       /* Case: assisted transfer to arbitrary address */
   6038       switch (stmt->Ist.Exit.jk) {
   6039          /* Keep this list in sync with that in iselNext below */
   6040          case Ijk_ClientReq:
   6041          case Ijk_NoDecode:
   6042          case Ijk_NoRedir:
   6043          case Ijk_Sys_syscall:
   6044          case Ijk_TInval:
   6045          {
   6046             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
   6047             addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
   6048                                              stmt->Ist.Exit.jk));
   6049             return;
   6050          }
   6051          default:
   6052             break;
   6053       }
   6054 
   6055       /* Do we ever expect to see any other kind? */
   6056       goto stmt_fail;
   6057    }
   6058 
   6059    default: break;
   6060    }
   6061   stmt_fail:
   6062    ppIRStmt(stmt);
   6063    vpanic("iselStmt");
   6064 }
   6065 
   6066 
   6067 /*---------------------------------------------------------*/
   6068 /*--- ISEL: Basic block terminators (Nexts)             ---*/
   6069 /*---------------------------------------------------------*/
   6070 
   6071 static void iselNext ( ISelEnv* env,
   6072                        IRExpr* next, IRJumpKind jk, Int offsIP )
   6073 {
   6074    if (vex_traceflags & VEX_TRACE_VCODE) {
   6075       vex_printf( "\n-- PUT(%d) = ", offsIP);
   6076       ppIRExpr( next );
   6077       vex_printf( "; exit-");
   6078       ppIRJumpKind(jk);
   6079       vex_printf( "\n");
   6080    }
   6081 
   6082    /* Case: boring transfer to known address */
   6083    if (next->tag == Iex_Const) {
   6084       IRConst* cdst = next->Iex.Const.con;
   6085       vassert(cdst->tag == Ico_U32);
   6086       if (jk == Ijk_Boring || jk == Ijk_Call) {
   6087          /* Boring transfer to known address */
   6088          ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
   6089          if (env->chainingAllowed) {
   6090             /* .. almost always true .. */
   6091             /* Skip the event check at the dst if this is a forwards
   6092                edge. */
   6093             Bool toFastEP
   6094                = ((Addr64)cdst->Ico.U32) > env->max_ga;
   6095             if (0) vex_printf("%s", toFastEP ? "X" : ".");
   6096             addInstr(env, ARMInstr_XDirect(cdst->Ico.U32,
   6097                                            amR15T, ARMcc_AL,
   6098                                            toFastEP));
   6099          } else {
   6100             /* .. very occasionally .. */
   6101             /* We can't use chaining, so ask for an assisted transfer,
   6102                as that's the only alternative that is allowable. */
   6103             HReg r = iselIntExpr_R(env, next);
   6104             addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
   6105                                              Ijk_Boring));
   6106          }
   6107          return;
   6108       }
   6109    }
   6110 
   6111    /* Case: call/return (==boring) transfer to any address */
   6112    switch (jk) {
   6113       case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
   6114          HReg       r      = iselIntExpr_R(env, next);
   6115          ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
   6116          if (env->chainingAllowed) {
   6117             addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL));
   6118          } else {
   6119             addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
   6120                                                 Ijk_Boring));
   6121          }
   6122          return;
   6123       }
   6124       default:
   6125          break;
   6126    }
   6127 
   6128    /* Case: assisted transfer to arbitrary address */
   6129    switch (jk) {
   6130       /* Keep this list in sync with that for Ist_Exit above */
   6131       case Ijk_ClientReq:
   6132       case Ijk_NoDecode:
   6133       case Ijk_NoRedir:
   6134       case Ijk_Sys_syscall:
   6135       {
   6136          HReg       r      = iselIntExpr_R(env, next);
   6137          ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
   6138          addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk));
   6139          return;
   6140       }
   6141       default:
   6142          break;
   6143    }
   6144 
   6145    vex_printf( "\n-- PUT(%d) = ", offsIP);
   6146    ppIRExpr( next );
   6147    vex_printf( "; exit-");
   6148    ppIRJumpKind(jk);
   6149    vex_printf( "\n");
   6150    vassert(0); // are we expecting any other kind?
   6151 }
   6152 
   6153 
   6154 /*---------------------------------------------------------*/
   6155 /*--- Insn selector top-level                           ---*/
   6156 /*---------------------------------------------------------*/
   6157 
   6158 /* Translate an entire SB to arm code. */
   6159 
   6160 HInstrArray* iselSB_ARM ( IRSB* bb,
   6161                           VexArch      arch_host,
   6162                           VexArchInfo* archinfo_host,
   6163                           VexAbiInfo*  vbi/*UNUSED*/,
   6164                           Int offs_Host_EvC_Counter,
   6165                           Int offs_Host_EvC_FailAddr,
   6166                           Bool chainingAllowed,
   6167                           Bool addProfInc,
   6168                           Addr64 max_ga )
   6169 {
   6170    Int       i, j;
   6171    HReg      hreg, hregHI;
   6172    ISelEnv*  env;
   6173    UInt      hwcaps_host = archinfo_host->hwcaps;
   6174    ARMAMode1 *amCounter, *amFailAddr;
   6175 
   6176    /* sanity ... */
   6177    vassert(arch_host == VexArchARM);
   6178 
   6179    /* hwcaps should not change from one ISEL call to another. */
   6180    arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
   6181 
   6182    /* Make up an initial environment to use. */
   6183    env = LibVEX_Alloc(sizeof(ISelEnv));
   6184    env->vreg_ctr = 0;
   6185 
   6186    /* Set up output code array. */
   6187    env->code = newHInstrArray();
   6188 
   6189    /* Copy BB's type env. */
   6190    env->type_env = bb->tyenv;
   6191 
   6192    /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
   6193       change as we go along. */
   6194    env->n_vregmap = bb->tyenv->types_used;
   6195    env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   6196    env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   6197 
   6198    /* and finally ... */
   6199    env->chainingAllowed = chainingAllowed;
   6200    env->hwcaps          = hwcaps_host;
   6201    env->max_ga          = max_ga;
   6202 
   6203    /* For each IR temporary, allocate a suitably-kinded virtual
   6204       register. */
   6205    j = 0;
   6206    for (i = 0; i < env->n_vregmap; i++) {
   6207       hregHI = hreg = INVALID_HREG;
   6208       switch (bb->tyenv->types[i]) {
   6209          case Ity_I1:
   6210          case Ity_I8:
   6211          case Ity_I16:
   6212          case Ity_I32:  hreg   = mkHReg(j++, HRcInt32, True); break;
   6213          case Ity_I64:
   6214             if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
   6215                hreg = mkHReg(j++, HRcFlt64, True);
   6216             } else {
   6217                hregHI = mkHReg(j++, HRcInt32, True);
   6218                hreg   = mkHReg(j++, HRcInt32, True);
   6219             }
   6220             break;
   6221          case Ity_F32:  hreg   = mkHReg(j++, HRcFlt32, True); break;
   6222          case Ity_F64:  hreg   = mkHReg(j++, HRcFlt64, True); break;
   6223          case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
   6224          default: ppIRType(bb->tyenv->types[i]);
   6225                   vpanic("iselBB: IRTemp type");
   6226       }
   6227       env->vregmap[i]   = hreg;
   6228       env->vregmapHI[i] = hregHI;
   6229    }
   6230    env->vreg_ctr = j;
   6231 
   6232    /* The very first instruction must be an event check. */
   6233    amCounter  = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
   6234    amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
   6235    addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
   6236 
   6237    /* Possibly a block counter increment (for profiling).  At this
   6238       point we don't know the address of the counter, so just pretend
   6239       it is zero.  It will have to be patched later, but before this
   6240       translation is used, by a call to LibVEX_patchProfCtr. */
   6241    if (addProfInc) {
   6242       addInstr(env, ARMInstr_ProfInc());
   6243    }
   6244 
   6245    /* Ok, finally we can iterate over the statements. */
   6246    for (i = 0; i < bb->stmts_used; i++)
   6247       iselStmt(env, bb->stmts[i]);
   6248 
   6249    iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
   6250 
   6251    /* record the number of vregs we used. */
   6252    env->code->n_vregs = env->vreg_ctr;
   6253    return env->code;
   6254 }
   6255 
   6256 
   6257 /*---------------------------------------------------------------*/
   6258 /*--- end                                     host_arm_isel.c ---*/
   6259 /*---------------------------------------------------------------*/
   6260