Home | History | Annotate | Download | only in priv
      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                   host_arm_isel.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2013 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    NEON support is
     14    Copyright (C) 2010-2013 Samsung Electronics
     15    contributed by Dmitry Zhurikhin <zhur (at) ispras.ru>
     16               and Kirill Batuzov <batuzovk (at) ispras.ru>
     17 
     18    This program is free software; you can redistribute it and/or
     19    modify it under the terms of the GNU General Public License as
     20    published by the Free Software Foundation; either version 2 of the
     21    License, or (at your option) any later version.
     22 
     23    This program is distributed in the hope that it will be useful, but
     24    WITHOUT ANY WARRANTY; without even the implied warranty of
     25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     26    General Public License for more details.
     27 
     28    You should have received a copy of the GNU General Public License
     29    along with this program; if not, write to the Free Software
     30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     31    02110-1301, USA.
     32 
     33    The GNU General Public License is contained in the file COPYING.
     34 */
     35 
     36 #include "libvex_basictypes.h"
     37 #include "libvex_ir.h"
     38 #include "libvex.h"
     39 #include "ir_match.h"
     40 
     41 #include "main_util.h"
     42 #include "main_globals.h"
     43 #include "host_generic_regs.h"
     44 #include "host_generic_simd64.h"  // for 32-bit SIMD helpers
     45 #include "host_arm_defs.h"
     46 
     47 
     48 /*---------------------------------------------------------*/
     49 /*--- ARMvfp control word stuff                         ---*/
     50 /*---------------------------------------------------------*/
     51 
     52 /* Vex-generated code expects to run with the FPU set as follows: all
     53    exceptions masked, round-to-nearest, non-vector mode, with the NZCV
     54    flags cleared, and FZ (flush to zero) disabled.  Curiously enough,
     55    this corresponds to a FPSCR value of zero.
     56 
     57    fpscr should therefore be zero on entry to Vex-generated code, and
     58    should be unchanged at exit.  (Or at least the bottom 28 bits
     59    should be zero).
     60 */
     61 
     62 #define DEFAULT_FPSCR 0
     63 
     64 
     65 /*---------------------------------------------------------*/
     66 /*--- ISelEnv                                           ---*/
     67 /*---------------------------------------------------------*/
     68 
     69 /* This carries around:
     70 
     71    - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     72      might encounter.  This is computed before insn selection starts,
     73      and does not change.
     74 
     75    - A mapping from IRTemp to HReg.  This tells the insn selector
     76      which virtual register(s) are associated with each IRTemp
     77      temporary.  This is computed before insn selection starts, and
     78      does not change.  We expect this mapping to map precisely the
     79      same set of IRTemps as the type mapping does.
     80 
     81         - vregmap   holds the primary register for the IRTemp.
     82         - vregmapHI is only used for 64-bit integer-typed
     83              IRTemps.  It holds the identity of a second
     84              32-bit virtual HReg, which holds the high half
     85              of the value.
     86 
     87    - The code array, that is, the insns selected so far.
     88 
     89    - A counter, for generating new virtual registers.
     90 
     91    - The host hardware capabilities word.  This is set at the start
     92      and does not change.
     93 
     94    - A Bool for indicating whether we may generate chain-me
     95      instructions for control flow transfers, or whether we must use
     96      XAssisted.
     97 
     98    - The maximum guest address of any guest insn in this block.
     99      Actually, the address of the highest-addressed byte from any insn
    100      in this block.  Is set at the start and does not change.  This is
    101      used for detecting jumps which are definitely forward-edges from
    102      this block, and therefore can be made (chained) to the fast entry
    103      point of the destination, thereby avoiding the destination's
    104      event check.
    105 
    106    Note, this is all (well, mostly) host-independent.
    107 */
    108 
    109 typedef
    110    struct {
    111       /* Constant -- are set at the start and do not change. */
    112       IRTypeEnv*   type_env;
    113 
    114       HReg*        vregmap;
    115       HReg*        vregmapHI;
    116       Int          n_vregmap;
    117 
    118       UInt         hwcaps;
    119 
    120       Bool         chainingAllowed;
    121       Addr64       max_ga;
    122 
    123       /* These are modified as we go along. */
    124       HInstrArray* code;
    125       Int          vreg_ctr;
    126    }
    127    ISelEnv;
    128 
    129 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
    130 {
    131    vassert(tmp >= 0);
    132    vassert(tmp < env->n_vregmap);
    133    return env->vregmap[tmp];
    134 }
    135 
    136 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
    137 {
    138    vassert(tmp >= 0);
    139    vassert(tmp < env->n_vregmap);
    140    vassert(! hregIsInvalid(env->vregmapHI[tmp]));
    141    *vrLO = env->vregmap[tmp];
    142    *vrHI = env->vregmapHI[tmp];
    143 }
    144 
    145 static void addInstr ( ISelEnv* env, ARMInstr* instr )
    146 {
    147    addHInstr(env->code, instr);
    148    if (vex_traceflags & VEX_TRACE_VCODE) {
    149       ppARMInstr(instr);
    150       vex_printf("\n");
    151    }
    152 #if 0
    153    if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
    154          || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
    155          || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
    156       ppARMInstr(instr);
    157       vex_printf("\n");
    158    }
    159 #endif
    160 }
    161 
    162 static HReg newVRegI ( ISelEnv* env )
    163 {
    164    HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
    165    env->vreg_ctr++;
    166    return reg;
    167 }
    168 
    169 static HReg newVRegD ( ISelEnv* env )
    170 {
    171    HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
    172    env->vreg_ctr++;
    173    return reg;
    174 }
    175 
    176 static HReg newVRegF ( ISelEnv* env )
    177 {
    178    HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
    179    env->vreg_ctr++;
    180    return reg;
    181 }
    182 
    183 static HReg newVRegV ( ISelEnv* env )
    184 {
    185    HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
    186    env->vreg_ctr++;
    187    return reg;
    188 }
    189 
    190 /* These are duplicated in guest_arm_toIR.c */
    191 static IRExpr* unop ( IROp op, IRExpr* a )
    192 {
    193    return IRExpr_Unop(op, a);
    194 }
    195 
    196 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
    197 {
    198    return IRExpr_Binop(op, a1, a2);
    199 }
    200 
    201 static IRExpr* bind ( Int binder )
    202 {
    203    return IRExpr_Binder(binder);
    204 }
    205 
    206 
    207 /*---------------------------------------------------------*/
    208 /*--- ISEL: Forward declarations                        ---*/
    209 /*---------------------------------------------------------*/
    210 
    211 /* These are organised as iselXXX and iselXXX_wrk pairs.  The
    212    iselXXX_wrk do the real work, but are not to be called directly.
    213    For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
    214    checks that all returned registers are virtual.  You should not
    215    call the _wrk version directly.
    216 */
    217 static ARMAMode1*  iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
    218 static ARMAMode1*  iselIntExpr_AMode1     ( ISelEnv* env, IRExpr* e );
    219 
    220 static ARMAMode2*  iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
    221 static ARMAMode2*  iselIntExpr_AMode2     ( ISelEnv* env, IRExpr* e );
    222 
    223 static ARMAModeV*  iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
    224 static ARMAModeV*  iselIntExpr_AModeV     ( ISelEnv* env, IRExpr* e );
    225 
    226 static ARMAModeN*  iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
    227 static ARMAModeN*  iselIntExpr_AModeN     ( ISelEnv* env, IRExpr* e );
    228 
    229 static ARMRI84*    iselIntExpr_RI84_wrk
    230         ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
    231 static ARMRI84*    iselIntExpr_RI84
    232         ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
    233 
    234 static ARMRI5*     iselIntExpr_RI5_wrk    ( ISelEnv* env, IRExpr* e );
    235 static ARMRI5*     iselIntExpr_RI5        ( ISelEnv* env, IRExpr* e );
    236 
    237 static ARMCondCode iselCondCode_wrk       ( ISelEnv* env, IRExpr* e );
    238 static ARMCondCode iselCondCode           ( ISelEnv* env, IRExpr* e );
    239 
    240 static HReg        iselIntExpr_R_wrk      ( ISelEnv* env, IRExpr* e );
    241 static HReg        iselIntExpr_R          ( ISelEnv* env, IRExpr* e );
    242 
    243 static void        iselInt64Expr_wrk      ( HReg* rHi, HReg* rLo,
    244                                             ISelEnv* env, IRExpr* e );
    245 static void        iselInt64Expr          ( HReg* rHi, HReg* rLo,
    246                                             ISelEnv* env, IRExpr* e );
    247 
    248 static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
    249 static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );
    250 
    251 static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
    252 static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );
    253 
    254 static HReg        iselNeon64Expr_wrk     ( ISelEnv* env, IRExpr* e );
    255 static HReg        iselNeon64Expr         ( ISelEnv* env, IRExpr* e );
    256 
    257 static HReg        iselNeonExpr_wrk       ( ISelEnv* env, IRExpr* e );
    258 static HReg        iselNeonExpr           ( ISelEnv* env, IRExpr* e );
    259 
    260 /*---------------------------------------------------------*/
    261 /*--- ISEL: Misc helpers                                ---*/
    262 /*---------------------------------------------------------*/
    263 
    264 static UInt ROR32 ( UInt x, UInt sh ) {
    265    vassert(sh >= 0 && sh < 32);
    266    if (sh == 0)
    267       return x;
    268    else
    269       return (x << (32-sh)) | (x >> sh);
    270 }
    271 
    272 /* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
    273    form, and if so return the components. */
    274 static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
    275 {
    276    UInt i;
    277    for (i = 0; i < 16; i++) {
    278       if (0 == (u & 0xFFFFFF00)) {
    279          *u8 = u;
    280          *u4 = i;
    281          return True;
    282       }
    283       u = ROR32(u, 30);
    284    }
    285    vassert(i == 16);
    286    return False;
    287 }
    288 
    289 /* Make a int reg-reg move. */
    290 static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
    291 {
    292    vassert(hregClass(src) == HRcInt32);
    293    vassert(hregClass(dst) == HRcInt32);
    294    return ARMInstr_Mov(dst, ARMRI84_R(src));
    295 }
    296 
    297 /* Set the VFP unit's rounding mode to default (round to nearest). */
    298 static void set_VFP_rounding_default ( ISelEnv* env )
    299 {
    300    /* mov rTmp, #DEFAULT_FPSCR
    301       fmxr fpscr, rTmp
    302    */
    303    HReg rTmp = newVRegI(env);
    304    addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
    305    addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
    306 }
    307 
    308 /* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
    309    expression denoting a value in the range 0 .. 3, indicating a round
    310    mode encoded as per type IRRoundingMode.  Set FPSCR to have the
    311    same rounding.
    312 */
    313 static
    314 void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
    315 {
    316    /* This isn't simple, because 'mode' carries an IR rounding
    317       encoding, and we need to translate that to an ARMvfp one:
    318       The IR encoding:
    319          00  to nearest (the default)
    320          10  to +infinity
    321          01  to -infinity
    322          11  to zero
    323       The ARMvfp encoding:
    324          00  to nearest
    325          01  to +infinity
    326          10  to -infinity
    327          11  to zero
    328       Easy enough to do; just swap the two bits.
    329    */
    330    HReg irrm = iselIntExpr_R(env, mode);
    331    HReg tL   = newVRegI(env);
    332    HReg tR   = newVRegI(env);
    333    HReg t3   = newVRegI(env);
    334    /* tL = irrm << 1;
    335       tR = irrm >> 1;  if we're lucky, these will issue together
    336       tL &= 2;
    337       tR &= 1;         ditto
    338       t3 = tL | tR;
    339       t3 <<= 22;
    340       fmxr fpscr, t3
    341    */
    342    addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
    343    addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
    344    addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
    345    addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
    346    addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
    347    addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
    348    addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
    349 }
    350 
    351 
    352 /*---------------------------------------------------------*/
    353 /*--- ISEL: Function call helpers                       ---*/
    354 /*---------------------------------------------------------*/
    355 
    356 /* Used only in doHelperCall.  See big comment in doHelperCall re
    357    handling of register-parameter args.  This function figures out
    358    whether evaluation of an expression might require use of a fixed
    359    register.  If in doubt return True (safe but suboptimal).
    360 */
    361 static
    362 Bool mightRequireFixedRegs ( IRExpr* e )
    363 {
    364    if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
    365       // These are always "safe" -- either a copy of r13(sp) in some
    366       // arbitrary vreg, or a copy of r8, respectively.
    367       return False;
    368    }
    369    /* Else it's a "normal" expression. */
    370    switch (e->tag) {
    371    case Iex_RdTmp: case Iex_Const: case Iex_Get:
    372       return False;
    373    default:
    374       return True;
    375    }
    376 }
    377 
    378 
    379 /* Do a complete function call.  |guard| is a Ity_Bit expression
    380    indicating whether or not the call happens.  If guard==NULL, the
    381    call is unconditional.  |retloc| is set to indicate where the
    382    return value is after the call.  The caller (of this fn) must
    383    generate code to add |stackAdjustAfterCall| to the stack pointer
    384    after the call is done.  Returns True iff it managed to handle this
    385    combination of arg/return types, else returns False. */
    386 
    387 static
    388 Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
    389                     /*OUT*/RetLoc* retloc,
    390                     ISelEnv* env,
    391                     IRExpr* guard,
    392                     IRCallee* cee, IRType retTy, IRExpr** args )
    393 {
    394    ARMCondCode cc;
    395    HReg        argregs[ARM_N_ARGREGS];
    396    HReg        tmpregs[ARM_N_ARGREGS];
    397    Bool        go_fast;
    398    Int         n_args, i, nextArgReg;
    399    ULong       target;
    400 
    401    vassert(ARM_N_ARGREGS == 4);
    402 
    403    /* Set default returns.  We'll update them later if needed. */
    404    *stackAdjustAfterCall = 0;
    405    *retloc               = mk_RetLoc_INVALID();
    406 
    407    /* These are used for cross-checking that IR-level constraints on
    408       the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
    409    UInt nVECRETs = 0;
    410    UInt nBBPTRs  = 0;
    411 
    412    /* Marshal args for a call and do the call.
    413 
    414       This function only deals with a tiny set of possibilities, which
    415       cover all helpers in practice.  The restrictions are that only
    416       arguments in registers are supported, hence only ARM_N_REGPARMS
    417       x 32 integer bits in total can be passed.  In fact the only
    418       supported arg types are I32 and I64.
    419 
    420       The return type can be I{64,32} or V128.  In the V128 case, it
    421       is expected that |args| will contain the special node
    422       IRExpr_VECRET(), in which case this routine generates code to
    423       allocate space on the stack for the vector return value.  Since
    424       we are not passing any scalars on the stack, it is enough to
    425       preallocate the return space before marshalling any arguments,
    426       in this case.
    427 
    428       |args| may also contain IRExpr_BBPTR(), in which case the
    429       value in r8 is passed as the corresponding argument.
    430 
    431       Generating code which is both efficient and correct when
    432       parameters are to be passed in registers is difficult, for the
    433       reasons elaborated in detail in comments attached to
    434       doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
    435       of the method described in those comments.
    436 
    437       The problem is split into two cases: the fast scheme and the
    438       slow scheme.  In the fast scheme, arguments are computed
    439       directly into the target (real) registers.  This is only safe
    440       when we can be sure that computation of each argument will not
    441       trash any real registers set by computation of any other
    442       argument.
    443 
    444       In the slow scheme, all args are first computed into vregs, and
    445       once they are all done, they are moved to the relevant real
    446       regs.  This always gives correct code, but it also gives a bunch
    447       of vreg-to-rreg moves which are usually redundant but are hard
    448       for the register allocator to get rid of.
    449 
    450       To decide which scheme to use, all argument expressions are
    451       first examined.  If they are all so simple that it is clear they
    452       will be evaluated without use of any fixed registers, use the
    453       fast scheme, else use the slow scheme.  Note also that only
    454       unconditional calls may use the fast scheme, since having to
    455       compute a condition expression could itself trash real
    456       registers.
    457 
    458       Note this requires being able to examine an expression and
    459       determine whether or not evaluation of it might use a fixed
    460       register.  That requires knowledge of how the rest of this insn
    461       selector works.  Currently just the following 3 are regarded as
    462       safe -- hopefully they cover the majority of arguments in
    463       practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
    464    */
    465 
    466    /* Note that the cee->regparms field is meaningless on ARM hosts
    467       (since there is only one calling convention) and so we always
    468       ignore it. */
    469 
    470    n_args = 0;
    471    for (i = 0; args[i]; i++) {
    472       IRExpr* arg = args[i];
    473       if (UNLIKELY(arg->tag == Iex_VECRET)) {
    474          nVECRETs++;
    475       } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
    476          nBBPTRs++;
    477       }
    478       n_args++;
    479    }
    480 
    481    argregs[0] = hregARM_R0();
    482    argregs[1] = hregARM_R1();
    483    argregs[2] = hregARM_R2();
    484    argregs[3] = hregARM_R3();
    485 
    486    tmpregs[0] = tmpregs[1] = tmpregs[2] =
    487    tmpregs[3] = INVALID_HREG;
    488 
    489    /* First decide which scheme (slow or fast) is to be used.  First
    490       assume the fast scheme, and select slow if any contraindications
    491       (wow) appear. */
    492 
    493    go_fast = True;
    494 
    495    if (guard) {
    496       if (guard->tag == Iex_Const
    497           && guard->Iex.Const.con->tag == Ico_U1
    498           && guard->Iex.Const.con->Ico.U1 == True) {
    499          /* unconditional */
    500       } else {
    501          /* Not manifestly unconditional -- be conservative. */
    502          go_fast = False;
    503       }
    504    }
    505 
    506    if (go_fast) {
    507       for (i = 0; i < n_args; i++) {
    508          if (mightRequireFixedRegs(args[i])) {
    509             go_fast = False;
    510             break;
    511          }
    512       }
    513    }
    514 
    515    if (go_fast) {
    516       if (retTy == Ity_V128 || retTy == Ity_V256)
    517          go_fast = False;
    518    }
    519 
    520    /* At this point the scheme to use has been established.  Generate
    521       code to get the arg values into the argument rregs.  If we run
    522       out of arg regs, give up. */
    523 
    524    if (go_fast) {
    525 
    526       /* FAST SCHEME */
    527       nextArgReg = 0;
    528 
    529       for (i = 0; i < n_args; i++) {
    530          IRExpr* arg = args[i];
    531 
    532          IRType  aTy = Ity_INVALID;
    533          if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
    534             aTy = typeOfIRExpr(env->type_env, arg);
    535 
    536          if (nextArgReg >= ARM_N_ARGREGS)
    537             return False; /* out of argregs */
    538 
    539          if (aTy == Ity_I32) {
    540             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
    541                                         iselIntExpr_R(env, arg) ));
    542             nextArgReg++;
    543          }
    544          else if (aTy == Ity_I64) {
    545             /* 64-bit args must be passed in an a reg-pair of the form
    546                n:n+1, where n is even.  Hence either r0:r1 or r2:r3.
    547                On a little-endian host, the less significant word is
    548                passed in the lower-numbered register. */
    549             if (nextArgReg & 1) {
    550                if (nextArgReg >= ARM_N_ARGREGS)
    551                   return False; /* out of argregs */
    552                addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
    553                nextArgReg++;
    554             }
    555             if (nextArgReg >= ARM_N_ARGREGS)
    556                return False; /* out of argregs */
    557             HReg raHi, raLo;
    558             iselInt64Expr(&raHi, &raLo, env, arg);
    559             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
    560             nextArgReg++;
    561             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
    562             nextArgReg++;
    563          }
    564          else if (arg->tag == Iex_BBPTR) {
    565             vassert(0); //ATC
    566             addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
    567                                         hregARM_R8() ));
    568             nextArgReg++;
    569          }
    570          else if (arg->tag == Iex_VECRET) {
    571             // If this happens, it denotes ill-formed IR
    572             vassert(0);
    573          }
    574          else
    575             return False; /* unhandled arg type */
    576       }
    577 
    578       /* Fast scheme only applies for unconditional calls.  Hence: */
    579       cc = ARMcc_AL;
    580 
    581    } else {
    582 
    583       /* SLOW SCHEME; move via temporaries */
    584       nextArgReg = 0;
    585 
    586       for (i = 0; i < n_args; i++) {
    587          IRExpr* arg = args[i];
    588 
    589          IRType  aTy = Ity_INVALID;
    590          if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
    591             aTy  = typeOfIRExpr(env->type_env, arg);
    592 
    593          if (nextArgReg >= ARM_N_ARGREGS)
    594             return False; /* out of argregs */
    595 
    596          if (aTy == Ity_I32) {
    597             tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
    598             nextArgReg++;
    599          }
    600          else if (aTy == Ity_I64) {
    601             /* Same comment applies as in the Fast-scheme case. */
    602             if (nextArgReg & 1)
    603                nextArgReg++;
    604             if (nextArgReg + 1 >= ARM_N_ARGREGS)
    605                return False; /* out of argregs */
    606             HReg raHi, raLo;
    607             iselInt64Expr(&raHi, &raLo, env, args[i]);
    608             tmpregs[nextArgReg] = raLo;
    609             nextArgReg++;
    610             tmpregs[nextArgReg] = raHi;
    611             nextArgReg++;
    612          }
    613          else if (arg->tag == Iex_BBPTR) {
    614             vassert(0); //ATC
    615             tmpregs[nextArgReg] = hregARM_R8();
    616             nextArgReg++;
    617          }
    618          else if (arg->tag == Iex_VECRET) {
    619             // If this happens, it denotes ill-formed IR
    620             vassert(0);
    621          }
    622          else
    623             return False; /* unhandled arg type */
    624       }
    625 
    626       /* Now we can compute the condition.  We can't do it earlier
    627          because the argument computations could trash the condition
    628          codes.  Be a bit clever to handle the common case where the
    629          guard is 1:Bit. */
    630       cc = ARMcc_AL;
    631       if (guard) {
    632          if (guard->tag == Iex_Const
    633              && guard->Iex.Const.con->tag == Ico_U1
    634              && guard->Iex.Const.con->Ico.U1 == True) {
    635             /* unconditional -- do nothing */
    636          } else {
    637             cc = iselCondCode( env, guard );
    638          }
    639       }
    640 
    641       /* Move the args to their final destinations. */
    642       for (i = 0; i < nextArgReg; i++) {
    643          if (hregIsInvalid(tmpregs[i])) { // Skip invalid regs
    644             addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
    645             continue;
    646          }
    647          /* None of these insns, including any spill code that might
    648             be generated, may alter the condition codes. */
    649          addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
    650       }
    651 
    652    }
    653 
    654    /* Should be assured by checks above */
    655    vassert(nextArgReg <= ARM_N_ARGREGS);
    656 
    657    /* Do final checks, set the return values, and generate the call
    658       instruction proper. */
    659    vassert(nBBPTRs == 0 || nBBPTRs == 1);
    660    vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0);
    661    vassert(*stackAdjustAfterCall == 0);
    662    vassert(is_RetLoc_INVALID(*retloc));
    663    switch (retTy) {
    664          case Ity_INVALID:
    665             /* Function doesn't return a value. */
    666             *retloc = mk_RetLoc_simple(RLPri_None);
    667             break;
    668          case Ity_I64:
    669             *retloc = mk_RetLoc_simple(RLPri_2Int);
    670             break;
    671          case Ity_I32: case Ity_I16: case Ity_I8:
    672             *retloc = mk_RetLoc_simple(RLPri_Int);
    673             break;
    674          case Ity_V128:
    675             vassert(0); // ATC
    676             *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
    677             *stackAdjustAfterCall = 16;
    678             break;
    679          case Ity_V256:
    680             vassert(0); // ATC
    681             *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
    682             *stackAdjustAfterCall = 32;
    683             break;
    684          default:
    685             /* IR can denote other possible return types, but we don't
    686                handle those here. */
    687            vassert(0);
    688    }
    689 
    690    /* Finally, generate the call itself.  This needs the *retloc value
    691       set in the switch above, which is why it's at the end. */
    692 
    693    /* nextArgReg doles out argument registers.  Since these are
    694       assigned in the order r0, r1, r2, r3, its numeric value at this
    695       point, which must be between 0 and 4 inclusive, is going to be
    696       equal to the number of arg regs in use for the call.  Hence bake
    697       that number into the call (we'll need to know it when doing
    698       register allocation, to know what regs the call reads.)
    699 
    700       There is a bit of a twist -- harmless but worth recording.
    701       Suppose the arg types are (Ity_I32, Ity_I64).  Then we will have
    702       the first arg in r0 and the second in r3:r2, but r1 isn't used.
    703       We nevertheless have nextArgReg==4 and bake that into the call
    704       instruction.  This will mean the register allocator wil believe
    705       this insn reads r1 when in fact it doesn't.  But that's
    706       harmless; it just artificially extends the live range of r1
    707       unnecessarily.  The best fix would be to put into the
    708       instruction, a bitmask indicating which of r0/1/2/3 carry live
    709       values.  But that's too much hassle. */
    710 
    711    target = (HWord)Ptr_to_ULong(cee->addr);
    712    addInstr(env, ARMInstr_Call( cc, target, nextArgReg, *retloc ));
    713 
    714    return True; /* success */
    715 }
    716 
    717 
    718 /*---------------------------------------------------------*/
    719 /*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
    720 /*---------------------------------------------------------*/
    721 
    722 /* Select insns for an integer-typed expression, and add them to the
    723    code list.  Return a reg holding the result.  This reg will be a
    724    virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
    725    want to modify it, ask for a new vreg, copy it in there, and modify
    726    the copy.  The register allocator will do its best to map both
    727    vregs to the same real register, so the copies will often disappear
    728    later in the game.
    729 
    730    This should handle expressions of 32, 16 and 8-bit type.  All
    731    results are returned in a 32-bit register.  For 16- and 8-bit
    732    expressions, the upper 16/24 bits are arbitrary, so you should mask
    733    or sign extend partial values if necessary.
    734 */
    735 
    736 /* --------------------- AMode1 --------------------- */
    737 
    738 /* Return an AMode1 which computes the value of the specified
    739    expression, possibly also adding insns to the code list as a
    740    result.  The expression may only be a 32-bit one.
    741 */
    742 
    743 static Bool sane_AMode1 ( ARMAMode1* am )
    744 {
    745    switch (am->tag) {
    746       case ARMam1_RI:
    747          return
    748             toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
    749                     && (hregIsVirtual(am->ARMam1.RI.reg)
    750                         || sameHReg(am->ARMam1.RI.reg, hregARM_R8()))
    751                     && am->ARMam1.RI.simm13 >= -4095
    752                     && am->ARMam1.RI.simm13 <= 4095 );
    753       case ARMam1_RRS:
    754          return
    755             toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
    756                     && hregIsVirtual(am->ARMam1.RRS.base)
    757                     && hregClass(am->ARMam1.RRS.index) == HRcInt32
    758                     && hregIsVirtual(am->ARMam1.RRS.index)
    759                     && am->ARMam1.RRS.shift >= 0
    760                     && am->ARMam1.RRS.shift <= 3 );
    761       default:
    762          vpanic("sane_AMode: unknown ARM AMode1 tag");
    763    }
    764 }
    765 
    766 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
    767 {
    768    ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
    769    vassert(sane_AMode1(am));
    770    return am;
    771 }
    772 
    773 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
    774 {
    775    IRType ty = typeOfIRExpr(env->type_env,e);
    776    vassert(ty == Ity_I32);
    777 
    778    /* FIXME: add RRS matching */
    779 
    780    /* {Add32,Sub32}(expr,simm13) */
    781    if (e->tag == Iex_Binop
    782        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
    783        && e->Iex.Binop.arg2->tag == Iex_Const
    784        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
    785       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
    786       if (simm >= -4095 && simm <= 4095) {
    787          HReg reg;
    788          if (e->Iex.Binop.op == Iop_Sub32)
    789             simm = -simm;
    790          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
    791          return ARMAMode1_RI(reg, simm);
    792       }
    793    }
    794 
    795    /* Doesn't match anything in particular.  Generate it into
    796       a register and use that. */
    797    {
    798       HReg reg = iselIntExpr_R(env, e);
    799       return ARMAMode1_RI(reg, 0);
    800    }
    801 
    802 }
    803 
    804 
    805 /* --------------------- AMode2 --------------------- */
    806 
    807 /* Return an AMode2 which computes the value of the specified
    808    expression, possibly also adding insns to the code list as a
    809    result.  The expression may only be a 32-bit one.
    810 */
    811 
    812 static Bool sane_AMode2 ( ARMAMode2* am )
    813 {
    814    switch (am->tag) {
    815       case ARMam2_RI:
    816          return
    817             toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
    818                     && hregIsVirtual(am->ARMam2.RI.reg)
    819                     && am->ARMam2.RI.simm9 >= -255
    820                     && am->ARMam2.RI.simm9 <= 255 );
    821       case ARMam2_RR:
    822          return
    823             toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
    824                     && hregIsVirtual(am->ARMam2.RR.base)
    825                     && hregClass(am->ARMam2.RR.index) == HRcInt32
    826                     && hregIsVirtual(am->ARMam2.RR.index) );
    827       default:
    828          vpanic("sane_AMode: unknown ARM AMode2 tag");
    829    }
    830 }
    831 
    832 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
    833 {
    834    ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
    835    vassert(sane_AMode2(am));
    836    return am;
    837 }
    838 
    839 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
    840 {
    841    IRType ty = typeOfIRExpr(env->type_env,e);
    842    vassert(ty == Ity_I32);
    843 
    844    /* FIXME: add RR matching */
    845 
    846    /* {Add32,Sub32}(expr,simm8) */
    847    if (e->tag == Iex_Binop
    848        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
    849        && e->Iex.Binop.arg2->tag == Iex_Const
    850        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
    851       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
    852       if (simm >= -255 && simm <= 255) {
    853          HReg reg;
    854          if (e->Iex.Binop.op == Iop_Sub32)
    855             simm = -simm;
    856          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
    857          return ARMAMode2_RI(reg, simm);
    858       }
    859    }
    860 
    861    /* Doesn't match anything in particular.  Generate it into
    862       a register and use that. */
    863    {
    864       HReg reg = iselIntExpr_R(env, e);
    865       return ARMAMode2_RI(reg, 0);
    866    }
    867 
    868 }
    869 
    870 
    871 /* --------------------- AModeV --------------------- */
    872 
    873 /* Return an AModeV which computes the value of the specified
    874    expression, possibly also adding insns to the code list as a
    875    result.  The expression may only be a 32-bit one.
    876 */
    877 
    878 static Bool sane_AModeV ( ARMAModeV* am )
    879 {
    880   return toBool( hregClass(am->reg) == HRcInt32
    881                  && hregIsVirtual(am->reg)
    882                  && am->simm11 >= -1020 && am->simm11 <= 1020
    883                  && 0 == (am->simm11 & 3) );
    884 }
    885 
    886 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
    887 {
    888    ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
    889    vassert(sane_AModeV(am));
    890    return am;
    891 }
    892 
    893 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
    894 {
    895    IRType ty = typeOfIRExpr(env->type_env,e);
    896    vassert(ty == Ity_I32);
    897 
    898    /* {Add32,Sub32}(expr, simm8 << 2) */
    899    if (e->tag == Iex_Binop
    900        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
    901        && e->Iex.Binop.arg2->tag == Iex_Const
    902        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
    903       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
    904       if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
    905          HReg reg;
    906          if (e->Iex.Binop.op == Iop_Sub32)
    907             simm = -simm;
    908          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
    909          return mkARMAModeV(reg, simm);
    910       }
    911    }
    912 
    913    /* Doesn't match anything in particular.  Generate it into
    914       a register and use that. */
    915    {
    916       HReg reg = iselIntExpr_R(env, e);
    917       return mkARMAModeV(reg, 0);
    918    }
    919 
    920 }
    921 
    922 /* -------------------- AModeN -------------------- */
    923 
    924 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
    925 {
    926    return iselIntExpr_AModeN_wrk(env, e);
    927 }
    928 
    929 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
    930 {
    931    HReg reg = iselIntExpr_R(env, e);
    932    return mkARMAModeN_R(reg);
    933 }
    934 
    935 
    936 /* --------------------- RI84 --------------------- */
    937 
    938 /* Select instructions to generate 'e' into a RI84.  If mayInv is
    939    true, then the caller will also accept an I84 form that denotes
    940    'not e'.  In this case didInv may not be NULL, and *didInv is set
    941    to True.  This complication is so as to allow generation of an RI84
    942    which is suitable for use in either an AND or BIC instruction,
    943    without knowing (before this call) which one.
    944 */
    945 static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
    946                                    ISelEnv* env, IRExpr* e )
    947 {
    948    ARMRI84* ri;
    949    if (mayInv)
    950       vassert(didInv != NULL);
    951    ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
    952    /* sanity checks ... */
    953    switch (ri->tag) {
    954       case ARMri84_I84:
    955          return ri;
    956       case ARMri84_R:
    957          vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
    958          vassert(hregIsVirtual(ri->ARMri84.R.reg));
    959          return ri;
    960       default:
    961          vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
    962    }
    963 }
    964 
    965 /* DO NOT CALL THIS DIRECTLY ! */
    966 static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
    967                                        ISelEnv* env, IRExpr* e )
    968 {
    969    IRType ty = typeOfIRExpr(env->type_env,e);
    970    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
    971 
    972    if (didInv) *didInv = False;
    973 
    974    /* special case: immediate */
    975    if (e->tag == Iex_Const) {
    976       UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
    977       switch (e->Iex.Const.con->tag) {
    978          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
    979          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
    980          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
    981          default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
    982       }
    983       if (fitsIn8x4(&u8, &u4, u)) {
    984          return ARMRI84_I84( (UShort)u8, (UShort)u4 );
    985       }
    986       if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
    987          vassert(didInv);
    988          *didInv = True;
    989          return ARMRI84_I84( (UShort)u8, (UShort)u4 );
    990       }
    991       /* else fail, fall through to default case */
    992    }
    993 
    994    /* default case: calculate into a register and return that */
    995    {
    996       HReg r = iselIntExpr_R ( env, e );
    997       return ARMRI84_R(r);
    998    }
    999 }
   1000 
   1001 
   1002 /* --------------------- RI5 --------------------- */
   1003 
   1004 /* Select instructions to generate 'e' into a RI5. */
   1005 
   1006 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
   1007 {
   1008    ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
   1009    /* sanity checks ... */
   1010    switch (ri->tag) {
   1011       case ARMri5_I5:
   1012          return ri;
   1013       case ARMri5_R:
   1014          vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
   1015          vassert(hregIsVirtual(ri->ARMri5.R.reg));
   1016          return ri;
   1017       default:
   1018          vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
   1019    }
   1020 }
   1021 
   1022 /* DO NOT CALL THIS DIRECTLY ! */
   1023 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
   1024 {
   1025    IRType ty = typeOfIRExpr(env->type_env,e);
   1026    vassert(ty == Ity_I32 || ty == Ity_I8);
   1027 
   1028    /* special case: immediate */
   1029    if (e->tag == Iex_Const) {
   1030       UInt u; /* both invalid */
   1031       switch (e->Iex.Const.con->tag) {
   1032          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
   1033          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
   1034          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
   1035          default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
   1036       }
   1037       if (u >= 1 && u <= 31) {
   1038          return ARMRI5_I5(u);
   1039       }
   1040       /* else fail, fall through to default case */
   1041    }
   1042 
   1043    /* default case: calculate into a register and return that */
   1044    {
   1045       HReg r = iselIntExpr_R ( env, e );
   1046       return ARMRI5_R(r);
   1047    }
   1048 }
   1049 
   1050 
   1051 /* ------------------- CondCode ------------------- */
   1052 
   1053 /* Generate code to evaluate a bit-typed expression, returning the
   1054    condition code which would correspond when the expression would
   1055    notionally have returned 1. */
   1056 
   1057 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
   1058 {
   1059    ARMCondCode cc = iselCondCode_wrk(env,e);
   1060    vassert(cc != ARMcc_NV);
   1061    return cc;
   1062 }
   1063 
   1064 static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
   1065 {
   1066    vassert(e);
   1067    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
   1068 
   1069    /* var */
   1070    if (e->tag == Iex_RdTmp) {
   1071       HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
   1072       /* CmpOrTst doesn't modify rTmp; so this is OK. */
   1073       ARMRI84* one  = ARMRI84_I84(1,0);
   1074       addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
   1075       return ARMcc_NE;
   1076    }
   1077 
   1078    /* Not1(e) */
   1079    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
   1080       /* Generate code for the arg, and negate the test condition */
   1081       return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
   1082    }
   1083 
   1084    /* --- patterns rooted at: 32to1 --- */
   1085 
   1086    if (e->tag == Iex_Unop
   1087        && e->Iex.Unop.op == Iop_32to1) {
   1088       HReg     rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
   1089       ARMRI84* one  = ARMRI84_I84(1,0);
   1090       addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
   1091       return ARMcc_NE;
   1092    }
   1093 
   1094    /* --- patterns rooted at: CmpNEZ8 --- */
   1095 
   1096    if (e->tag == Iex_Unop
   1097        && e->Iex.Unop.op == Iop_CmpNEZ8) {
   1098       HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
   1099       ARMRI84* xFF  = ARMRI84_I84(0xFF,0);
   1100       addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
   1101       return ARMcc_NE;
   1102    }
   1103 
   1104    /* --- patterns rooted at: CmpNEZ32 --- */
   1105 
   1106    if (e->tag == Iex_Unop
   1107        && e->Iex.Unop.op == Iop_CmpNEZ32) {
   1108       HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
   1109       ARMRI84* zero = ARMRI84_I84(0,0);
   1110       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
   1111       return ARMcc_NE;
   1112    }
   1113 
   1114    /* --- patterns rooted at: CmpNEZ64 --- */
   1115 
   1116    if (e->tag == Iex_Unop
   1117        && e->Iex.Unop.op == Iop_CmpNEZ64) {
   1118       HReg     tHi, tLo;
   1119       HReg     tmp  = newVRegI(env);
   1120       ARMRI84* zero = ARMRI84_I84(0,0);
   1121       iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
   1122       addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
   1123       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
   1124       return ARMcc_NE;
   1125    }
   1126 
   1127    /* --- Cmp*32*(x,y) --- */
   1128    if (e->tag == Iex_Binop
   1129        && (e->Iex.Binop.op == Iop_CmpEQ32
   1130            || e->Iex.Binop.op == Iop_CmpNE32
   1131            || e->Iex.Binop.op == Iop_CmpLT32S
   1132            || e->Iex.Binop.op == Iop_CmpLT32U
   1133            || e->Iex.Binop.op == Iop_CmpLE32S
   1134            || e->Iex.Binop.op == Iop_CmpLE32U)) {
   1135       HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1136       ARMRI84* argR = iselIntExpr_RI84(NULL,False,
   1137                                        env, e->Iex.Binop.arg2);
   1138       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
   1139       switch (e->Iex.Binop.op) {
   1140          case Iop_CmpEQ32:  return ARMcc_EQ;
   1141          case Iop_CmpNE32:  return ARMcc_NE;
   1142          case Iop_CmpLT32S: return ARMcc_LT;
   1143          case Iop_CmpLT32U: return ARMcc_LO;
   1144          case Iop_CmpLE32S: return ARMcc_LE;
   1145          case Iop_CmpLE32U: return ARMcc_LS;
   1146          default: vpanic("iselCondCode(arm): CmpXX32");
   1147       }
   1148    }
   1149 
   1150    /* const */
   1151    /* Constant 1:Bit */
   1152    if (e->tag == Iex_Const) {
   1153       HReg r;
   1154       vassert(e->Iex.Const.con->tag == Ico_U1);
   1155       vassert(e->Iex.Const.con->Ico.U1 == True
   1156               || e->Iex.Const.con->Ico.U1 == False);
   1157       r = newVRegI(env);
   1158       addInstr(env, ARMInstr_Imm32(r, 0));
   1159       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
   1160       return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
   1161    }
   1162 
   1163    // JRS 2013-Jan-03: this seems completely nonsensical
   1164    /* --- CasCmpEQ* --- */
   1165    /* Ist_Cas has a dummy argument to compare with, so comparison is
   1166       always true. */
   1167    //if (e->tag == Iex_Binop
   1168    //    && (e->Iex.Binop.op == Iop_CasCmpEQ32
   1169    //        || e->Iex.Binop.op == Iop_CasCmpEQ16
   1170    //        || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
   1171    //   return ARMcc_AL;
   1172    //}
   1173 
   1174    ppIRExpr(e);
   1175    vpanic("iselCondCode");
   1176 }
   1177 
   1178 
   1179 /* --------------------- Reg --------------------- */
   1180 
   1181 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
   1182 {
   1183    HReg r = iselIntExpr_R_wrk(env, e);
   1184    /* sanity checks ... */
   1185 #  if 0
   1186    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   1187 #  endif
   1188    vassert(hregClass(r) == HRcInt32);
   1189    vassert(hregIsVirtual(r));
   1190    return r;
   1191 }
   1192 
   1193 /* DO NOT CALL THIS DIRECTLY ! */
   1194 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
   1195 {
   1196    IRType ty = typeOfIRExpr(env->type_env,e);
   1197    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
   1198 
   1199    switch (e->tag) {
   1200 
   1201    /* --------- TEMP --------- */
   1202    case Iex_RdTmp: {
   1203       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   1204    }
   1205 
   1206    /* --------- LOAD --------- */
   1207    case Iex_Load: {
   1208       HReg dst  = newVRegI(env);
   1209 
   1210       if (e->Iex.Load.end != Iend_LE)
   1211          goto irreducible;
   1212 
   1213       if (ty == Ity_I32) {
   1214          ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
   1215          addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, dst, amode));
   1216          return dst;
   1217       }
   1218       if (ty == Ity_I16) {
   1219          ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
   1220          addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
   1221                                        True/*isLoad*/, False/*!signedLoad*/,
   1222                                        dst, amode));
   1223          return dst;
   1224       }
   1225       if (ty == Ity_I8) {
   1226          ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
   1227          addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, True/*isLoad*/, dst, amode));
   1228          return dst;
   1229       }
   1230       break;
   1231    }
   1232 
   1233 //zz   /* --------- TERNARY OP --------- */
   1234 //zz   case Iex_Triop: {
   1235 //zz      IRTriop *triop = e->Iex.Triop.details;
   1236 //zz      /* C3210 flags following FPU partial remainder (fprem), both
   1237 //zz         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
   1238 //zz      if (triop->op == Iop_PRemC3210F64
   1239 //zz          || triop->op == Iop_PRem1C3210F64) {
   1240 //zz         HReg junk = newVRegF(env);
   1241 //zz         HReg dst  = newVRegI(env);
   1242 //zz         HReg srcL = iselDblExpr(env, triop->arg2);
   1243 //zz         HReg srcR = iselDblExpr(env, triop->arg3);
   1244 //zz         /* XXXROUNDINGFIXME */
   1245 //zz         /* set roundingmode here */
   1246 //zz         addInstr(env, X86Instr_FpBinary(
   1247 //zz                           e->Iex.Binop.op==Iop_PRemC3210F64
   1248 //zz                              ? Xfp_PREM : Xfp_PREM1,
   1249 //zz                           srcL,srcR,junk
   1250 //zz                 ));
   1251 //zz         /* The previous pseudo-insn will have left the FPU's C3210
   1252 //zz            flags set correctly.  So bag them. */
   1253 //zz         addInstr(env, X86Instr_FpStSW_AX());
   1254 //zz         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
   1255 //zz         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
   1256 //zz         return dst;
   1257 //zz      }
   1258 //zz
   1259 //zz      break;
   1260 //zz   }
   1261 
   1262    /* --------- BINARY OP --------- */
   1263    case Iex_Binop: {
   1264 
   1265       ARMAluOp   aop = 0; /* invalid */
   1266       ARMShiftOp sop = 0; /* invalid */
   1267 
   1268       /* ADD/SUB/AND/OR/XOR */
   1269       switch (e->Iex.Binop.op) {
   1270          case Iop_And32: {
   1271             Bool     didInv = False;
   1272             HReg     dst    = newVRegI(env);
   1273             HReg     argL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1274             ARMRI84* argR   = iselIntExpr_RI84(&didInv, True/*mayInv*/,
   1275                                                env, e->Iex.Binop.arg2);
   1276             addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
   1277                                        dst, argL, argR));
   1278             return dst;
   1279          }
   1280          case Iop_Or32:  aop = ARMalu_OR;  goto std_binop;
   1281          case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
   1282          case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
   1283          case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
   1284          std_binop: {
   1285             HReg     dst  = newVRegI(env);
   1286             HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1287             ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
   1288                                              env, e->Iex.Binop.arg2);
   1289             addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
   1290             return dst;
   1291          }
   1292          default: break;
   1293       }
   1294 
   1295       /* SHL/SHR/SAR */
   1296       switch (e->Iex.Binop.op) {
   1297          case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
   1298          case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
   1299          case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
   1300          sh_binop: {
   1301             HReg    dst  = newVRegI(env);
   1302             HReg    argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1303             ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
   1304             addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
   1305             vassert(ty == Ity_I32); /* else the IR is ill-typed */
   1306             return dst;
   1307          }
   1308          default: break;
   1309       }
   1310 
   1311       /* MUL */
   1312       if (e->Iex.Binop.op == Iop_Mul32) {
   1313          HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1314          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1315          HReg dst  = newVRegI(env);
   1316          addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
   1317          addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
   1318          addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
   1319          addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
   1320          return dst;
   1321       }
   1322 
   1323       /* Handle misc other ops. */
   1324 
   1325       if (e->Iex.Binop.op == Iop_Max32U) {
   1326          HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1327          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1328          HReg dst  = newVRegI(env);
   1329          addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
   1330                                          ARMRI84_R(argR)));
   1331          addInstr(env, mk_iMOVds_RR(dst, argL));
   1332          addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
   1333          return dst;
   1334       }
   1335 
   1336       if (e->Iex.Binop.op == Iop_CmpF64) {
   1337          HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
   1338          HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
   1339          HReg dst = newVRegI(env);
   1340          /* Do the compare (FCMPD) and set NZCV in FPSCR.  Then also do
   1341             FMSTAT, so we can examine the results directly. */
   1342          addInstr(env, ARMInstr_VCmpD(dL, dR));
   1343          /* Create in dst, the IRCmpF64Result encoded result. */
   1344          addInstr(env, ARMInstr_Imm32(dst, 0));
   1345          addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
   1346          addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
   1347          addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
   1348          addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
   1349          return dst;
   1350       }
   1351 
   1352       if (e->Iex.Binop.op == Iop_F64toI32S
   1353           || e->Iex.Binop.op == Iop_F64toI32U) {
   1354          /* Wretched uglyness all round, due to having to deal
   1355             with rounding modes.  Oh well. */
   1356          /* FIXME: if arg1 is a constant indicating round-to-zero,
   1357             then we could skip all this arsing around with FPSCR and
   1358             simply emit FTO{S,U}IZD. */
   1359          Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
   1360          HReg valD  = iselDblExpr(env, e->Iex.Binop.arg2);
   1361          set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
   1362          /* FTO{S,U}ID valF, valD */
   1363          HReg valF = newVRegF(env);
   1364          addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
   1365                                        valF, valD));
   1366          set_VFP_rounding_default(env);
   1367          /* VMOV dst, valF */
   1368          HReg dst = newVRegI(env);
   1369          addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
   1370          return dst;
   1371       }
   1372 
   1373       if (e->Iex.Binop.op == Iop_GetElem8x8
   1374           || e->Iex.Binop.op == Iop_GetElem16x4
   1375           || e->Iex.Binop.op == Iop_GetElem32x2) {
   1376          HReg res = newVRegI(env);
   1377          HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
   1378          UInt index, size;
   1379          if (e->Iex.Binop.arg2->tag != Iex_Const ||
   1380              typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   1381             vpanic("ARM target supports GetElem with constant "
   1382                    "second argument only\n");
   1383          }
   1384          index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   1385          switch (e->Iex.Binop.op) {
   1386             case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
   1387             case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
   1388             case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
   1389             default: vassert(0);
   1390          }
   1391          addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
   1392                                         mkARMNRS(ARMNRS_Reg, res, 0),
   1393                                         mkARMNRS(ARMNRS_Scalar, arg, index),
   1394                                         size, False));
   1395          return res;
   1396       }
   1397 
   1398       if (e->Iex.Binop.op == Iop_GetElem8x16
   1399           || e->Iex.Binop.op == Iop_GetElem16x8
   1400           || e->Iex.Binop.op == Iop_GetElem32x4) {
   1401          HReg res = newVRegI(env);
   1402          HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
   1403          UInt index, size;
   1404          if (e->Iex.Binop.arg2->tag != Iex_Const ||
   1405              typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   1406             vpanic("ARM target supports GetElem with constant "
   1407                    "second argument only\n");
   1408          }
   1409          index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   1410          switch (e->Iex.Binop.op) {
   1411             case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
   1412             case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
   1413             case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
   1414             default: vassert(0);
   1415          }
   1416          addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
   1417                                         mkARMNRS(ARMNRS_Reg, res, 0),
   1418                                         mkARMNRS(ARMNRS_Scalar, arg, index),
   1419                                         size, True));
   1420          return res;
   1421       }
   1422 
   1423       /* All cases involving host-side helper calls. */
   1424       void* fn = NULL;
   1425       switch (e->Iex.Binop.op) {
   1426          case Iop_Add16x2:
   1427             fn = &h_generic_calc_Add16x2; break;
   1428          case Iop_Sub16x2:
   1429             fn = &h_generic_calc_Sub16x2; break;
   1430          case Iop_HAdd16Ux2:
   1431             fn = &h_generic_calc_HAdd16Ux2; break;
   1432          case Iop_HAdd16Sx2:
   1433             fn = &h_generic_calc_HAdd16Sx2; break;
   1434          case Iop_HSub16Ux2:
   1435             fn = &h_generic_calc_HSub16Ux2; break;
   1436          case Iop_HSub16Sx2:
   1437             fn = &h_generic_calc_HSub16Sx2; break;
   1438          case Iop_QAdd16Sx2:
   1439             fn = &h_generic_calc_QAdd16Sx2; break;
   1440          case Iop_QAdd16Ux2:
   1441             fn = &h_generic_calc_QAdd16Ux2; break;
   1442          case Iop_QSub16Sx2:
   1443             fn = &h_generic_calc_QSub16Sx2; break;
   1444          case Iop_Add8x4:
   1445             fn = &h_generic_calc_Add8x4; break;
   1446          case Iop_Sub8x4:
   1447             fn = &h_generic_calc_Sub8x4; break;
   1448          case Iop_HAdd8Ux4:
   1449             fn = &h_generic_calc_HAdd8Ux4; break;
   1450          case Iop_HAdd8Sx4:
   1451             fn = &h_generic_calc_HAdd8Sx4; break;
   1452          case Iop_HSub8Ux4:
   1453             fn = &h_generic_calc_HSub8Ux4; break;
   1454          case Iop_HSub8Sx4:
   1455             fn = &h_generic_calc_HSub8Sx4; break;
   1456          case Iop_QAdd8Sx4:
   1457             fn = &h_generic_calc_QAdd8Sx4; break;
   1458          case Iop_QAdd8Ux4:
   1459             fn = &h_generic_calc_QAdd8Ux4; break;
   1460          case Iop_QSub8Sx4:
   1461             fn = &h_generic_calc_QSub8Sx4; break;
   1462          case Iop_QSub8Ux4:
   1463             fn = &h_generic_calc_QSub8Ux4; break;
   1464          case Iop_Sad8Ux4:
   1465             fn = &h_generic_calc_Sad8Ux4; break;
   1466          case Iop_QAdd32S:
   1467             fn = &h_generic_calc_QAdd32S; break;
   1468          case Iop_QSub32S:
   1469             fn = &h_generic_calc_QSub32S; break;
   1470          case Iop_QSub16Ux2:
   1471             fn = &h_generic_calc_QSub16Ux2; break;
   1472          case Iop_DivU32:
   1473             fn = &h_calc_udiv32_w_arm_semantics; break;
   1474          case Iop_DivS32:
   1475             fn = &h_calc_sdiv32_w_arm_semantics; break;
   1476          default:
   1477             break;
   1478       }
   1479 
   1480       if (fn) {
   1481          HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1482          HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1483          HReg res  = newVRegI(env);
   1484          addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
   1485          addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
   1486          addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn),
   1487                                       2, mk_RetLoc_simple(RLPri_Int) ));
   1488          addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
   1489          return res;
   1490       }
   1491 
   1492       break;
   1493    }
   1494 
   1495    /* --------- UNARY OP --------- */
   1496    case Iex_Unop: {
   1497 
   1498 //zz      /* 1Uto8(32to1(expr32)) */
   1499 //zz      if (e->Iex.Unop.op == Iop_1Uto8) {
   1500 //zz         DECLARE_PATTERN(p_32to1_then_1Uto8);
   1501 //zz         DEFINE_PATTERN(p_32to1_then_1Uto8,
   1502 //zz                        unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
   1503 //zz         if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
   1504 //zz            IRExpr* expr32 = mi.bindee[0];
   1505 //zz            HReg dst = newVRegI(env);
   1506 //zz            HReg src = iselIntExpr_R(env, expr32);
   1507 //zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
   1508 //zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
   1509 //zz                                          X86RMI_Imm(1), dst));
   1510 //zz            return dst;
   1511 //zz         }
   1512 //zz      }
   1513 //zz
   1514 //zz      /* 8Uto32(LDle(expr32)) */
   1515 //zz      if (e->Iex.Unop.op == Iop_8Uto32) {
   1516 //zz         DECLARE_PATTERN(p_LDle8_then_8Uto32);
   1517 //zz         DEFINE_PATTERN(p_LDle8_then_8Uto32,
   1518 //zz                        unop(Iop_8Uto32,
   1519 //zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
   1520 //zz         if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
   1521 //zz            HReg dst = newVRegI(env);
   1522 //zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
   1523 //zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
   1524 //zz            return dst;
   1525 //zz         }
   1526 //zz      }
   1527 //zz
   1528 //zz      /* 8Sto32(LDle(expr32)) */
   1529 //zz      if (e->Iex.Unop.op == Iop_8Sto32) {
   1530 //zz         DECLARE_PATTERN(p_LDle8_then_8Sto32);
   1531 //zz         DEFINE_PATTERN(p_LDle8_then_8Sto32,
   1532 //zz                        unop(Iop_8Sto32,
   1533 //zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
   1534 //zz         if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
   1535 //zz            HReg dst = newVRegI(env);
   1536 //zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
   1537 //zz            addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
   1538 //zz            return dst;
   1539 //zz         }
   1540 //zz      }
   1541 //zz
   1542 //zz      /* 16Uto32(LDle(expr32)) */
   1543 //zz      if (e->Iex.Unop.op == Iop_16Uto32) {
   1544 //zz         DECLARE_PATTERN(p_LDle16_then_16Uto32);
   1545 //zz         DEFINE_PATTERN(p_LDle16_then_16Uto32,
   1546 //zz                        unop(Iop_16Uto32,
   1547 //zz                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
   1548 //zz         if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
   1549 //zz            HReg dst = newVRegI(env);
   1550 //zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
   1551 //zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
   1552 //zz            return dst;
   1553 //zz         }
   1554 //zz      }
   1555 //zz
   1556 //zz      /* 8Uto32(GET:I8) */
   1557 //zz      if (e->Iex.Unop.op == Iop_8Uto32) {
   1558 //zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
   1559 //zz            HReg      dst;
   1560 //zz            X86AMode* amode;
   1561 //zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
   1562 //zz            dst = newVRegI(env);
   1563 //zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
   1564 //zz                                hregX86_EBP());
   1565 //zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
   1566 //zz            return dst;
   1567 //zz         }
   1568 //zz      }
   1569 //zz
   1570 //zz      /* 16to32(GET:I16) */
   1571 //zz      if (e->Iex.Unop.op == Iop_16Uto32) {
   1572 //zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
   1573 //zz            HReg      dst;
   1574 //zz            X86AMode* amode;
   1575 //zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
   1576 //zz            dst = newVRegI(env);
   1577 //zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
   1578 //zz                                hregX86_EBP());
   1579 //zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
   1580 //zz            return dst;
   1581 //zz         }
   1582 //zz      }
   1583 
   1584       switch (e->Iex.Unop.op) {
   1585          case Iop_8Uto32: {
   1586             HReg dst = newVRegI(env);
   1587             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1588             addInstr(env, ARMInstr_Alu(ARMalu_AND,
   1589                                        dst, src, ARMRI84_I84(0xFF,0)));
   1590             return dst;
   1591          }
   1592 //zz         case Iop_8Uto16:
   1593 //zz         case Iop_8Uto32:
   1594 //zz         case Iop_16Uto32: {
   1595 //zz            HReg dst = newVRegI(env);
   1596 //zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1597 //zz            UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
   1598 //zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
   1599 //zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
   1600 //zz                                          X86RMI_Imm(mask), dst));
   1601 //zz            return dst;
   1602 //zz         }
   1603 //zz         case Iop_8Sto16:
   1604 //zz         case Iop_8Sto32:
   1605          case Iop_16Uto32: {
   1606             HReg dst = newVRegI(env);
   1607             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1608             ARMRI5* amt = ARMRI5_I5(16);
   1609             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
   1610             addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
   1611             return dst;
   1612          }
   1613          case Iop_8Sto32:
   1614          case Iop_16Sto32: {
   1615             HReg dst = newVRegI(env);
   1616             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1617             ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
   1618             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
   1619             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
   1620             return dst;
   1621          }
   1622 //zz         case Iop_Not8:
   1623 //zz         case Iop_Not16:
   1624          case Iop_Not32: {
   1625             HReg dst = newVRegI(env);
   1626             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1627             addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
   1628             return dst;
   1629          }
   1630          case Iop_64HIto32: {
   1631             HReg rHi, rLo;
   1632             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
   1633             return rHi; /* and abandon rLo .. poor wee thing :-) */
   1634          }
   1635          case Iop_64to32: {
   1636             HReg rHi, rLo;
   1637             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
   1638             return rLo; /* similar stupid comment to the above ... */
   1639          }
   1640          case Iop_64to8: {
   1641             HReg rHi, rLo;
   1642             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   1643                HReg tHi = newVRegI(env);
   1644                HReg tLo = newVRegI(env);
   1645                HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
   1646                addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
   1647                rHi = tHi;
   1648                rLo = tLo;
   1649             } else {
   1650                iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
   1651             }
   1652             return rLo;
   1653          }
   1654 
   1655          case Iop_1Uto32:
   1656             /* 1Uto32(tmp).  Since I1 values generated into registers
   1657                are guaranteed to have value either only zero or one,
   1658                we can simply return the value of the register in this
   1659                case. */
   1660             if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
   1661                HReg dst = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
   1662                return dst;
   1663             }
   1664             /* else fall through */
   1665          case Iop_1Uto8: {
   1666             HReg        dst  = newVRegI(env);
   1667             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   1668             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
   1669             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
   1670             return dst;
   1671          }
   1672 
   1673          case Iop_1Sto32: {
   1674             HReg        dst  = newVRegI(env);
   1675             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   1676             ARMRI5*     amt  = ARMRI5_I5(31);
   1677             /* This is really rough.  We could do much better here;
   1678                perhaps mvn{cond} dst, #0 as the second insn?
   1679                (same applies to 1Sto64) */
   1680             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
   1681             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
   1682             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
   1683             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
   1684             return dst;
   1685          }
   1686 
   1687 
   1688 //zz         case Iop_1Sto8:
   1689 //zz         case Iop_1Sto16:
   1690 //zz         case Iop_1Sto32: {
   1691 //zz            /* could do better than this, but for now ... */
   1692 //zz            HReg dst         = newVRegI(env);
   1693 //zz            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   1694 //zz            addInstr(env, X86Instr_Set32(cond,dst));
   1695 //zz            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
   1696 //zz            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
   1697 //zz            return dst;
   1698 //zz         }
   1699 //zz         case Iop_Ctz32: {
   1700 //zz            /* Count trailing zeroes, implemented by x86 'bsfl' */
   1701 //zz            HReg dst = newVRegI(env);
   1702 //zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1703 //zz            addInstr(env, X86Instr_Bsfr32(True,src,dst));
   1704 //zz            return dst;
   1705 //zz         }
   1706          case Iop_Clz32: {
   1707             /* Count leading zeroes; easy on ARM. */
   1708             HReg dst = newVRegI(env);
   1709             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1710             addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
   1711             return dst;
   1712          }
   1713 
   1714          case Iop_CmpwNEZ32: {
   1715             HReg dst = newVRegI(env);
   1716             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1717             addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
   1718             addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
   1719             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
   1720             return dst;
   1721          }
   1722 
   1723          case Iop_Left32: {
   1724             HReg dst = newVRegI(env);
   1725             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   1726             addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
   1727             addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
   1728             return dst;
   1729          }
   1730 
   1731 //zz         case Iop_V128to32: {
   1732 //zz            HReg      dst  = newVRegI(env);
   1733 //zz            HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
   1734 //zz            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
   1735 //zz            sub_from_esp(env, 16);
   1736 //zz            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
   1737 //zz            addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
   1738 //zz            add_to_esp(env, 16);
   1739 //zz            return dst;
   1740 //zz         }
   1741 //zz
   1742          case Iop_ReinterpF32asI32: {
   1743             HReg dst = newVRegI(env);
   1744             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
   1745             addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
   1746             return dst;
   1747          }
   1748 
   1749 //zz
   1750 //zz         case Iop_16to8:
   1751          case Iop_32to8:
   1752          case Iop_32to16:
   1753             /* These are no-ops. */
   1754             return iselIntExpr_R(env, e->Iex.Unop.arg);
   1755 
   1756          default:
   1757             break;
   1758       }
   1759 
   1760       /* All Unop cases involving host-side helper calls. */
   1761       void* fn = NULL;
   1762       switch (e->Iex.Unop.op) {
   1763          case Iop_CmpNEZ16x2:
   1764             fn = &h_generic_calc_CmpNEZ16x2; break;
   1765          case Iop_CmpNEZ8x4:
   1766             fn = &h_generic_calc_CmpNEZ8x4; break;
   1767          default:
   1768             break;
   1769       }
   1770 
   1771       if (fn) {
   1772          HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
   1773          HReg res = newVRegI(env);
   1774          addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
   1775          addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn),
   1776                                       1, mk_RetLoc_simple(RLPri_Int) ));
   1777          addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
   1778          return res;
   1779       }
   1780 
   1781       break;
   1782    }
   1783 
   1784    /* --------- GET --------- */
   1785    case Iex_Get: {
   1786       if (ty == Ity_I32
   1787           && 0 == (e->Iex.Get.offset & 3)
   1788           && e->Iex.Get.offset < 4096-4) {
   1789          HReg dst = newVRegI(env);
   1790          addInstr(env, ARMInstr_LdSt32(
   1791                           ARMcc_AL, True/*isLoad*/,
   1792                           dst,
   1793                           ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
   1794          return dst;
   1795       }
   1796 //zz      if (ty == Ity_I8 || ty == Ity_I16) {
   1797 //zz         HReg dst = newVRegI(env);
   1798 //zz         addInstr(env, X86Instr_LoadEX(
   1799 //zz                          toUChar(ty==Ity_I8 ? 1 : 2),
   1800 //zz                          False,
   1801 //zz                          X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
   1802 //zz                          dst));
   1803 //zz         return dst;
   1804 //zz      }
   1805       break;
   1806    }
   1807 
   1808 //zz   case Iex_GetI: {
   1809 //zz      X86AMode* am
   1810 //zz         = genGuestArrayOffset(
   1811 //zz              env, e->Iex.GetI.descr,
   1812 //zz                   e->Iex.GetI.ix, e->Iex.GetI.bias );
   1813 //zz      HReg dst = newVRegI(env);
   1814 //zz      if (ty == Ity_I8) {
   1815 //zz         addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
   1816 //zz         return dst;
   1817 //zz      }
   1818 //zz      if (ty == Ity_I32) {
   1819 //zz         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
   1820 //zz         return dst;
   1821 //zz      }
   1822 //zz      break;
   1823 //zz   }
   1824 
   1825    /* --------- CCALL --------- */
   1826    case Iex_CCall: {
   1827       HReg    dst = newVRegI(env);
   1828       vassert(ty == e->Iex.CCall.retty);
   1829 
   1830       /* be very restrictive for now.  Only 32/64-bit ints allowed for
   1831          args, and 32 bits for return type.  Don't forget to change
   1832          the RetLoc if more types are allowed in future. */
   1833       if (e->Iex.CCall.retty != Ity_I32)
   1834          goto irreducible;
   1835 
   1836       /* Marshal args, do the call, clear stack. */
   1837       UInt   addToSp = 0;
   1838       RetLoc rloc    = mk_RetLoc_INVALID();
   1839       Bool   ok      = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
   1840                                      e->Iex.CCall.cee, e->Iex.CCall.retty,
   1841                                      e->Iex.CCall.args );
   1842       /* */
   1843       if (ok) {
   1844          vassert(is_sane_RetLoc(rloc));
   1845          vassert(rloc.pri == RLPri_Int);
   1846          vassert(addToSp == 0);
   1847          addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
   1848          return dst;
   1849       }
   1850       /* else fall through; will hit the irreducible: label */
   1851    }
   1852 
   1853    /* --------- LITERAL --------- */
   1854    /* 32 literals */
   1855    case Iex_Const: {
   1856       UInt u   = 0;
   1857       HReg dst = newVRegI(env);
   1858       switch (e->Iex.Const.con->tag) {
   1859          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
   1860          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
   1861          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
   1862          default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
   1863       }
   1864       addInstr(env, ARMInstr_Imm32(dst, u));
   1865       return dst;
   1866    }
   1867 
   1868    /* --------- MULTIPLEX --------- */
   1869    case Iex_ITE: { // VFD
   1870       /* ITE(ccexpr, iftrue, iffalse) */
   1871       if (ty == Ity_I32) {
   1872          ARMCondCode cc;
   1873          HReg     r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
   1874          ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.ITE.iffalse);
   1875          HReg     dst = newVRegI(env);
   1876          addInstr(env, mk_iMOVds_RR(dst, r1));
   1877          cc = iselCondCode(env, e->Iex.ITE.cond);
   1878          addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
   1879          return dst;
   1880       }
   1881       break;
   1882    }
   1883 
   1884    default:
   1885    break;
   1886    } /* switch (e->tag) */
   1887 
   1888    /* We get here if no pattern matched. */
   1889   irreducible:
   1890    ppIRExpr(e);
   1891    vpanic("iselIntExpr_R: cannot reduce tree");
   1892 }
   1893 
   1894 
   1895 /* -------------------- 64-bit -------------------- */
   1896 
   1897 /* Compute a 64-bit value into a register pair, which is returned as
   1898    the first two parameters.  As with iselIntExpr_R, these may be
   1899    either real or virtual regs; in any case they must not be changed
   1900    by subsequent code emitted by the caller.

           Parameters:
             rHi, rLo  out-parameters; on return *rHi names the register
                       holding the upper 32 bits and *rLo the register
                       holding the lower 32 bits of the value of 'e'.
             env       instruction-selection environment handed on to
                       the worker, which emits the instructions into it.
             e         the expression to select code for; the worker
                       asserts that it is non-NULL and of type Ity_I64.

           This function is only a checking wrapper: all selection work
           is done by iselInt64Expr_wrk.  Note that, regardless of the
           "real or virtual" wording above, the assertions below
           currently require both result registers to be virtual and of
           class HRcInt32.  */
   1901 
   1902 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
   1903 {
   1904    iselInt64Expr_wrk(rHi, rLo, env, e);
           /* Disabled debugging aid: prints the expression just handled. */
   1905 #  if 0
   1906    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   1907 #  endif
           /* Sanity-check what the worker handed back. */
   1908    vassert(hregClass(*rHi) == HRcInt32);
   1909    vassert(hregIsVirtual(*rHi));
   1910    vassert(hregClass(*rLo) == HRcInt32);
   1911    vassert(hregIsVirtual(*rLo));
   1912 }
   1913 
   1914 /* DO NOT CALL THIS DIRECTLY !  Use iselInt64Expr, which calls this
           worker and then sanity-checks the registers it returns.

           Selects instructions computing the Ity_I64 expression 'e' as
           two 32-bit integer registers, written to *rHi (upper 32 bits)
           and *rLo (lower 32 bits).  Instructions are emitted with
           addInstr(env, ...).

           The forms of 'e' are tried in this order:
             - 64-bit constant
             - read of an IRTemp
             - little-endian load
             - Get
             - Binop: MullS32, MullU32, Or64, Add64, 32HLto64
             - Unop:  ReinterpF64asI64, Left64, CmpwNEZ64, 1Sto64
             - ITE
             - anything else, but only when env->hwcaps has
               VEX_HWCAPS_ARM_NEON set: the value is computed by
               iselNeon64Expr and then moved into two integer registers.
           If nothing matches, the expression is printed and vpanic is
           called.  */
   1915 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
   1916 {
   1917    vassert(e);
   1918    vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
   1919 
   1920    /* 64-bit literal: load each 32-bit half as an immediate. */
   1921    if (e->tag == Iex_Const) {
   1922       ULong   w64 = e->Iex.Const.con->Ico.U64;
   1923       UInt    wHi = toUInt(w64 >> 32);
   1924       UInt    wLo = toUInt(w64);
   1925       HReg    tHi = newVRegI(env);
   1926       HReg    tLo = newVRegI(env);
              /* NOTE(review): Ico.U64 has already been read above, before
                 this tag check runs. */
   1927       vassert(e->Iex.Const.con->tag == Ico_U64);
   1928       addInstr(env, ARMInstr_Imm32(tHi, wHi));
   1929       addInstr(env, ARMInstr_Imm32(tLo, wLo));
   1930       *rHi = tHi;
   1931       *rLo = tLo;
   1932       return;
   1933    }
   1934 
   1935    /* read 64-bit IRTemp */
   1936    if (e->tag == Iex_RdTmp) {
   1937       if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
                 /* With NEON the temp is obtained through iselNeon64Expr
                    as a single register, so it has to be transferred out
                    into a fresh pair of integer registers. */
   1938          HReg tHi = newVRegI(env);
   1939          HReg tLo = newVRegI(env);
   1940          HReg tmp = iselNeon64Expr(env, e);
   1941          addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
   1942          *rHi = tHi;
   1943          *rLo = tLo;
   1944       } else {
                 /* Without NEON the temp is looked up directly as a
                    register pair; no instructions are emitted here. */
   1945          lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
   1946       }
   1947       return;
   1948    }
   1949 
   1950    /* 64-bit load (little-endian only; other loads fall through) */
   1951    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   1952       HReg      tLo, tHi, rA;
   1953       vassert(e->Iex.Load.ty == Ity_I64);
   1954       rA  = iselIntExpr_R(env, e->Iex.Load.addr);
   1955       tHi = newVRegI(env);
   1956       tLo = newVRegI(env);
              /* The word at rA+4 becomes the high half and the word at
                 rA+0 the low half. */
   1957       addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
   1958                                     tHi, ARMAMode1_RI(rA, 4)));
   1959       addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
   1960                                     tLo, ARMAMode1_RI(rA, 0)));
   1961       *rHi = tHi;
   1962       *rLo = tLo;
   1963       return;
   1964    }
   1965 
   1966    /* 64-bit GET: two 32-bit loads relative to r8, offset+4 giving the
              high half and offset+0 the low half.  r8 is presumably the
              guest state pointer -- TODO confirm.
              NOTE(review): the 32-bit Iex_Get case earlier in this file
              checks the offset's alignment and range before building its
              amode; no such check is made here. */
   1967    if (e->tag == Iex_Get) {
   1968       ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
   1969       ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
   1970       HReg tHi = newVRegI(env);
   1971       HReg tLo = newVRegI(env);
   1972       addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4));
   1973       addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0));
   1974       *rHi = tHi;
   1975       *rLo = tLo;
   1976       return;
   1977    }
   1978 
   1979    /* --------- BINARY ops --------- */
   1980    if (e->tag == Iex_Binop) {
   1981       switch (e->Iex.Binop.op) {
   1982 
   1983          /* 32 x 32 -> 64 multiply */
   1984          case Iop_MullS32:
   1985          case Iop_MullU32: {
   1986             HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
   1987             HReg     argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   1988             HReg     tHi  = newVRegI(env);
   1989             HReg     tLo  = newVRegI(env);
   1990             ARMMulOp mop  = e->Iex.Binop.op == Iop_MullS32
   1991                                ? ARMmul_SX : ARMmul_ZX;
                    /* ARMInstr_Mul is given no register operands here: the
                       inputs are placed in r2 and r3 beforehand and the
                       product is read back from r1 (high half) and r0
                       (low half). */
   1992             addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
   1993             addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
   1994             addInstr(env, ARMInstr_Mul(mop));
   1995             addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
   1996             addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
   1997             *rHi = tHi;
   1998             *rLo = tLo;
   1999             return;
   2000          }
   2001 
                 /* 64-bit OR: the two halves are OR-ed independently. */
   2002          case Iop_Or64: {
   2003             HReg xLo, xHi, yLo, yHi;
   2004             HReg tHi = newVRegI(env);
   2005             HReg tLo = newVRegI(env);
   2006             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   2007             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
   2008             addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
   2009             addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
   2010             *rHi = tHi;
   2011             *rLo = tLo;
   2012             return;
   2013          }
   2014 
                 /* 64-bit add: low halves first with ADDS, then the high
                    halves with ADC, which is expected to consume the carry
                    produced by the ADDS.  The two instructions must stay
                    adjacent and in this order. */
   2015          case Iop_Add64: {
   2016             HReg xLo, xHi, yLo, yHi;
   2017             HReg tHi = newVRegI(env);
   2018             HReg tLo = newVRegI(env);
   2019             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
   2020             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
   2021             addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
   2022             addInstr(env, ARMInstr_Alu(ARMalu_ADC,  tHi, xHi, ARMRI84_R(yHi)));
   2023             *rHi = tHi;
   2024             *rLo = tLo;
   2025             return;
   2026          }
   2027 
   2028          /* 32HLto64(e1,e2): e1 supplies the high half and e2 the low
                    half.  The registers produced for the two 32-bit
                    sub-expressions are returned as they are, without a
                    copy. */
   2029          case Iop_32HLto64: {
   2030             *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
   2031             *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2032             return;
   2033          }
   2034 
                 /* Any other binop drops out of the switch and is left to
                    the cases further down. */
   2035          default:
   2036             break;
   2037       }
   2038    }
   2039 
   2040    /* --------- UNARY ops --------- */
   2041    if (e->tag == Iex_Unop) {
   2042       switch (e->Iex.Unop.op) {
   2043 
   2044          /* ReinterpF64asI64: evaluate the argument with iselDblExpr
                    and transfer the result out to an integer pair. */
   2045          case Iop_ReinterpF64asI64: {
   2046             HReg dstHi = newVRegI(env);
   2047             HReg dstLo = newVRegI(env);
   2048             HReg src   = iselDblExpr(env, e->Iex.Unop.arg);
   2049             addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
   2050             *rHi = dstHi;
   2051             *rLo = dstLo;
   2052             return;
   2053          }
   2054 
   2055          /* Left64(e) */
   2056          case Iop_Left64: {
   2057             HReg yLo, yHi;
   2058             HReg tHi  = newVRegI(env);
   2059             HReg tLo  = newVRegI(env);
   2060             HReg zero = newVRegI(env);
   2061             /* yHi:yLo = arg */
   2062             iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
   2063             /* zero = 0 */
   2064             addInstr(env, ARMInstr_Imm32(zero, 0));
   2065             /* tLo = 0 - yLo, and set carry */
   2066             addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
   2067                                        tLo, zero, ARMRI84_R(yLo)));
   2068             /* tHi = 0 - yHi - carry */
   2069             addInstr(env, ARMInstr_Alu(ARMalu_SBC,
   2070                                        tHi, zero, ARMRI84_R(yHi)));
   2071             /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
   2072                back in, so as to give the final result
   2073                tHi:tLo = arg | -arg. */
   2074             addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
   2075             addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
   2076             *rHi = tHi;
   2077             *rLo = tLo;
   2078             return;
   2079          }
   2080 
   2081          /* CmpwNEZ64(e) */
   2082          case Iop_CmpwNEZ64: {
   2083             HReg srcLo, srcHi;
   2084             HReg tmp1 = newVRegI(env);
   2085             HReg tmp2 = newVRegI(env);
   2086             /* srcHi:srcLo = arg */
   2087             iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
   2088             /* tmp1 = srcHi | srcLo */
   2089             addInstr(env, ARMInstr_Alu(ARMalu_OR,
   2090                                        tmp1, srcHi, ARMRI84_R(srcLo)));
   2091             /* tmp2 = (tmp1 | -tmp1) >>s 31 */
   2092             addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
   2093             addInstr(env, ARMInstr_Alu(ARMalu_OR,
   2094                                        tmp2, tmp2, ARMRI84_R(tmp1)));
   2095             addInstr(env, ARMInstr_Shift(ARMsh_SAR,
   2096                                          tmp2, tmp2, ARMRI5_I5(31)));
                    /* The same register is handed back for both halves. */
   2097             *rHi = tmp2;
   2098             *rLo = tmp2;
   2099             return;
   2100          }
   2101 
                 /* 1Sto64(e): dst = 0; if (cond) dst = 1; then shift left
                    by 31 and arithmetic-shift right by 31 so that bit 0 is
                    replicated across the whole word. */
   2102          case Iop_1Sto64: {
   2103             HReg        dst  = newVRegI(env);
   2104             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
   2105             ARMRI5*     amt  = ARMRI5_I5(31);
   2106             /* This is really rough.  We could do much better here;
   2107                perhaps mvn{cond} dst, #0 as the second insn?
   2108                (same applies to 1Sto32) */
   2109             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
   2110             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
   2111             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
   2112             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
                    /* The same register is handed back for both halves. */
   2113             *rHi = dst;
   2114             *rLo = dst;
   2115             return;
   2116          }
   2117 
                 /* Any other unop drops out of the switch and is left to
                    the cases further down. */
   2118          default:
   2119             break;
   2120       }
   2121    } /* if (e->tag == Iex_Unop) */
   2122 
   2123    /* --------- MULTIPLEX --------- */
           /* Both arms are evaluated unconditionally.  The destination pair
              starts out as a copy of the 'iftrue' value; the condition is
              evaluated only after that, immediately before the two
              conditional moves that overwrite the pair with the 'iffalse'
              value.  'cc ^ 1' is used as the condition for those moves --
              presumably the negation of cc, which would rely on the
              ARMCondCode numbering (defined outside this chunk; TODO
              confirm). */
   2124    if (e->tag == Iex_ITE) { // VFD
   2125       IRType tyC;
   2126       HReg   r1hi, r1lo, r0hi, r0lo, dstHi, dstLo;
   2127       ARMCondCode cc;
   2128       tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond);
   2129       vassert(tyC == Ity_I1);
   2130       iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue);
   2131       iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse);
   2132       dstHi = newVRegI(env);
   2133       dstLo = newVRegI(env);
   2134       addInstr(env, mk_iMOVds_RR(dstHi, r1hi));
   2135       addInstr(env, mk_iMOVds_RR(dstLo, r1lo));
   2136       cc = iselCondCode(env, e->Iex.ITE.cond);
   2137       addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi)));
   2138       addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo)));
   2139       *rHi = dstHi;
   2140       *rLo = dstLo;
   2141       return;
   2142    }
   2143 
   2144    /* It is convenient sometimes to call iselInt64Expr even when we
   2145       have NEON support (e.g. in do_helper_call we need 64-bit
   2146       arguments as 2 x 32 regs).  So, as a last resort on NEON hosts,
              let iselNeon64Expr compute the value and transfer it out to
              an integer register pair. */
   2147    if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   2148       HReg tHi = newVRegI(env);
   2149       HReg tLo = newVRegI(env);
   2150       HReg tmp = iselNeon64Expr(env, e);
   2151       addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
   2152       *rHi = tHi;
   2153       *rLo = tLo;
   2154       return ;
   2155    }
   2156 
           /* Nothing matched: show the offending expression and give up. */
   2157    ppIRExpr(e);
   2158    vpanic("iselInt64Expr");
   2159 }
   2160 
   2161 
   2162 /*---------------------------------------------------------*/
   2163 /*--- ISEL: Vector (NEON) expressions (64 or 128 bit)   ---*/
   2164 /*---------------------------------------------------------*/
   2165 
   2166 static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
   2167 {
           /* Select code for the expression 'e' (the worker asserts that
              it is of type Ity_I64) and return the single register that
              holds its value.  This is only a checking wrapper: the
              selection itself is done by iselNeon64Expr_wrk, and the
              assertions below require the register it returns to be
              virtual and of class HRcFlt64. */
   2168    HReg r = iselNeon64Expr_wrk( env, e );
   2169    vassert(hregClass(r) == HRcFlt64);
   2170    vassert(hregIsVirtual(r));
   2171    return r;
   2172 }
   2173 
   2174 /* DO NOT CALL THIS DIRECTLY */
   2175 static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
   2176 {
   2177    IRType ty = typeOfIRExpr(env->type_env, e);
   2178    MatchInfo mi;
   2179    vassert(e);
   2180    vassert(ty == Ity_I64);
   2181 
   2182    if (e->tag == Iex_RdTmp) {
   2183       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   2184    }
   2185 
   2186    if (e->tag == Iex_Const) {
   2187       HReg rLo, rHi;
   2188       HReg res = newVRegD(env);
   2189       iselInt64Expr(&rHi, &rLo, env, e);
   2190       addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   2191       return res;
   2192    }
   2193 
   2194    /* 64-bit load */
   2195    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   2196       HReg res = newVRegD(env);
   2197       ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
   2198       vassert(ty == Ity_I64);
   2199       addInstr(env, ARMInstr_NLdStD(True, res, am));
   2200       return res;
   2201    }
   2202 
   2203    /* 64-bit GET */
   2204    if (e->tag == Iex_Get) {
   2205       HReg addr = newVRegI(env);
   2206       HReg res = newVRegD(env);
   2207       vassert(ty == Ity_I64);
   2208       addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
   2209       addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
   2210       return res;
   2211    }
   2212 
   2213    /* --------- BINARY ops --------- */
   2214    if (e->tag == Iex_Binop) {
   2215       switch (e->Iex.Binop.op) {
   2216 
   2217          /* 32 x 32 -> 64 multiply */
   2218          case Iop_MullS32:
   2219          case Iop_MullU32: {
   2220             HReg rLo, rHi;
   2221             HReg res = newVRegD(env);
   2222             iselInt64Expr(&rHi, &rLo, env, e);
   2223             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   2224             return res;
   2225          }
   2226 
   2227          case Iop_And64: {
   2228             HReg res = newVRegD(env);
   2229             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2230             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2231             addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
   2232                                            res, argL, argR, 4, False));
   2233             return res;
   2234          }
   2235          case Iop_Or64: {
   2236             HReg res = newVRegD(env);
   2237             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2238             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2239             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   2240                                            res, argL, argR, 4, False));
   2241             return res;
   2242          }
   2243          case Iop_Xor64: {
   2244             HReg res = newVRegD(env);
   2245             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2246             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2247             addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
   2248                                            res, argL, argR, 4, False));
   2249             return res;
   2250          }
   2251 
   2252          /* 32HLto64(e1,e2) */
   2253          case Iop_32HLto64: {
   2254             HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
   2255             HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2256             HReg res = newVRegD(env);
   2257             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   2258             return res;
   2259          }
   2260 
   2261          case Iop_Add8x8:
   2262          case Iop_Add16x4:
   2263          case Iop_Add32x2:
   2264          case Iop_Add64: {
   2265             HReg res = newVRegD(env);
   2266             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2267             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2268             UInt size;
   2269             switch (e->Iex.Binop.op) {
   2270                case Iop_Add8x8: size = 0; break;
   2271                case Iop_Add16x4: size = 1; break;
   2272                case Iop_Add32x2: size = 2; break;
   2273                case Iop_Add64: size = 3; break;
   2274                default: vassert(0);
   2275             }
   2276             addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
   2277                                            res, argL, argR, size, False));
   2278             return res;
   2279          }
   2280          case Iop_Add32Fx2: {
   2281             HReg res = newVRegD(env);
   2282             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2283             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2284             UInt size = 0;
   2285             addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
   2286                                            res, argL, argR, size, False));
   2287             return res;
   2288          }
   2289          case Iop_Recps32Fx2: {
   2290             HReg res = newVRegD(env);
   2291             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2292             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2293             UInt size = 0;
   2294             addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
   2295                                            res, argL, argR, size, False));
   2296             return res;
   2297          }
   2298          case Iop_Rsqrts32Fx2: {
   2299             HReg res = newVRegD(env);
   2300             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2301             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2302             UInt size = 0;
   2303             addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
   2304                                            res, argL, argR, size, False));
   2305             return res;
   2306          }
   2307 
   2308          // These 6 verified 18 Apr 2013
   2309          case Iop_InterleaveHI32x2:
   2310          case Iop_InterleaveLO32x2:
   2311          case Iop_InterleaveOddLanes8x8:
   2312          case Iop_InterleaveEvenLanes8x8:
   2313          case Iop_InterleaveOddLanes16x4:
   2314          case Iop_InterleaveEvenLanes16x4: {
   2315             HReg rD   = newVRegD(env);
   2316             HReg rM   = newVRegD(env);
   2317             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2318             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2319             UInt size;
   2320             Bool resRd;  // is the result in rD or rM ?
   2321             switch (e->Iex.Binop.op) {
   2322                case Iop_InterleaveOddLanes8x8:   resRd = False; size = 0; break;
   2323                case Iop_InterleaveEvenLanes8x8:  resRd = True;  size = 0; break;
   2324                case Iop_InterleaveOddLanes16x4:  resRd = False; size = 1; break;
   2325                case Iop_InterleaveEvenLanes16x4: resRd = True;  size = 1; break;
   2326                case Iop_InterleaveHI32x2:        resRd = False; size = 2; break;
   2327                case Iop_InterleaveLO32x2:        resRd = True;  size = 2; break;
   2328                default: vassert(0);
   2329             }
   2330             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
   2331             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
   2332             addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False));
   2333             return resRd ? rD : rM;
   2334          }
   2335 
   2336          // These 4 verified 18 Apr 2013
   2337          case Iop_InterleaveHI8x8:
   2338          case Iop_InterleaveLO8x8:
   2339          case Iop_InterleaveHI16x4:
   2340          case Iop_InterleaveLO16x4: {
   2341             HReg rD   = newVRegD(env);
   2342             HReg rM   = newVRegD(env);
   2343             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2344             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2345             UInt size;
   2346             Bool resRd;  // is the result in rD or rM ?
   2347             switch (e->Iex.Binop.op) {
   2348                case Iop_InterleaveHI8x8:  resRd = False; size = 0; break;
   2349                case Iop_InterleaveLO8x8:  resRd = True;  size = 0; break;
   2350                case Iop_InterleaveHI16x4: resRd = False; size = 1; break;
   2351                case Iop_InterleaveLO16x4: resRd = True;  size = 1; break;
   2352                default: vassert(0);
   2353             }
   2354             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
   2355             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
   2356             addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False));
   2357             return resRd ? rD : rM;
   2358          }
   2359 
   2360          // These 4 verified 18 Apr 2013
   2361          case Iop_CatOddLanes8x8:
   2362          case Iop_CatEvenLanes8x8:
   2363          case Iop_CatOddLanes16x4:
   2364          case Iop_CatEvenLanes16x4: {
   2365             HReg rD   = newVRegD(env);
   2366             HReg rM   = newVRegD(env);
   2367             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2368             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2369             UInt size;
   2370             Bool resRd;  // is the result in rD or rM ?
   2371             switch (e->Iex.Binop.op) {
   2372                case Iop_CatOddLanes8x8:   resRd = False; size = 0; break;
   2373                case Iop_CatEvenLanes8x8:  resRd = True;  size = 0; break;
   2374                case Iop_CatOddLanes16x4:  resRd = False; size = 1; break;
   2375                case Iop_CatEvenLanes16x4: resRd = True;  size = 1; break;
   2376                default: vassert(0);
   2377             }
   2378             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
   2379             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
   2380             addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False));
   2381             return resRd ? rD : rM;
   2382          }
   2383 
   2384          case Iop_QAdd8Ux8:
   2385          case Iop_QAdd16Ux4:
   2386          case Iop_QAdd32Ux2:
   2387          case Iop_QAdd64Ux1: {
   2388             HReg res = newVRegD(env);
   2389             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2390             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2391             UInt size;
   2392             switch (e->Iex.Binop.op) {
   2393                case Iop_QAdd8Ux8: size = 0; break;
   2394                case Iop_QAdd16Ux4: size = 1; break;
   2395                case Iop_QAdd32Ux2: size = 2; break;
   2396                case Iop_QAdd64Ux1: size = 3; break;
   2397                default: vassert(0);
   2398             }
   2399             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
   2400                                            res, argL, argR, size, False));
   2401             return res;
   2402          }
   2403          case Iop_QAdd8Sx8:
   2404          case Iop_QAdd16Sx4:
   2405          case Iop_QAdd32Sx2:
   2406          case Iop_QAdd64Sx1: {
   2407             HReg res = newVRegD(env);
   2408             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2409             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2410             UInt size;
   2411             switch (e->Iex.Binop.op) {
   2412                case Iop_QAdd8Sx8: size = 0; break;
   2413                case Iop_QAdd16Sx4: size = 1; break;
   2414                case Iop_QAdd32Sx2: size = 2; break;
   2415                case Iop_QAdd64Sx1: size = 3; break;
   2416                default: vassert(0);
   2417             }
   2418             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
   2419                                            res, argL, argR, size, False));
   2420             return res;
   2421          }
   2422          case Iop_Sub8x8:
   2423          case Iop_Sub16x4:
   2424          case Iop_Sub32x2:
   2425          case Iop_Sub64: {
   2426             HReg res = newVRegD(env);
   2427             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2428             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2429             UInt size;
   2430             switch (e->Iex.Binop.op) {
   2431                case Iop_Sub8x8: size = 0; break;
   2432                case Iop_Sub16x4: size = 1; break;
   2433                case Iop_Sub32x2: size = 2; break;
   2434                case Iop_Sub64: size = 3; break;
   2435                default: vassert(0);
   2436             }
   2437             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   2438                                            res, argL, argR, size, False));
   2439             return res;
   2440          }
   2441          case Iop_Sub32Fx2: {
   2442             HReg res = newVRegD(env);
   2443             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2444             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2445             UInt size = 0;
   2446             addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
   2447                                            res, argL, argR, size, False));
   2448             return res;
   2449          }
   2450          case Iop_QSub8Ux8:
   2451          case Iop_QSub16Ux4:
   2452          case Iop_QSub32Ux2:
   2453          case Iop_QSub64Ux1: {
   2454             HReg res = newVRegD(env);
   2455             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2456             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2457             UInt size;
   2458             switch (e->Iex.Binop.op) {
   2459                case Iop_QSub8Ux8: size = 0; break;
   2460                case Iop_QSub16Ux4: size = 1; break;
   2461                case Iop_QSub32Ux2: size = 2; break;
   2462                case Iop_QSub64Ux1: size = 3; break;
   2463                default: vassert(0);
   2464             }
   2465             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
   2466                                            res, argL, argR, size, False));
   2467             return res;
   2468          }
   2469          case Iop_QSub8Sx8:
   2470          case Iop_QSub16Sx4:
   2471          case Iop_QSub32Sx2:
   2472          case Iop_QSub64Sx1: {
   2473             HReg res = newVRegD(env);
   2474             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2475             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2476             UInt size;
   2477             switch (e->Iex.Binop.op) {
   2478                case Iop_QSub8Sx8: size = 0; break;
   2479                case Iop_QSub16Sx4: size = 1; break;
   2480                case Iop_QSub32Sx2: size = 2; break;
   2481                case Iop_QSub64Sx1: size = 3; break;
   2482                default: vassert(0);
   2483             }
   2484             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
   2485                                            res, argL, argR, size, False));
   2486             return res;
   2487          }
   2488          case Iop_Max8Ux8:
   2489          case Iop_Max16Ux4:
   2490          case Iop_Max32Ux2: {
   2491             HReg res = newVRegD(env);
   2492             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2493             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2494             UInt size;
   2495             switch (e->Iex.Binop.op) {
   2496                case Iop_Max8Ux8: size = 0; break;
   2497                case Iop_Max16Ux4: size = 1; break;
   2498                case Iop_Max32Ux2: size = 2; break;
   2499                default: vassert(0);
   2500             }
   2501             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
   2502                                            res, argL, argR, size, False));
   2503             return res;
   2504          }
   2505          case Iop_Max8Sx8:
   2506          case Iop_Max16Sx4:
   2507          case Iop_Max32Sx2: {
   2508             HReg res = newVRegD(env);
   2509             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2510             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2511             UInt size;
   2512             switch (e->Iex.Binop.op) {
   2513                case Iop_Max8Sx8: size = 0; break;
   2514                case Iop_Max16Sx4: size = 1; break;
   2515                case Iop_Max32Sx2: size = 2; break;
   2516                default: vassert(0);
   2517             }
   2518             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
   2519                                            res, argL, argR, size, False));
   2520             return res;
   2521          }
   2522          case Iop_Min8Ux8:
   2523          case Iop_Min16Ux4:
   2524          case Iop_Min32Ux2: {
   2525             HReg res = newVRegD(env);
   2526             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2527             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2528             UInt size;
   2529             switch (e->Iex.Binop.op) {
   2530                case Iop_Min8Ux8: size = 0; break;
   2531                case Iop_Min16Ux4: size = 1; break;
   2532                case Iop_Min32Ux2: size = 2; break;
   2533                default: vassert(0);
   2534             }
   2535             addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
   2536                                            res, argL, argR, size, False));
   2537             return res;
   2538          }
   2539          case Iop_Min8Sx8:
   2540          case Iop_Min16Sx4:
   2541          case Iop_Min32Sx2: {
   2542             HReg res = newVRegD(env);
   2543             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2544             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2545             UInt size;
   2546             switch (e->Iex.Binop.op) {
   2547                case Iop_Min8Sx8: size = 0; break;
   2548                case Iop_Min16Sx4: size = 1; break;
   2549                case Iop_Min32Sx2: size = 2; break;
   2550                default: vassert(0);
   2551             }
   2552             addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
   2553                                            res, argL, argR, size, False));
   2554             return res;
   2555          }
   2556          case Iop_Sar8x8:
   2557          case Iop_Sar16x4:
   2558          case Iop_Sar32x2: {
   2559             HReg res = newVRegD(env);
   2560             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2561             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2562             HReg argR2 = newVRegD(env);
   2563             HReg zero = newVRegD(env);
   2564             UInt size;
   2565             switch (e->Iex.Binop.op) {
   2566                case Iop_Sar8x8: size = 0; break;
   2567                case Iop_Sar16x4: size = 1; break;
   2568                case Iop_Sar32x2: size = 2; break;
   2569                case Iop_Sar64: size = 3; break;
   2570                default: vassert(0);
   2571             }
   2572             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
   2573             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   2574                                            argR2, zero, argR, size, False));
   2575             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   2576                                           res, argL, argR2, size, False));
   2577             return res;
   2578          }
   2579          case Iop_Sal8x8:
   2580          case Iop_Sal16x4:
   2581          case Iop_Sal32x2:
   2582          case Iop_Sal64x1: {
   2583             HReg res = newVRegD(env);
   2584             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2585             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2586             UInt size;
   2587             switch (e->Iex.Binop.op) {
   2588                case Iop_Sal8x8: size = 0; break;
   2589                case Iop_Sal16x4: size = 1; break;
   2590                case Iop_Sal32x2: size = 2; break;
   2591                case Iop_Sal64x1: size = 3; break;
   2592                default: vassert(0);
   2593             }
   2594             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   2595                                           res, argL, argR, size, False));
   2596             return res;
   2597          }
   2598          case Iop_Shr8x8:
   2599          case Iop_Shr16x4:
   2600          case Iop_Shr32x2: {
   2601             HReg res = newVRegD(env);
   2602             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2603             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2604             HReg argR2 = newVRegD(env);
   2605             HReg zero = newVRegD(env);
   2606             UInt size;
   2607             switch (e->Iex.Binop.op) {
   2608                case Iop_Shr8x8: size = 0; break;
   2609                case Iop_Shr16x4: size = 1; break;
   2610                case Iop_Shr32x2: size = 2; break;
   2611                default: vassert(0);
   2612             }
   2613             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
   2614             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   2615                                            argR2, zero, argR, size, False));
   2616             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   2617                                           res, argL, argR2, size, False));
   2618             return res;
   2619          }
   2620          case Iop_Shl8x8:
   2621          case Iop_Shl16x4:
   2622          case Iop_Shl32x2: {
   2623             HReg res = newVRegD(env);
   2624             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2625             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2626             UInt size;
   2627             switch (e->Iex.Binop.op) {
   2628                case Iop_Shl8x8: size = 0; break;
   2629                case Iop_Shl16x4: size = 1; break;
   2630                case Iop_Shl32x2: size = 2; break;
   2631                default: vassert(0);
   2632             }
   2633             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   2634                                           res, argL, argR, size, False));
   2635             return res;
   2636          }
   2637          case Iop_QShl8x8:
   2638          case Iop_QShl16x4:
   2639          case Iop_QShl32x2:
   2640          case Iop_QShl64x1: {
   2641             HReg res = newVRegD(env);
   2642             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2643             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2644             UInt size;
   2645             switch (e->Iex.Binop.op) {
   2646                case Iop_QShl8x8: size = 0; break;
   2647                case Iop_QShl16x4: size = 1; break;
   2648                case Iop_QShl32x2: size = 2; break;
   2649                case Iop_QShl64x1: size = 3; break;
   2650                default: vassert(0);
   2651             }
   2652             addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
   2653                                           res, argL, argR, size, False));
   2654             return res;
   2655          }
   2656          case Iop_QSal8x8:
   2657          case Iop_QSal16x4:
   2658          case Iop_QSal32x2:
   2659          case Iop_QSal64x1: {
   2660             HReg res = newVRegD(env);
   2661             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2662             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2663             UInt size;
   2664             switch (e->Iex.Binop.op) {
   2665                case Iop_QSal8x8: size = 0; break;
   2666                case Iop_QSal16x4: size = 1; break;
   2667                case Iop_QSal32x2: size = 2; break;
   2668                case Iop_QSal64x1: size = 3; break;
   2669                default: vassert(0);
   2670             }
   2671             addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
   2672                                           res, argL, argR, size, False));
   2673             return res;
   2674          }
   2675          case Iop_QShlN8x8:
   2676          case Iop_QShlN16x4:
   2677          case Iop_QShlN32x2:
   2678          case Iop_QShlN64x1: {
   2679             HReg res = newVRegD(env);
   2680             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2681             UInt size, imm;
   2682             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   2683                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   2684                vpanic("ARM taget supports Iop_QShlNAxB with constant "
   2685                       "second argument only\n");
   2686             }
   2687             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   2688             switch (e->Iex.Binop.op) {
   2689                case Iop_QShlN8x8: size = 8 | imm; break;
   2690                case Iop_QShlN16x4: size = 16 | imm; break;
   2691                case Iop_QShlN32x2: size = 32 | imm; break;
   2692                case Iop_QShlN64x1: size = 64 | imm; break;
   2693                default: vassert(0);
   2694             }
   2695             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
   2696                                           res, argL, size, False));
   2697             return res;
   2698          }
   2699          case Iop_QShlN8Sx8:
   2700          case Iop_QShlN16Sx4:
   2701          case Iop_QShlN32Sx2:
   2702          case Iop_QShlN64Sx1: {
   2703             HReg res = newVRegD(env);
   2704             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2705             UInt size, imm;
   2706             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   2707                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   2708                vpanic("ARM taget supports Iop_QShlNAxB with constant "
   2709                       "second argument only\n");
   2710             }
   2711             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   2712             switch (e->Iex.Binop.op) {
   2713                case Iop_QShlN8Sx8: size = 8 | imm; break;
   2714                case Iop_QShlN16Sx4: size = 16 | imm; break;
   2715                case Iop_QShlN32Sx2: size = 32 | imm; break;
   2716                case Iop_QShlN64Sx1: size = 64 | imm; break;
   2717                default: vassert(0);
   2718             }
   2719             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
   2720                                           res, argL, size, False));
   2721             return res;
   2722          }
   2723          case Iop_QSalN8x8:
   2724          case Iop_QSalN16x4:
   2725          case Iop_QSalN32x2:
   2726          case Iop_QSalN64x1: {
   2727             HReg res = newVRegD(env);
   2728             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2729             UInt size, imm;
   2730             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   2731                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   2732                vpanic("ARM taget supports Iop_QShlNAxB with constant "
   2733                       "second argument only\n");
   2734             }
   2735             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   2736             switch (e->Iex.Binop.op) {
   2737                case Iop_QSalN8x8: size = 8 | imm; break;
   2738                case Iop_QSalN16x4: size = 16 | imm; break;
   2739                case Iop_QSalN32x2: size = 32 | imm; break;
   2740                case Iop_QSalN64x1: size = 64 | imm; break;
   2741                default: vassert(0);
   2742             }
   2743             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
   2744                                           res, argL, size, False));
   2745             return res;
   2746          }
   2747          case Iop_ShrN8x8:
   2748          case Iop_ShrN16x4:
   2749          case Iop_ShrN32x2:
   2750          case Iop_Shr64: {
   2751             HReg res = newVRegD(env);
   2752             HReg tmp = newVRegD(env);
   2753             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2754             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2755             HReg argR2 = newVRegI(env);
   2756             UInt size;
   2757             switch (e->Iex.Binop.op) {
   2758                case Iop_ShrN8x8: size = 0; break;
   2759                case Iop_ShrN16x4: size = 1; break;
   2760                case Iop_ShrN32x2: size = 2; break;
   2761                case Iop_Shr64: size = 3; break;
   2762                default: vassert(0);
   2763             }
   2764             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
   2765             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
   2766             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   2767                                           res, argL, tmp, size, False));
   2768             return res;
   2769          }
   2770          case Iop_ShlN8x8:
   2771          case Iop_ShlN16x4:
   2772          case Iop_ShlN32x2:
   2773          case Iop_Shl64: {
   2774             HReg res = newVRegD(env);
   2775             HReg tmp = newVRegD(env);
   2776             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2777             /* special-case Shl64(x, imm8) since the Neon front
   2778                end produces a lot of those for V{LD,ST}{1,2,3,4}. */
   2779             if (e->Iex.Binop.op == Iop_Shl64
   2780                 && e->Iex.Binop.arg2->tag == Iex_Const) {
   2781                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
   2782                Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   2783                if (nshift >= 1 && nshift <= 63) {
   2784                   addInstr(env, ARMInstr_NShl64(res, argL, nshift));
   2785                   return res;
   2786                }
   2787                /* else fall through to general case */
   2788             }
   2789             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2790             UInt size;
   2791             switch (e->Iex.Binop.op) {
   2792                case Iop_ShlN8x8:  size = 0; break;
   2793                case Iop_ShlN16x4: size = 1; break;
   2794                case Iop_ShlN32x2: size = 2; break;
   2795                case Iop_Shl64:    size = 3; break;
   2796                default: vassert(0);
   2797             }
   2798             addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
   2799                                           tmp, argR, 0, False));
   2800             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   2801                                           res, argL, tmp, size, False));
   2802             return res;
   2803          }
   2804          case Iop_SarN8x8:
   2805          case Iop_SarN16x4:
   2806          case Iop_SarN32x2:
   2807          case Iop_Sar64: {
   2808             HReg res = newVRegD(env);
   2809             HReg tmp = newVRegD(env);
   2810             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2811             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   2812             HReg argR2 = newVRegI(env);
   2813             UInt size;
   2814             switch (e->Iex.Binop.op) {
   2815                case Iop_SarN8x8: size = 0; break;
   2816                case Iop_SarN16x4: size = 1; break;
   2817                case Iop_SarN32x2: size = 2; break;
   2818                case Iop_Sar64: size = 3; break;
   2819                default: vassert(0);
   2820             }
   2821             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
   2822             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
   2823             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   2824                                           res, argL, tmp, size, False));
   2825             return res;
   2826          }
   2827          case Iop_CmpGT8Ux8:
   2828          case Iop_CmpGT16Ux4:
   2829          case Iop_CmpGT32Ux2: {
   2830             HReg res = newVRegD(env);
   2831             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2832             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2833             UInt size;
   2834             switch (e->Iex.Binop.op) {
   2835                case Iop_CmpGT8Ux8: size = 0; break;
   2836                case Iop_CmpGT16Ux4: size = 1; break;
   2837                case Iop_CmpGT32Ux2: size = 2; break;
   2838                default: vassert(0);
   2839             }
   2840             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
   2841                                            res, argL, argR, size, False));
   2842             return res;
   2843          }
   2844          case Iop_CmpGT8Sx8:
   2845          case Iop_CmpGT16Sx4:
   2846          case Iop_CmpGT32Sx2: {
   2847             HReg res = newVRegD(env);
   2848             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2849             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2850             UInt size;
   2851             switch (e->Iex.Binop.op) {
   2852                case Iop_CmpGT8Sx8: size = 0; break;
   2853                case Iop_CmpGT16Sx4: size = 1; break;
   2854                case Iop_CmpGT32Sx2: size = 2; break;
   2855                default: vassert(0);
   2856             }
   2857             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
   2858                                            res, argL, argR, size, False));
   2859             return res;
   2860          }
   2861          case Iop_CmpEQ8x8:
   2862          case Iop_CmpEQ16x4:
   2863          case Iop_CmpEQ32x2: {
   2864             HReg res = newVRegD(env);
   2865             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2866             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2867             UInt size;
   2868             switch (e->Iex.Binop.op) {
   2869                case Iop_CmpEQ8x8: size = 0; break;
   2870                case Iop_CmpEQ16x4: size = 1; break;
   2871                case Iop_CmpEQ32x2: size = 2; break;
   2872                default: vassert(0);
   2873             }
   2874             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
   2875                                            res, argL, argR, size, False));
   2876             return res;
   2877          }
   2878          case Iop_Mul8x8:
   2879          case Iop_Mul16x4:
   2880          case Iop_Mul32x2: {
   2881             HReg res = newVRegD(env);
   2882             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2883             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2884             UInt size = 0;
   2885             switch(e->Iex.Binop.op) {
   2886                case Iop_Mul8x8: size = 0; break;
   2887                case Iop_Mul16x4: size = 1; break;
   2888                case Iop_Mul32x2: size = 2; break;
   2889                default: vassert(0);
   2890             }
   2891             addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
   2892                                            res, argL, argR, size, False));
   2893             return res;
   2894          }
   2895          case Iop_Mul32Fx2: {
   2896             HReg res = newVRegD(env);
   2897             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2898             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2899             UInt size = 0;
   2900             addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
   2901                                            res, argL, argR, size, False));
   2902             return res;
   2903          }
   2904          case Iop_QDMulHi16Sx4:
   2905          case Iop_QDMulHi32Sx2: {
   2906             HReg res = newVRegD(env);
   2907             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2908             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2909             UInt size = 0;
   2910             switch(e->Iex.Binop.op) {
   2911                case Iop_QDMulHi16Sx4: size = 1; break;
   2912                case Iop_QDMulHi32Sx2: size = 2; break;
   2913                default: vassert(0);
   2914             }
   2915             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
   2916                                            res, argL, argR, size, False));
   2917             return res;
   2918          }
   2919 
   2920          case Iop_QRDMulHi16Sx4:
   2921          case Iop_QRDMulHi32Sx2: {
   2922             HReg res = newVRegD(env);
   2923             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2924             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2925             UInt size = 0;
   2926             switch(e->Iex.Binop.op) {
   2927                case Iop_QRDMulHi16Sx4: size = 1; break;
   2928                case Iop_QRDMulHi32Sx2: size = 2; break;
   2929                default: vassert(0);
   2930             }
   2931             addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
   2932                                            res, argL, argR, size, False));
   2933             return res;
   2934          }
   2935 
   2936          case Iop_PwAdd8x8:
   2937          case Iop_PwAdd16x4:
   2938          case Iop_PwAdd32x2: {
   2939             HReg res = newVRegD(env);
   2940             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2941             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2942             UInt size = 0;
   2943             switch(e->Iex.Binop.op) {
   2944                case Iop_PwAdd8x8: size = 0; break;
   2945                case Iop_PwAdd16x4: size = 1; break;
   2946                case Iop_PwAdd32x2: size = 2; break;
   2947                default: vassert(0);
   2948             }
   2949             addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
   2950                                            res, argL, argR, size, False));
   2951             return res;
   2952          }
   2953          case Iop_PwAdd32Fx2: {
   2954             HReg res = newVRegD(env);
   2955             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2956             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2957             UInt size = 0;
   2958             addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
   2959                                            res, argL, argR, size, False));
   2960             return res;
   2961          }
   2962          case Iop_PwMin8Ux8:
   2963          case Iop_PwMin16Ux4:
   2964          case Iop_PwMin32Ux2: {
   2965             HReg res = newVRegD(env);
   2966             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2967             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2968             UInt size = 0;
   2969             switch(e->Iex.Binop.op) {
   2970                case Iop_PwMin8Ux8: size = 0; break;
   2971                case Iop_PwMin16Ux4: size = 1; break;
   2972                case Iop_PwMin32Ux2: size = 2; break;
   2973                default: vassert(0);
   2974             }
   2975             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
   2976                                            res, argL, argR, size, False));
   2977             return res;
   2978          }
   2979          case Iop_PwMin8Sx8:
   2980          case Iop_PwMin16Sx4:
   2981          case Iop_PwMin32Sx2: {
   2982             HReg res = newVRegD(env);
   2983             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   2984             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   2985             UInt size = 0;
   2986             switch(e->Iex.Binop.op) {
   2987                case Iop_PwMin8Sx8: size = 0; break;
   2988                case Iop_PwMin16Sx4: size = 1; break;
   2989                case Iop_PwMin32Sx2: size = 2; break;
   2990                default: vassert(0);
   2991             }
   2992             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
   2993                                            res, argL, argR, size, False));
   2994             return res;
   2995          }
   2996          case Iop_PwMax8Ux8:
   2997          case Iop_PwMax16Ux4:
   2998          case Iop_PwMax32Ux2: {
   2999             HReg res = newVRegD(env);
   3000             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3001             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   3002             UInt size = 0;
   3003             switch(e->Iex.Binop.op) {
   3004                case Iop_PwMax8Ux8: size = 0; break;
   3005                case Iop_PwMax16Ux4: size = 1; break;
   3006                case Iop_PwMax32Ux2: size = 2; break;
   3007                default: vassert(0);
   3008             }
   3009             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
   3010                                            res, argL, argR, size, False));
   3011             return res;
   3012          }
   3013          case Iop_PwMax8Sx8:
   3014          case Iop_PwMax16Sx4:
   3015          case Iop_PwMax32Sx2: {
   3016             HReg res = newVRegD(env);
   3017             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3018             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   3019             UInt size = 0;
   3020             switch(e->Iex.Binop.op) {
   3021                case Iop_PwMax8Sx8: size = 0; break;
   3022                case Iop_PwMax16Sx4: size = 1; break;
   3023                case Iop_PwMax32Sx2: size = 2; break;
   3024                default: vassert(0);
   3025             }
   3026             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
   3027                                            res, argL, argR, size, False));
   3028             return res;
   3029          }
   3030          case Iop_Perm8x8: {
   3031             HReg res = newVRegD(env);
   3032             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3033             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   3034             addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
   3035                                            res, argL, argR, 0, False));
   3036             return res;
   3037          }
   3038          case Iop_PolynomialMul8x8: {
   3039             HReg res = newVRegD(env);
   3040             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3041             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   3042             UInt size = 0;
   3043             addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
   3044                                            res, argL, argR, size, False));
   3045             return res;
   3046          }
   3047          case Iop_Max32Fx2: {
   3048             HReg res = newVRegD(env);
   3049             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3050             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   3051             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
   3052                                            res, argL, argR, 2, False));
   3053             return res;
   3054          }
   3055          case Iop_Min32Fx2: {
   3056             HReg res = newVRegD(env);
   3057             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3058             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   3059             addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
   3060                                            res, argL, argR, 2, False));
   3061             return res;
   3062          }
   3063          case Iop_PwMax32Fx2: {
   3064             HReg res = newVRegD(env);
   3065             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3066             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   3067             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
   3068                                            res, argL, argR, 2, False));
   3069             return res;
   3070          }
   3071          case Iop_PwMin32Fx2: {
   3072             HReg res = newVRegD(env);
   3073             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3074             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   3075             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
   3076                                            res, argL, argR, 2, False));
   3077             return res;
   3078          }
   3079          case Iop_CmpGT32Fx2: {
   3080             HReg res = newVRegD(env);
   3081             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3082             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   3083             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
   3084                                            res, argL, argR, 2, False));
   3085             return res;
   3086          }
   3087          case Iop_CmpGE32Fx2: {
   3088             HReg res = newVRegD(env);
   3089             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3090             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   3091             addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
   3092                                            res, argL, argR, 2, False));
   3093             return res;
   3094          }
   3095          case Iop_CmpEQ32Fx2: {
   3096             HReg res = newVRegD(env);
   3097             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3098             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   3099             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
   3100                                            res, argL, argR, 2, False));
   3101             return res;
   3102          }
   3103          case Iop_F32ToFixed32Ux2_RZ:
   3104          case Iop_F32ToFixed32Sx2_RZ:
   3105          case Iop_Fixed32UToF32x2_RN:
   3106          case Iop_Fixed32SToF32x2_RN: {
   3107             HReg res = newVRegD(env);
   3108             HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3109             ARMNeonUnOp op;
   3110             UInt imm6;
   3111             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   3112                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   3113                   vpanic("ARM supports FP <-> Fixed conversion with constant "
   3114                          "second argument less than 33 only\n");
   3115             }
   3116             imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   3117             vassert(imm6 <= 32 && imm6 > 0);
   3118             imm6 = 64 - imm6;
   3119             switch(e->Iex.Binop.op) {
   3120                case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
   3121                case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
   3122                case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
   3123                case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
   3124                default: vassert(0);
   3125             }
   3126             addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
   3127             return res;
   3128          }
   3129          /*
   3130          FIXME: is this here or not?
   3131          case Iop_VDup8x8:
   3132          case Iop_VDup16x4:
   3133          case Iop_VDup32x2: {
   3134             HReg res = newVRegD(env);
   3135             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3136             UInt index;
   3137             UInt imm4;
   3138             UInt size = 0;
   3139             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   3140                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   3141                   vpanic("ARM supports Iop_VDup with constant "
   3142                          "second argument less than 16 only\n");
   3143             }
   3144             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   3145             switch(e->Iex.Binop.op) {
   3146                case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
   3147                case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
   3148                case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
   3149                default: vassert(0);
   3150             }
   3151             if (imm4 >= 16) {
   3152                vpanic("ARM supports Iop_VDup with constant "
   3153                       "second argument less than 16 only\n");
   3154             }
   3155             addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
   3156                                           res, argL, imm4, False));
   3157             return res;
   3158          }
   3159          */
   3160          default:
   3161             break;
   3162       }
   3163    }
   3164 
   3165    /* --------- UNARY ops --------- */
   3166    if (e->tag == Iex_Unop) {
   3167       switch (e->Iex.Unop.op) {
   3168 
   3169          /* 32Uto64 */
   3170          case Iop_32Uto64: {
   3171             HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
   3172             HReg rHi = newVRegI(env);
   3173             HReg res = newVRegD(env);
   3174             addInstr(env, ARMInstr_Imm32(rHi, 0));
   3175             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   3176             return res;
   3177          }
   3178 
   3179          /* 32Sto64 */
   3180          case Iop_32Sto64: {
   3181             HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
   3182             HReg rHi = newVRegI(env);
   3183             addInstr(env, mk_iMOVds_RR(rHi, rLo));
   3184             addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31)));
   3185             HReg res = newVRegD(env);
   3186             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   3187             return res;
   3188          }
   3189 
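   3190          /* The next 3 are pass-throughs: the value is computed as an
                    integer register pair by iselInt64Expr and then
                    transferred into a D register. */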
   3191          /* ReinterpF64asI64 */
   3192          case Iop_ReinterpF64asI64:
   3193          /* Left64(e) */
   3194          case Iop_Left64:
   3195          /* 1Sto64(e) */
   3196          case Iop_1Sto64: {
   3197             HReg rLo, rHi;
   3198             HReg res = newVRegD(env);
   3199             iselInt64Expr(&rHi, &rLo, env, e);
   3200             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   3201             return res;
   3202          }
   3203 
   3204          case Iop_Not64: {
   3205             DECLARE_PATTERN(p_veqz_8x8);
   3206             DECLARE_PATTERN(p_veqz_16x4);
   3207             DECLARE_PATTERN(p_veqz_32x2);
   3208             DECLARE_PATTERN(p_vcge_8sx8);
   3209             DECLARE_PATTERN(p_vcge_16sx4);
   3210             DECLARE_PATTERN(p_vcge_32sx2);
   3211             DECLARE_PATTERN(p_vcge_8ux8);
   3212             DECLARE_PATTERN(p_vcge_16ux4);
   3213             DECLARE_PATTERN(p_vcge_32ux2);
   3214             DEFINE_PATTERN(p_veqz_8x8,
   3215                   unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
   3216             DEFINE_PATTERN(p_veqz_16x4,
   3217                   unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
   3218             DEFINE_PATTERN(p_veqz_32x2,
   3219                   unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
   3220             DEFINE_PATTERN(p_vcge_8sx8,
   3221                   unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
   3222             DEFINE_PATTERN(p_vcge_16sx4,
   3223                   unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
   3224             DEFINE_PATTERN(p_vcge_32sx2,
   3225                   unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
   3226             DEFINE_PATTERN(p_vcge_8ux8,
   3227                   unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
   3228             DEFINE_PATTERN(p_vcge_16ux4,
   3229                   unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
   3230             DEFINE_PATTERN(p_vcge_32ux2,
   3231                   unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
   3232             if (matchIRExpr(&mi, p_veqz_8x8, e)) {
   3233                HReg res = newVRegD(env);
   3234                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
   3235                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
   3236                return res;
   3237             } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
   3238                HReg res = newVRegD(env);
   3239                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
   3240                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
   3241                return res;
   3242             } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
   3243                HReg res = newVRegD(env);
   3244                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
   3245                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
   3246                return res;
   3247             } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
   3248                HReg res = newVRegD(env);
   3249                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3250                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3251                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3252                                               res, argL, argR, 0, False));
   3253                return res;
   3254             } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
   3255                HReg res = newVRegD(env);
   3256                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3257                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3258                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3259                                               res, argL, argR, 1, False));
   3260                return res;
   3261             } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
   3262                HReg res = newVRegD(env);
   3263                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3264                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3265                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3266                                               res, argL, argR, 2, False));
   3267                return res;
   3268             } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
   3269                HReg res = newVRegD(env);
   3270                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3271                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3272                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3273                                               res, argL, argR, 0, False));
   3274                return res;
   3275             } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
   3276                HReg res = newVRegD(env);
   3277                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3278                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3279                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3280                                               res, argL, argR, 1, False));
   3281                return res;
   3282             } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
   3283                HReg res = newVRegD(env);
   3284                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3285                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3286                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3287                                               res, argL, argR, 2, False));
   3288                return res;
   3289             } else {
   3290                HReg res = newVRegD(env);
   3291                HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3292                addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
   3293                return res;
   3294             }
   3295          }
   3296          case Iop_Dup8x8:
   3297          case Iop_Dup16x4:
   3298          case Iop_Dup32x2: {
   3299             HReg res, arg;
   3300             UInt size;
   3301             DECLARE_PATTERN(p_vdup_8x8);
   3302             DECLARE_PATTERN(p_vdup_16x4);
   3303             DECLARE_PATTERN(p_vdup_32x2);
   3304             DEFINE_PATTERN(p_vdup_8x8,
   3305                   unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
   3306             DEFINE_PATTERN(p_vdup_16x4,
   3307                   unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
   3308             DEFINE_PATTERN(p_vdup_32x2,
   3309                   unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
   3310             if (matchIRExpr(&mi, p_vdup_8x8, e)) {
   3311                UInt index;
   3312                UInt imm4;
   3313                if (mi.bindee[1]->tag == Iex_Const &&
   3314                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3315                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3316                   imm4 = (index << 1) + 1;
   3317                   if (index < 8) {
   3318                      res = newVRegD(env);
   3319                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3320                      addInstr(env, ARMInstr_NUnaryS(
   3321                                       ARMneon_VDUP,
   3322                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3323                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3324                                       imm4, False
   3325                              ));
   3326                      return res;
   3327                   }
   3328                }
   3329             } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
   3330                UInt index;
   3331                UInt imm4;
   3332                if (mi.bindee[1]->tag == Iex_Const &&
   3333                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3334                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3335                   imm4 = (index << 2) + 2;
   3336                   if (index < 4) {
   3337                      res = newVRegD(env);
   3338                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3339                      addInstr(env, ARMInstr_NUnaryS(
   3340                                       ARMneon_VDUP,
   3341                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3342                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3343                                       imm4, False
   3344                              ));
   3345                      return res;
   3346                   }
   3347                }
   3348             } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
   3349                UInt index;
   3350                UInt imm4;
   3351                if (mi.bindee[1]->tag == Iex_Const &&
   3352                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3353                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3354                   imm4 = (index << 3) + 4;
   3355                   if (index < 2) {
   3356                      res = newVRegD(env);
   3357                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3358                      addInstr(env, ARMInstr_NUnaryS(
   3359                                       ARMneon_VDUP,
   3360                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3361                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3362                                       imm4, False
   3363                              ));
   3364                      return res;
   3365                   }
   3366                }
   3367             }
   3368             arg = iselIntExpr_R(env, e->Iex.Unop.arg);
   3369             res = newVRegD(env);
   3370             switch (e->Iex.Unop.op) {
   3371                case Iop_Dup8x8: size = 0; break;
   3372                case Iop_Dup16x4: size = 1; break;
   3373                case Iop_Dup32x2: size = 2; break;
   3374                default: vassert(0);
   3375             }
   3376             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
   3377             return res;
   3378          }
   3379          case Iop_Abs8x8:
   3380          case Iop_Abs16x4:
   3381          case Iop_Abs32x2: {
   3382             HReg res = newVRegD(env);
   3383             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3384             UInt size = 0;
   3385             switch(e->Iex.Binop.op) {
   3386                case Iop_Abs8x8: size = 0; break;
   3387                case Iop_Abs16x4: size = 1; break;
   3388                case Iop_Abs32x2: size = 2; break;
   3389                default: vassert(0);
   3390             }
   3391             addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
   3392             return res;
   3393          }
   3394          case Iop_Reverse64_8x8:
   3395          case Iop_Reverse64_16x4:
   3396          case Iop_Reverse64_32x2: {
   3397             HReg res = newVRegD(env);
   3398             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3399             UInt size = 0;
   3400             switch(e->Iex.Binop.op) {
   3401                case Iop_Reverse64_8x8: size = 0; break;
   3402                case Iop_Reverse64_16x4: size = 1; break;
   3403                case Iop_Reverse64_32x2: size = 2; break;
   3404                default: vassert(0);
   3405             }
   3406             addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
   3407                                           res, arg, size, False));
   3408             return res;
   3409          }
   3410          case Iop_Reverse32_8x8:
   3411          case Iop_Reverse32_16x4: {
   3412             HReg res = newVRegD(env);
   3413             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3414             UInt size = 0;
   3415             switch(e->Iex.Binop.op) {
   3416                case Iop_Reverse32_8x8: size = 0; break;
   3417                case Iop_Reverse32_16x4: size = 1; break;
   3418                default: vassert(0);
   3419             }
   3420             addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
   3421                                           res, arg, size, False));
   3422             return res;
   3423          }
   3424          case Iop_Reverse16_8x8: {
   3425             HReg res = newVRegD(env);
   3426             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3427             UInt size = 0;
   3428             addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
   3429                                           res, arg, size, False));
   3430             return res;
   3431          }
   3432          case Iop_CmpwNEZ64: {
   3433             HReg x_lsh = newVRegD(env);
   3434             HReg x_rsh = newVRegD(env);
   3435             HReg lsh_amt = newVRegD(env);
   3436             HReg rsh_amt = newVRegD(env);
   3437             HReg zero = newVRegD(env);
   3438             HReg tmp = newVRegD(env);
   3439             HReg tmp2 = newVRegD(env);
   3440             HReg res = newVRegD(env);
   3441             HReg x = newVRegD(env);
   3442             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3443             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
   3444             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
   3445             addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
   3446             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
   3447             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   3448                                            rsh_amt, zero, lsh_amt, 2, False));
   3449             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   3450                                           x_lsh, x, lsh_amt, 3, False));
   3451             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   3452                                           x_rsh, x, rsh_amt, 3, False));
   3453             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   3454                                            tmp, x_lsh, x_rsh, 0, False));
   3455             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   3456                                            res, tmp, x, 0, False));
   3457             return res;
   3458          }
   3459          case Iop_CmpNEZ8x8:
   3460          case Iop_CmpNEZ16x4:
   3461          case Iop_CmpNEZ32x2: {
   3462             HReg res = newVRegD(env);
   3463             HReg tmp = newVRegD(env);
   3464             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3465             UInt size;
   3466             switch (e->Iex.Unop.op) {
   3467                case Iop_CmpNEZ8x8: size = 0; break;
   3468                case Iop_CmpNEZ16x4: size = 1; break;
   3469                case Iop_CmpNEZ32x2: size = 2; break;
   3470                default: vassert(0);
   3471             }
   3472             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
   3473             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
   3474             return res;
   3475          }
   3476          case Iop_NarrowUn16to8x8:
   3477          case Iop_NarrowUn32to16x4:
   3478          case Iop_NarrowUn64to32x2: {
   3479             HReg res = newVRegD(env);
   3480             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3481             UInt size = 0;
   3482             switch(e->Iex.Binop.op) {
   3483                case Iop_NarrowUn16to8x8:  size = 0; break;
   3484                case Iop_NarrowUn32to16x4: size = 1; break;
   3485                case Iop_NarrowUn64to32x2: size = 2; break;
   3486                default: vassert(0);
   3487             }
   3488             addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
   3489                                           res, arg, size, False));
   3490             return res;
   3491          }
   3492          case Iop_QNarrowUn16Sto8Sx8:
   3493          case Iop_QNarrowUn32Sto16Sx4:
   3494          case Iop_QNarrowUn64Sto32Sx2: {
   3495             HReg res = newVRegD(env);
   3496             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3497             UInt size = 0;
   3498             switch(e->Iex.Binop.op) {
   3499                case Iop_QNarrowUn16Sto8Sx8:  size = 0; break;
   3500                case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
   3501                case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
   3502                default: vassert(0);
   3503             }
   3504             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
   3505                                           res, arg, size, False));
   3506             return res;
   3507          }
   3508          case Iop_QNarrowUn16Sto8Ux8:
   3509          case Iop_QNarrowUn32Sto16Ux4:
   3510          case Iop_QNarrowUn64Sto32Ux2: {
   3511             HReg res = newVRegD(env);
   3512             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3513             UInt size = 0;
   3514             switch(e->Iex.Binop.op) {
   3515                case Iop_QNarrowUn16Sto8Ux8:  size = 0; break;
   3516                case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
   3517                case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
   3518                default: vassert(0);
   3519             }
   3520             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
   3521                                           res, arg, size, False));
   3522             return res;
   3523          }
   3524          case Iop_QNarrowUn16Uto8Ux8:
   3525          case Iop_QNarrowUn32Uto16Ux4:
   3526          case Iop_QNarrowUn64Uto32Ux2: {
   3527             HReg res = newVRegD(env);
   3528             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3529             UInt size = 0;
   3530             switch(e->Iex.Binop.op) {
   3531                case Iop_QNarrowUn16Uto8Ux8:  size = 0; break;
   3532                case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
   3533                case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
   3534                default: vassert(0);
   3535             }
   3536             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
   3537                                           res, arg, size, False));
   3538             return res;
   3539          }
   3540          case Iop_PwAddL8Sx8:
   3541          case Iop_PwAddL16Sx4:
   3542          case Iop_PwAddL32Sx2: {
   3543             HReg res = newVRegD(env);
   3544             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3545             UInt size = 0;
   3546             switch(e->Iex.Binop.op) {
   3547                case Iop_PwAddL8Sx8: size = 0; break;
   3548                case Iop_PwAddL16Sx4: size = 1; break;
   3549                case Iop_PwAddL32Sx2: size = 2; break;
   3550                default: vassert(0);
   3551             }
   3552             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
   3553                                           res, arg, size, False));
   3554             return res;
   3555          }
   3556          case Iop_PwAddL8Ux8:
   3557          case Iop_PwAddL16Ux4:
   3558          case Iop_PwAddL32Ux2: {
   3559             HReg res = newVRegD(env);
   3560             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3561             UInt size = 0;
   3562             switch(e->Iex.Binop.op) {
   3563                case Iop_PwAddL8Ux8: size = 0; break;
   3564                case Iop_PwAddL16Ux4: size = 1; break;
   3565                case Iop_PwAddL32Ux2: size = 2; break;
   3566                default: vassert(0);
   3567             }
   3568             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
   3569                                           res, arg, size, False));
   3570             return res;
   3571          }
   3572          case Iop_Cnt8x8: {
   3573             HReg res = newVRegD(env);
   3574             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3575             UInt size = 0;
   3576             addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
   3577                                           res, arg, size, False));
   3578             return res;
   3579          }
   3580          case Iop_Clz8Sx8:
   3581          case Iop_Clz16Sx4:
   3582          case Iop_Clz32Sx2: {
   3583             HReg res = newVRegD(env);
   3584             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3585             UInt size = 0;
   3586             switch(e->Iex.Binop.op) {
   3587                case Iop_Clz8Sx8: size = 0; break;
   3588                case Iop_Clz16Sx4: size = 1; break;
   3589                case Iop_Clz32Sx2: size = 2; break;
   3590                default: vassert(0);
   3591             }
   3592             addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
   3593                                           res, arg, size, False));
   3594             return res;
   3595          }
   3596          case Iop_Cls8Sx8:
   3597          case Iop_Cls16Sx4:
   3598          case Iop_Cls32Sx2: {
   3599             HReg res = newVRegD(env);
   3600             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3601             UInt size = 0;
   3602             switch(e->Iex.Binop.op) {
   3603                case Iop_Cls8Sx8: size = 0; break;
   3604                case Iop_Cls16Sx4: size = 1; break;
   3605                case Iop_Cls32Sx2: size = 2; break;
   3606                default: vassert(0);
   3607             }
   3608             addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
   3609                                           res, arg, size, False));
   3610             return res;
   3611          }
   3612          case Iop_FtoI32Sx2_RZ: {
   3613             HReg res = newVRegD(env);
   3614             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3615             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
   3616                                           res, arg, 2, False));
   3617             return res;
   3618          }
   3619          case Iop_FtoI32Ux2_RZ: {
   3620             HReg res = newVRegD(env);
   3621             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3622             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
   3623                                           res, arg, 2, False));
   3624             return res;
   3625          }
   3626          case Iop_I32StoFx2: {
   3627             HReg res = newVRegD(env);
   3628             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3629             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
   3630                                           res, arg, 2, False));
   3631             return res;
   3632          }
   3633          case Iop_I32UtoFx2: {
   3634             HReg res = newVRegD(env);
   3635             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3636             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
   3637                                           res, arg, 2, False));
   3638             return res;
   3639          }
   3640          case Iop_F32toF16x4: {
   3641             HReg res = newVRegD(env);
   3642             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3643             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
   3644                                           res, arg, 2, False));
   3645             return res;
   3646          }
   3647          case Iop_Recip32Fx2: {
   3648             HReg res = newVRegD(env);
   3649             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3650             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
   3651                                           res, argL, 0, False));
   3652             return res;
   3653          }
   3654          case Iop_Recip32x2: {
   3655             HReg res = newVRegD(env);
   3656             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   3657             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
   3658                                           res, argL, 0, False));
   3659             return res;
   3660          }
   3661          case Iop_Abs32Fx2: {
   3662             DECLARE_PATTERN(p_vabd_32fx2);
   3663             DEFINE_PATTERN(p_vabd_32fx2,
   3664                            unop(Iop_Abs32Fx2,
   3665                                 binop(Iop_Sub32Fx2,
   3666                                       bind(0),
   3667                                       bind(1))));
   3668             if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
   3669                HReg res = newVRegD(env);
   3670                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
   3671                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
   3672                addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
   3673                                               res, argL, argR, 0, False));
   3674                return res;
   3675             } else {
   3676                HReg res = newVRegD(env);
   3677                HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3678                addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
   3679                                              res, arg, 0, False));
   3680                return res;
   3681             }
   3682          }
   3683          case Iop_Rsqrte32Fx2: {
   3684             HReg res = newVRegD(env);
   3685             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3686             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
   3687                                           res, arg, 0, False));
   3688             return res;
   3689          }
   3690          case Iop_Rsqrte32x2: {
   3691             HReg res = newVRegD(env);
   3692             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3693             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
   3694                                           res, arg, 0, False));
   3695             return res;
   3696          }
   3697          case Iop_Neg32Fx2: {
   3698             HReg res = newVRegD(env);
   3699             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   3700             addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
   3701                                           res, arg, 0, False));
   3702             return res;
   3703          }
   3704          default:
   3705             break;
   3706       }
   3707    } /* if (e->tag == Iex_Unop) */
   3708 
   3709    if (e->tag == Iex_Triop) {
   3710       IRTriop *triop = e->Iex.Triop.details;
   3711 
   3712       switch (triop->op) {
   3713          case Iop_Extract64: {
   3714             HReg res = newVRegD(env);
   3715             HReg argL = iselNeon64Expr(env, triop->arg1);
   3716             HReg argR = iselNeon64Expr(env, triop->arg2);
   3717             UInt imm4;
   3718             if (triop->arg3->tag != Iex_Const ||
   3719                 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
   3720                vpanic("ARM target supports Iop_Extract64 with constant "
   3721                       "third argument less than 16 only\n");
   3722             }
   3723             imm4 = triop->arg3->Iex.Const.con->Ico.U8;
   3724             if (imm4 >= 8) {
   3725                vpanic("ARM target supports Iop_Extract64 with constant "
   3726                       "third argument less than 16 only\n");
   3727             }
   3728             addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
   3729                                            res, argL, argR, imm4, False));
   3730             return res;
   3731          }
   3732          case Iop_SetElem8x8:
   3733          case Iop_SetElem16x4:
   3734          case Iop_SetElem32x2: {
   3735             HReg res = newVRegD(env);
   3736             HReg dreg = iselNeon64Expr(env, triop->arg1);
   3737             HReg arg = iselIntExpr_R(env, triop->arg3);
   3738             UInt index, size;
   3739             if (triop->arg2->tag != Iex_Const ||
   3740                 typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
   3741                vpanic("ARM target supports SetElem with constant "
   3742                       "second argument only\n");
   3743             }
   3744             index = triop->arg2->Iex.Const.con->Ico.U8;
   3745             switch (triop->op) {
   3746                case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
   3747                case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
   3748                case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
   3749                default: vassert(0);
   3750             }
   3751             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
   3752             addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
   3753                                            mkARMNRS(ARMNRS_Scalar, res, index),
   3754                                            mkARMNRS(ARMNRS_Reg, arg, 0),
   3755                                            size, False));
   3756             return res;
   3757          }
   3758          default:
   3759             break;
   3760       }
   3761    }
   3762 
   3763    /* --------- MULTIPLEX --------- */
   3764    if (e->tag == Iex_ITE) { // VFD
   3765       HReg rLo, rHi;
   3766       HReg res = newVRegD(env);
   3767       iselInt64Expr(&rHi, &rLo, env, e);
   3768       addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
   3769       return res;
   3770    }
   3771 
   3772    ppIRExpr(e);
   3773    vpanic("iselNeon64Expr");
   3774 }
   3775 
   3776 static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
   3777 {
           /* Checked entry point for selecting code for an expression that
              yields a 128-bit vector register.  All of the actual selection
              work is done by iselNeonExpr_wrk (which itself asserts that the
              expression has type Ity_V128); this wrapper only validates the
              register that the worker hands back. */
   3778    HReg r = iselNeonExpr_wrk( env, e );
           /* The result must live in the 128-bit vector register class ... */
   3779    vassert(hregClass(r) == HRcVec128);
           /* ... and must be a virtual register. */
   3780    vassert(hregIsVirtual(r));
   3781    return r;
   3782 }
   3783 
   3784 /* DO NOT CALL THIS DIRECTLY */
   3785 static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
   3786 {
   3787    IRType ty = typeOfIRExpr(env->type_env, e);
   3788    MatchInfo mi;
   3789    vassert(e);
   3790    vassert(ty == Ity_V128);
   3791 
   3792    if (e->tag == Iex_RdTmp) {
   3793       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   3794    }
   3795 
   3796    if (e->tag == Iex_Const) {
   3797       /* At the moment there should be no 128-bit constants in IR for ARM
   3798          generated during disassemble. They are represented as Iop_64HLtoV128
   3799          binary operation and are handled among binary ops. */
   3800       /* But zero can be created by valgrind internal optimizer */
   3801       if (e->Iex.Const.con->Ico.V128 == 0x0000) {
   3802          HReg res = newVRegV(env);
   3803          addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 0)));
   3804          return res;
   3805       }
   3806       if (e->Iex.Const.con->Ico.V128 == 0xFFFF) {
   3807          HReg res = newVRegV(env);
   3808          addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 255)));
   3809          return res;
   3810       }
   3811       ppIRExpr(e);
   3812       vpanic("128-bit constant is not implemented");
   3813    }
   3814 
   3815    if (e->tag == Iex_Load) {
   3816       HReg res = newVRegV(env);
   3817       ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
   3818       vassert(ty == Ity_V128);
   3819       addInstr(env, ARMInstr_NLdStQ(True, res, am));
   3820       return res;
   3821    }
   3822 
   3823    if (e->tag == Iex_Get) {
   3824       HReg addr = newVRegI(env);
   3825       HReg res = newVRegV(env);
   3826       vassert(ty == Ity_V128);
   3827       addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
   3828       addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
   3829       return res;
   3830    }
   3831 
   3832    if (e->tag == Iex_Unop) {
   3833       switch (e->Iex.Unop.op) {
   3834          case Iop_NotV128: {
   3835             DECLARE_PATTERN(p_veqz_8x16);
   3836             DECLARE_PATTERN(p_veqz_16x8);
   3837             DECLARE_PATTERN(p_veqz_32x4);
   3838             DECLARE_PATTERN(p_vcge_8sx16);
   3839             DECLARE_PATTERN(p_vcge_16sx8);
   3840             DECLARE_PATTERN(p_vcge_32sx4);
   3841             DECLARE_PATTERN(p_vcge_8ux16);
   3842             DECLARE_PATTERN(p_vcge_16ux8);
   3843             DECLARE_PATTERN(p_vcge_32ux4);
   3844             DEFINE_PATTERN(p_veqz_8x16,
   3845                   unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
   3846             DEFINE_PATTERN(p_veqz_16x8,
   3847                   unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
   3848             DEFINE_PATTERN(p_veqz_32x4,
   3849                   unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
   3850             DEFINE_PATTERN(p_vcge_8sx16,
   3851                   unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
   3852             DEFINE_PATTERN(p_vcge_16sx8,
   3853                   unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
   3854             DEFINE_PATTERN(p_vcge_32sx4,
   3855                   unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
   3856             DEFINE_PATTERN(p_vcge_8ux16,
   3857                   unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
   3858             DEFINE_PATTERN(p_vcge_16ux8,
   3859                   unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
   3860             DEFINE_PATTERN(p_vcge_32ux4,
   3861                   unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
   3862             if (matchIRExpr(&mi, p_veqz_8x16, e)) {
   3863                HReg res = newVRegV(env);
   3864                HReg arg = iselNeonExpr(env, mi.bindee[0]);
   3865                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
   3866                return res;
   3867             } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
   3868                HReg res = newVRegV(env);
   3869                HReg arg = iselNeonExpr(env, mi.bindee[0]);
   3870                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
   3871                return res;
   3872             } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
   3873                HReg res = newVRegV(env);
   3874                HReg arg = iselNeonExpr(env, mi.bindee[0]);
   3875                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
   3876                return res;
   3877             } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
   3878                HReg res = newVRegV(env);
   3879                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3880                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3881                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3882                                               res, argL, argR, 0, True));
   3883                return res;
   3884             } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
   3885                HReg res = newVRegV(env);
   3886                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3887                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3888                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3889                                               res, argL, argR, 1, True));
   3890                return res;
   3891             } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
   3892                HReg res = newVRegV(env);
   3893                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3894                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3895                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
   3896                                               res, argL, argR, 2, True));
   3897                return res;
   3898             } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
   3899                HReg res = newVRegV(env);
   3900                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3901                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3902                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3903                                               res, argL, argR, 0, True));
   3904                return res;
   3905             } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
   3906                HReg res = newVRegV(env);
   3907                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3908                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3909                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3910                                               res, argL, argR, 1, True));
   3911                return res;
   3912             } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
   3913                HReg res = newVRegV(env);
   3914                HReg argL = iselNeonExpr(env, mi.bindee[0]);
   3915                HReg argR = iselNeonExpr(env, mi.bindee[1]);
   3916                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
   3917                                               res, argL, argR, 2, True));
   3918                return res;
   3919             } else {
   3920                HReg res = newVRegV(env);
   3921                HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   3922                addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
   3923                return res;
   3924             }
   3925          }
   3926          case Iop_Dup8x16:
   3927          case Iop_Dup16x8:
   3928          case Iop_Dup32x4: {
   3929             HReg res, arg;
   3930             UInt size;
   3931             DECLARE_PATTERN(p_vdup_8x16);
   3932             DECLARE_PATTERN(p_vdup_16x8);
   3933             DECLARE_PATTERN(p_vdup_32x4);
   3934             DEFINE_PATTERN(p_vdup_8x16,
   3935                   unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
   3936             DEFINE_PATTERN(p_vdup_16x8,
   3937                   unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
   3938             DEFINE_PATTERN(p_vdup_32x4,
   3939                   unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
   3940             if (matchIRExpr(&mi, p_vdup_8x16, e)) {
   3941                UInt index;
   3942                UInt imm4;
   3943                if (mi.bindee[1]->tag == Iex_Const &&
   3944                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3945                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3946                   imm4 = (index << 1) + 1;
   3947                   if (index < 8) {
   3948                      res = newVRegV(env);
   3949                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3950                      addInstr(env, ARMInstr_NUnaryS(
   3951                                       ARMneon_VDUP,
   3952                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3953                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3954                                       imm4, True
   3955                              ));
   3956                      return res;
   3957                   }
   3958                }
   3959             } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
   3960                UInt index;
   3961                UInt imm4;
   3962                if (mi.bindee[1]->tag == Iex_Const &&
   3963                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3964                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3965                   imm4 = (index << 2) + 2;
   3966                   if (index < 4) {
   3967                      res = newVRegV(env);
   3968                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3969                      addInstr(env, ARMInstr_NUnaryS(
   3970                                       ARMneon_VDUP,
   3971                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3972                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3973                                       imm4, True
   3974                              ));
   3975                      return res;
   3976                   }
   3977                }
   3978             } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
   3979                UInt index;
   3980                UInt imm4;
   3981                if (mi.bindee[1]->tag == Iex_Const &&
   3982                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
   3983                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
   3984                   imm4 = (index << 3) + 4;
   3985                   if (index < 2) {
   3986                      res = newVRegV(env);
   3987                      arg = iselNeon64Expr(env, mi.bindee[0]);
   3988                      addInstr(env, ARMInstr_NUnaryS(
   3989                                       ARMneon_VDUP,
   3990                                       mkARMNRS(ARMNRS_Reg, res, 0),
   3991                                       mkARMNRS(ARMNRS_Scalar, arg, index),
   3992                                       imm4, True
   3993                              ));
   3994                      return res;
   3995                   }
   3996                }
   3997             }
   3998             arg = iselIntExpr_R(env, e->Iex.Unop.arg);
   3999             res = newVRegV(env);
   4000             switch (e->Iex.Unop.op) {
   4001                case Iop_Dup8x16: size = 0; break;
   4002                case Iop_Dup16x8: size = 1; break;
   4003                case Iop_Dup32x4: size = 2; break;
   4004                default: vassert(0);
   4005             }
   4006             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
   4007             return res;
   4008          }
   4009          case Iop_Abs8x16:
   4010          case Iop_Abs16x8:
   4011          case Iop_Abs32x4: {
   4012             HReg res = newVRegV(env);
   4013             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4014             UInt size = 0;
   4015             switch(e->Iex.Binop.op) {
   4016                case Iop_Abs8x16: size = 0; break;
   4017                case Iop_Abs16x8: size = 1; break;
   4018                case Iop_Abs32x4: size = 2; break;
   4019                default: vassert(0);
   4020             }
   4021             addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
   4022             return res;
   4023          }
   4024          case Iop_Reverse64_8x16:
   4025          case Iop_Reverse64_16x8:
   4026          case Iop_Reverse64_32x4: {
   4027             HReg res = newVRegV(env);
   4028             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4029             UInt size = 0;
   4030             switch(e->Iex.Binop.op) {
   4031                case Iop_Reverse64_8x16: size = 0; break;
   4032                case Iop_Reverse64_16x8: size = 1; break;
   4033                case Iop_Reverse64_32x4: size = 2; break;
   4034                default: vassert(0);
   4035             }
   4036             addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
   4037                                           res, arg, size, True));
   4038             return res;
   4039          }
   4040          case Iop_Reverse32_8x16:
   4041          case Iop_Reverse32_16x8: {
   4042             HReg res = newVRegV(env);
   4043             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4044             UInt size = 0;
   4045             switch(e->Iex.Binop.op) {
   4046                case Iop_Reverse32_8x16: size = 0; break;
   4047                case Iop_Reverse32_16x8: size = 1; break;
   4048                default: vassert(0);
   4049             }
   4050             addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
   4051                                           res, arg, size, True));
   4052             return res;
   4053          }
   4054          case Iop_Reverse16_8x16: {
   4055             HReg res = newVRegV(env);
   4056             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4057             UInt size = 0;
   4058             addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
   4059                                           res, arg, size, True));
   4060             return res;
   4061          }
   4062          case Iop_CmpNEZ64x2: {
   4063             HReg x_lsh = newVRegV(env);
   4064             HReg x_rsh = newVRegV(env);
   4065             HReg lsh_amt = newVRegV(env);
   4066             HReg rsh_amt = newVRegV(env);
   4067             HReg zero = newVRegV(env);
   4068             HReg tmp = newVRegV(env);
   4069             HReg tmp2 = newVRegV(env);
   4070             HReg res = newVRegV(env);
   4071             HReg x = newVRegV(env);
   4072             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4073             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
   4074             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
   4075             addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
   4076             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
   4077             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   4078                                            rsh_amt, zero, lsh_amt, 2, True));
   4079             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   4080                                           x_lsh, x, lsh_amt, 3, True));
   4081             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   4082                                           x_rsh, x, rsh_amt, 3, True));
   4083             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   4084                                            tmp, x_lsh, x_rsh, 0, True));
   4085             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   4086                                            res, tmp, x, 0, True));
   4087             return res;
   4088          }
   4089          case Iop_CmpNEZ8x16:
   4090          case Iop_CmpNEZ16x8:
   4091          case Iop_CmpNEZ32x4: {
   4092             HReg res = newVRegV(env);
   4093             HReg tmp = newVRegV(env);
   4094             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4095             UInt size;
   4096             switch (e->Iex.Unop.op) {
   4097                case Iop_CmpNEZ8x16: size = 0; break;
   4098                case Iop_CmpNEZ16x8: size = 1; break;
   4099                case Iop_CmpNEZ32x4: size = 2; break;
   4100                default: vassert(0);
   4101             }
   4102             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
   4103             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
   4104             return res;
   4105          }
   4106          case Iop_Widen8Uto16x8:
   4107          case Iop_Widen16Uto32x4:
   4108          case Iop_Widen32Uto64x2: {
   4109             HReg res = newVRegV(env);
   4110             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   4111             UInt size;
   4112             switch (e->Iex.Unop.op) {
   4113                case Iop_Widen8Uto16x8:  size = 0; break;
   4114                case Iop_Widen16Uto32x4: size = 1; break;
   4115                case Iop_Widen32Uto64x2: size = 2; break;
   4116                default: vassert(0);
   4117             }
   4118             addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
   4119                                           res, arg, size, True));
   4120             return res;
   4121          }
   4122          case Iop_Widen8Sto16x8:
   4123          case Iop_Widen16Sto32x4:
   4124          case Iop_Widen32Sto64x2: {
   4125             HReg res = newVRegV(env);
   4126             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   4127             UInt size;
   4128             switch (e->Iex.Unop.op) {
   4129                case Iop_Widen8Sto16x8:  size = 0; break;
   4130                case Iop_Widen16Sto32x4: size = 1; break;
   4131                case Iop_Widen32Sto64x2: size = 2; break;
   4132                default: vassert(0);
   4133             }
   4134             addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
   4135                                           res, arg, size, True));
   4136             return res;
   4137          }
   4138          case Iop_PwAddL8Sx16:
   4139          case Iop_PwAddL16Sx8:
   4140          case Iop_PwAddL32Sx4: {
   4141             HReg res = newVRegV(env);
   4142             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4143             UInt size = 0;
   4144             switch(e->Iex.Binop.op) {
   4145                case Iop_PwAddL8Sx16: size = 0; break;
   4146                case Iop_PwAddL16Sx8: size = 1; break;
   4147                case Iop_PwAddL32Sx4: size = 2; break;
   4148                default: vassert(0);
   4149             }
   4150             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
   4151                                           res, arg, size, True));
   4152             return res;
   4153          }
   4154          case Iop_PwAddL8Ux16:
   4155          case Iop_PwAddL16Ux8:
   4156          case Iop_PwAddL32Ux4: {
   4157             HReg res = newVRegV(env);
   4158             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4159             UInt size = 0;
   4160             switch(e->Iex.Binop.op) {
   4161                case Iop_PwAddL8Ux16: size = 0; break;
   4162                case Iop_PwAddL16Ux8: size = 1; break;
   4163                case Iop_PwAddL32Ux4: size = 2; break;
   4164                default: vassert(0);
   4165             }
   4166             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
   4167                                           res, arg, size, True));
   4168             return res;
   4169          }
   4170          case Iop_Cnt8x16: {
   4171             HReg res = newVRegV(env);
   4172             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4173             UInt size = 0;
   4174             addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
   4175             return res;
   4176          }
   4177          case Iop_Clz8Sx16:
   4178          case Iop_Clz16Sx8:
   4179          case Iop_Clz32Sx4: {
   4180             HReg res = newVRegV(env);
   4181             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4182             UInt size = 0;
   4183             switch(e->Iex.Binop.op) {
   4184                case Iop_Clz8Sx16: size = 0; break;
   4185                case Iop_Clz16Sx8: size = 1; break;
   4186                case Iop_Clz32Sx4: size = 2; break;
   4187                default: vassert(0);
   4188             }
   4189             addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
   4190             return res;
   4191          }
   4192          case Iop_Cls8Sx16:
   4193          case Iop_Cls16Sx8:
   4194          case Iop_Cls32Sx4: {
   4195             HReg res = newVRegV(env);
   4196             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4197             UInt size = 0;
   4198             switch(e->Iex.Binop.op) {
   4199                case Iop_Cls8Sx16: size = 0; break;
   4200                case Iop_Cls16Sx8: size = 1; break;
   4201                case Iop_Cls32Sx4: size = 2; break;
   4202                default: vassert(0);
   4203             }
   4204             addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
   4205             return res;
   4206          }
   4207          case Iop_FtoI32Sx4_RZ: {
   4208             HReg res = newVRegV(env);
   4209             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4210             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
   4211                                           res, arg, 2, True));
   4212             return res;
   4213          }
   4214          case Iop_FtoI32Ux4_RZ: {
   4215             HReg res = newVRegV(env);
   4216             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4217             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
   4218                                           res, arg, 2, True));
   4219             return res;
   4220          }
   4221          case Iop_I32StoFx4: {
   4222             HReg res = newVRegV(env);
   4223             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4224             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
   4225                                           res, arg, 2, True));
   4226             return res;
   4227          }
   4228          case Iop_I32UtoFx4: {
   4229             HReg res = newVRegV(env);
   4230             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4231             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
   4232                                           res, arg, 2, True));
   4233             return res;
   4234          }
   4235          case Iop_F16toF32x4: {
   4236             HReg res = newVRegV(env);
   4237             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
   4238             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
   4239                                           res, arg, 2, True));
   4240             return res;
   4241          }
   4242          case Iop_Recip32Fx4: {
   4243             HReg res = newVRegV(env);
   4244             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
   4245             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
   4246                                           res, argL, 0, True));
   4247             return res;
   4248          }
   4249          case Iop_Recip32x4: {
   4250             HReg res = newVRegV(env);
   4251             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
   4252             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
   4253                                           res, argL, 0, True));
   4254             return res;
   4255          }
   4256          case Iop_Abs32Fx4: {
   4257             HReg res = newVRegV(env);
   4258             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
   4259             addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
   4260                                           res, argL, 0, True));
   4261             return res;
   4262          }
   4263          case Iop_Rsqrte32Fx4: {
   4264             HReg res = newVRegV(env);
   4265             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
   4266             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
   4267                                           res, argL, 0, True));
   4268             return res;
   4269          }
   4270          case Iop_Rsqrte32x4: {
   4271             HReg res = newVRegV(env);
   4272             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
   4273             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
   4274                                           res, argL, 0, True));
   4275             return res;
   4276          }
   4277          case Iop_Neg32Fx4: {
   4278             HReg res = newVRegV(env);
   4279             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
   4280             addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
   4281                                           res, arg, 0, True));
   4282             return res;
   4283          }
   4284          /* ... */
   4285          default:
   4286             break;
   4287       }
   4288    }
   4289 
   4290    if (e->tag == Iex_Binop) {
   4291       switch (e->Iex.Binop.op) {
   4292          case Iop_64HLtoV128:
   4293             /* Try to match into single "VMOV reg, imm" instruction */
   4294             if (e->Iex.Binop.arg1->tag == Iex_Const &&
   4295                 e->Iex.Binop.arg2->tag == Iex_Const &&
   4296                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
   4297                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
   4298                 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
   4299                            e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
   4300                ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
   4301                ARMNImm *imm = Imm64_to_ARMNImm(imm64);
   4302                if (imm) {
   4303                   HReg res = newVRegV(env);
   4304                   addInstr(env, ARMInstr_NeonImm(res, imm));
   4305                   return res;
   4306                }
   4307                if ((imm64 >> 32) == 0LL &&
   4308                    (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
   4309                   HReg tmp1 = newVRegV(env);
   4310                   HReg tmp2 = newVRegV(env);
   4311                   HReg res = newVRegV(env);
   4312                   if (imm->type < 10) {
   4313                      addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
   4314                      addInstr(env, ARMInstr_NeonImm(tmp2, imm));
   4315                      addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
   4316                                                     res, tmp1, tmp2, 4, True));
   4317                      return res;
   4318                   }
   4319                }
   4320                if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
   4321                    (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
   4322                   HReg tmp1 = newVRegV(env);
   4323                   HReg tmp2 = newVRegV(env);
   4324                   HReg res = newVRegV(env);
   4325                   if (imm->type < 10) {
   4326                      addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
   4327                      addInstr(env, ARMInstr_NeonImm(tmp2, imm));
   4328                      addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
   4329                                                     res, tmp1, tmp2, 4, True));
   4330                      return res;
   4331                   }
   4332                }
   4333             }
   4334             /* Does not match "VMOV Reg, Imm" form.  We'll have to do
   4335                it the slow way. */
   4336             {
   4337                /* local scope */
   4338                /* Done via the stack for ease of use. */
   4339                /* FIXME: assumes little endian host */
   4340                HReg       w3, w2, w1, w0;
   4341                HReg       res  = newVRegV(env);
   4342                ARMAMode1* sp_0  = ARMAMode1_RI(hregARM_R13(), 0);
   4343                ARMAMode1* sp_4  = ARMAMode1_RI(hregARM_R13(), 4);
   4344                ARMAMode1* sp_8  = ARMAMode1_RI(hregARM_R13(), 8);
   4345                ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
   4346                ARMRI84*   c_16  = ARMRI84_I84(16,0);
   4347                /* Make space on the stack: move SP down by 16 bytes */
   4348                addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
   4349                                                       hregARM_R13(), c_16));
   4350 
   4351                /* Store the less significant 64 bits */
   4352                iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
   4353                addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
   4354                                              w0, sp_0));
   4355                addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
   4356                                              w1, sp_4));
   4357 
   4358                /* Store the more significant 64 bits */
   4359                iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
   4360                addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
   4361                                              w2, sp_8));
   4362                addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
   4363                                              w3, sp_12));
   4364 
   4365                 /* Load result back from stack. */
   4366                 addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
   4367                                               mkARMAModeN_R(hregARM_R13())));
   4368 
   4369                 /* Restore SP */
   4370                 addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
   4371                                            hregARM_R13(), c_16));
   4372                 return res;
   4373             } /* local scope */
   4374             goto neon_expr_bad;
   4375          case Iop_AndV128: {
   4376             HReg res = newVRegV(env);
   4377             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4378             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4379             addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
   4380                                            res, argL, argR, 4, True));
   4381             return res;
   4382          }
   4383          case Iop_OrV128: {
   4384             HReg res = newVRegV(env);
   4385             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4386             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4387             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
   4388                                            res, argL, argR, 4, True));
   4389             return res;
   4390          }
   4391          case Iop_XorV128: {
   4392             HReg res = newVRegV(env);
   4393             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4394             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4395             addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
   4396                                            res, argL, argR, 4, True));
   4397             return res;
   4398          }
   4399          case Iop_Add8x16:
   4400          case Iop_Add16x8:
   4401          case Iop_Add32x4:
   4402          case Iop_Add64x2: {
   4403             /*
   4404             FIXME: remove this if not used
   4405             DECLARE_PATTERN(p_vrhadd_32sx4);
   4406             ULong one = (1LL << 32) | 1LL;
   4407             DEFINE_PATTERN(p_vrhadd_32sx4,
   4408                   binop(Iop_Add32x4,
   4409                         binop(Iop_Add32x4,
   4410                               binop(Iop_SarN32x4,
   4411                                     bind(0),
   4412                                     mkU8(1)),
   4413                               binop(Iop_SarN32x4,
   4414                                     bind(1),
   4415                                     mkU8(1))),
   4416                         binop(Iop_SarN32x4,
   4417                               binop(Iop_Add32x4,
   4418                                     binop(Iop_Add32x4,
   4419                                           binop(Iop_AndV128,
   4420                                                 bind(0),
   4421                                                 mkU128(one)),
   4422                                           binop(Iop_AndV128,
   4423                                                 bind(1),
   4424                                                 mkU128(one))),
   4425                                     mkU128(one)),
   4426                               mkU8(1))));
   4427             */
   4428             HReg res = newVRegV(env);
   4429             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4430             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4431             UInt size;
   4432             switch (e->Iex.Binop.op) {
   4433                case Iop_Add8x16: size = 0; break;
   4434                case Iop_Add16x8: size = 1; break;
   4435                case Iop_Add32x4: size = 2; break;
   4436                case Iop_Add64x2: size = 3; break;
   4437                default:
   4438                   ppIROp(e->Iex.Binop.op);
   4439                   vpanic("Illegal element size in VADD");
   4440             }
   4441             addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
   4442                                            res, argL, argR, size, True));
   4443             return res;
   4444          }
   4445          case Iop_Recps32Fx4: {
   4446             HReg res = newVRegV(env);
   4447             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4448             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4449             UInt size = 0;
   4450             addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
   4451                                            res, argL, argR, size, True));
   4452             return res;
   4453          }
   4454          case Iop_Rsqrts32Fx4: {
   4455             HReg res = newVRegV(env);
   4456             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4457             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4458             UInt size = 0;
   4459             addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
   4460                                            res, argL, argR, size, True));
   4461             return res;
   4462          }
   4463 
   4464          // These 6 verified 18 Apr 2013
   4465          case Iop_InterleaveEvenLanes8x16:
   4466          case Iop_InterleaveOddLanes8x16:
   4467          case Iop_InterleaveEvenLanes16x8:
   4468          case Iop_InterleaveOddLanes16x8:
   4469          case Iop_InterleaveEvenLanes32x4:
   4470          case Iop_InterleaveOddLanes32x4: {
   4471             HReg rD   = newVRegV(env);
   4472             HReg rM   = newVRegV(env);
   4473             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4474             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4475             UInt size;
   4476             Bool resRd;  // is the result in rD or rM ?
   4477             switch (e->Iex.Binop.op) {
   4478                case Iop_InterleaveOddLanes8x16:  resRd = False; size = 0; break;
   4479                case Iop_InterleaveEvenLanes8x16: resRd = True;  size = 0; break;
   4480                case Iop_InterleaveOddLanes16x8:  resRd = False; size = 1; break;
   4481                case Iop_InterleaveEvenLanes16x8: resRd = True;  size = 1; break;
   4482                case Iop_InterleaveOddLanes32x4:  resRd = False; size = 2; break;
   4483                case Iop_InterleaveEvenLanes32x4: resRd = True;  size = 2; break;
   4484                default: vassert(0);
   4485             }
   4486             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
   4487             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
   4488             addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True));
   4489             return resRd ? rD : rM;
   4490          }
   4491 
   4492          // These 6 verified 18 Apr 2013
   4493          case Iop_InterleaveHI8x16:
   4494          case Iop_InterleaveLO8x16:
   4495          case Iop_InterleaveHI16x8:
   4496          case Iop_InterleaveLO16x8:
   4497          case Iop_InterleaveHI32x4:
   4498          case Iop_InterleaveLO32x4: {
   4499             HReg rD   = newVRegV(env);
   4500             HReg rM   = newVRegV(env);
   4501             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4502             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4503             UInt size;
   4504             Bool resRd;  // is the result in rD or rM ?
   4505             switch (e->Iex.Binop.op) {
   4506                case Iop_InterleaveHI8x16: resRd = False; size = 0; break;
   4507                case Iop_InterleaveLO8x16: resRd = True;  size = 0; break;
   4508                case Iop_InterleaveHI16x8: resRd = False; size = 1; break;
   4509                case Iop_InterleaveLO16x8: resRd = True;  size = 1; break;
   4510                case Iop_InterleaveHI32x4: resRd = False; size = 2; break;
   4511                case Iop_InterleaveLO32x4: resRd = True;  size = 2; break;
   4512                default: vassert(0);
   4513             }
   4514             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
   4515             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
   4516             addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True));
   4517             return resRd ? rD : rM;
   4518          }
   4519 
   4520          // These 6 verified 18 Apr 2013
   4521          case Iop_CatOddLanes8x16:
   4522          case Iop_CatEvenLanes8x16:
   4523          case Iop_CatOddLanes16x8:
   4524          case Iop_CatEvenLanes16x8:
   4525          case Iop_CatOddLanes32x4:
   4526          case Iop_CatEvenLanes32x4: {
   4527             HReg rD   = newVRegV(env);
   4528             HReg rM   = newVRegV(env);
   4529             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4530             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4531             UInt size;
   4532             Bool resRd;  // is the result in rD or rM ?
   4533             switch (e->Iex.Binop.op) {
   4534                case Iop_CatOddLanes8x16:  resRd = False; size = 0; break;
   4535                case Iop_CatEvenLanes8x16: resRd = True;  size = 0; break;
   4536                case Iop_CatOddLanes16x8:  resRd = False; size = 1; break;
   4537                case Iop_CatEvenLanes16x8: resRd = True;  size = 1; break;
   4538                case Iop_CatOddLanes32x4:  resRd = False; size = 2; break;
   4539                case Iop_CatEvenLanes32x4: resRd = True;  size = 2; break;
   4540                default: vassert(0);
   4541             }
   4542             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
   4543             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
   4544             addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True));
   4545             return resRd ? rD : rM;
   4546          }
   4547 
   4548          case Iop_QAdd8Ux16:
   4549          case Iop_QAdd16Ux8:
   4550          case Iop_QAdd32Ux4:
   4551          case Iop_QAdd64Ux2: {
   4552             HReg res = newVRegV(env);
   4553             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4554             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4555             UInt size;
   4556             switch (e->Iex.Binop.op) {
   4557                case Iop_QAdd8Ux16: size = 0; break;
   4558                case Iop_QAdd16Ux8: size = 1; break;
   4559                case Iop_QAdd32Ux4: size = 2; break;
   4560                case Iop_QAdd64Ux2: size = 3; break;
   4561                default:
   4562                   ppIROp(e->Iex.Binop.op);
   4563                   vpanic("Illegal element size in VQADDU");
   4564             }
   4565             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
   4566                                            res, argL, argR, size, True));
   4567             return res;
   4568          }
   4569          case Iop_QAdd8Sx16:
   4570          case Iop_QAdd16Sx8:
   4571          case Iop_QAdd32Sx4:
   4572          case Iop_QAdd64Sx2: {
   4573             HReg res = newVRegV(env);
   4574             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4575             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4576             UInt size;
   4577             switch (e->Iex.Binop.op) {
   4578                case Iop_QAdd8Sx16: size = 0; break;
   4579                case Iop_QAdd16Sx8: size = 1; break;
   4580                case Iop_QAdd32Sx4: size = 2; break;
   4581                case Iop_QAdd64Sx2: size = 3; break;
   4582                default:
   4583                   ppIROp(e->Iex.Binop.op);
   4584                   vpanic("Illegal element size in VQADDS");
   4585             }
   4586             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
   4587                                            res, argL, argR, size, True));
   4588             return res;
   4589          }
   4590          case Iop_Sub8x16:
   4591          case Iop_Sub16x8:
   4592          case Iop_Sub32x4:
   4593          case Iop_Sub64x2: {
   4594             HReg res = newVRegV(env);
   4595             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4596             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4597             UInt size;
   4598             switch (e->Iex.Binop.op) {
   4599                case Iop_Sub8x16: size = 0; break;
   4600                case Iop_Sub16x8: size = 1; break;
   4601                case Iop_Sub32x4: size = 2; break;
   4602                case Iop_Sub64x2: size = 3; break;
   4603                default:
   4604                   ppIROp(e->Iex.Binop.op);
   4605                   vpanic("Illegal element size in VSUB");
   4606             }
   4607             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   4608                                            res, argL, argR, size, True));
   4609             return res;
   4610          }
   4611          case Iop_QSub8Ux16:
   4612          case Iop_QSub16Ux8:
   4613          case Iop_QSub32Ux4:
   4614          case Iop_QSub64Ux2: {
   4615             HReg res = newVRegV(env);
   4616             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4617             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4618             UInt size;
   4619             switch (e->Iex.Binop.op) {
   4620                case Iop_QSub8Ux16: size = 0; break;
   4621                case Iop_QSub16Ux8: size = 1; break;
   4622                case Iop_QSub32Ux4: size = 2; break;
   4623                case Iop_QSub64Ux2: size = 3; break;
   4624                default:
   4625                   ppIROp(e->Iex.Binop.op);
   4626                   vpanic("Illegal element size in VQSUBU");
   4627             }
   4628             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
   4629                                            res, argL, argR, size, True));
   4630             return res;
   4631          }
   4632          case Iop_QSub8Sx16:
   4633          case Iop_QSub16Sx8:
   4634          case Iop_QSub32Sx4:
   4635          case Iop_QSub64Sx2: {
   4636             HReg res = newVRegV(env);
   4637             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4638             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4639             UInt size;
   4640             switch (e->Iex.Binop.op) {
   4641                case Iop_QSub8Sx16: size = 0; break;
   4642                case Iop_QSub16Sx8: size = 1; break;
   4643                case Iop_QSub32Sx4: size = 2; break;
   4644                case Iop_QSub64Sx2: size = 3; break;
   4645                default:
   4646                   ppIROp(e->Iex.Binop.op);
   4647                   vpanic("Illegal element size in VQSUBS");
   4648             }
   4649             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
   4650                                            res, argL, argR, size, True));
   4651             return res;
   4652          }
   4653          case Iop_Max8Ux16:
   4654          case Iop_Max16Ux8:
   4655          case Iop_Max32Ux4: {
   4656             HReg res = newVRegV(env);
   4657             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4658             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4659             UInt size;
   4660             switch (e->Iex.Binop.op) {
   4661                case Iop_Max8Ux16: size = 0; break;
   4662                case Iop_Max16Ux8: size = 1; break;
   4663                case Iop_Max32Ux4: size = 2; break;
   4664                default: vpanic("Illegal element size in VMAXU");
   4665             }
   4666             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
   4667                                            res, argL, argR, size, True));
   4668             return res;
   4669          }
   4670          case Iop_Max8Sx16:
   4671          case Iop_Max16Sx8:
   4672          case Iop_Max32Sx4: {
   4673             HReg res = newVRegV(env);
   4674             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4675             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4676             UInt size;
   4677             switch (e->Iex.Binop.op) {
   4678                case Iop_Max8Sx16: size = 0; break;
   4679                case Iop_Max16Sx8: size = 1; break;
   4680                case Iop_Max32Sx4: size = 2; break;
   4681                default: vpanic("Illegal element size in VMAXU");
   4682             }
   4683             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
   4684                                            res, argL, argR, size, True));
   4685             return res;
   4686          }
   4687          case Iop_Min8Ux16:
   4688          case Iop_Min16Ux8:
   4689          case Iop_Min32Ux4: {
   4690             HReg res = newVRegV(env);
   4691             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4692             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4693             UInt size;
   4694             switch (e->Iex.Binop.op) {
   4695                case Iop_Min8Ux16: size = 0; break;
   4696                case Iop_Min16Ux8: size = 1; break;
   4697                case Iop_Min32Ux4: size = 2; break;
   4698                default: vpanic("Illegal element size in VMAXU");
   4699             }
   4700             addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
   4701                                            res, argL, argR, size, True));
   4702             return res;
   4703          }
   4704          case Iop_Min8Sx16:
   4705          case Iop_Min16Sx8:
   4706          case Iop_Min32Sx4: {
   4707             HReg res = newVRegV(env);
   4708             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4709             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4710             UInt size;
   4711             switch (e->Iex.Binop.op) {
   4712                case Iop_Min8Sx16: size = 0; break;
   4713                case Iop_Min16Sx8: size = 1; break;
   4714                case Iop_Min32Sx4: size = 2; break;
   4715                default: vpanic("Illegal element size in VMAXU");
   4716             }
   4717             addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
   4718                                            res, argL, argR, size, True));
   4719             return res;
   4720          }
   4721          case Iop_Sar8x16:
   4722          case Iop_Sar16x8:
   4723          case Iop_Sar32x4:
   4724          case Iop_Sar64x2: {
   4725             HReg res = newVRegV(env);
   4726             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4727             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4728             HReg argR2 = newVRegV(env);
   4729             HReg zero = newVRegV(env);
   4730             UInt size;
   4731             switch (e->Iex.Binop.op) {
   4732                case Iop_Sar8x16: size = 0; break;
   4733                case Iop_Sar16x8: size = 1; break;
   4734                case Iop_Sar32x4: size = 2; break;
   4735                case Iop_Sar64x2: size = 3; break;
   4736                default: vassert(0);
   4737             }
   4738             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
   4739             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   4740                                            argR2, zero, argR, size, True));
   4741             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   4742                                           res, argL, argR2, size, True));
   4743             return res;
   4744          }
   4745          case Iop_Sal8x16:
   4746          case Iop_Sal16x8:
   4747          case Iop_Sal32x4:
   4748          case Iop_Sal64x2: {
   4749             HReg res = newVRegV(env);
   4750             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4751             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4752             UInt size;
   4753             switch (e->Iex.Binop.op) {
   4754                case Iop_Sal8x16: size = 0; break;
   4755                case Iop_Sal16x8: size = 1; break;
   4756                case Iop_Sal32x4: size = 2; break;
   4757                case Iop_Sal64x2: size = 3; break;
   4758                default: vassert(0);
   4759             }
   4760             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   4761                                           res, argL, argR, size, True));
   4762             return res;
   4763          }
   4764          case Iop_Shr8x16:
   4765          case Iop_Shr16x8:
   4766          case Iop_Shr32x4:
   4767          case Iop_Shr64x2: {
   4768             HReg res = newVRegV(env);
   4769             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4770             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4771             HReg argR2 = newVRegV(env);
   4772             HReg zero = newVRegV(env);
   4773             UInt size;
   4774             switch (e->Iex.Binop.op) {
   4775                case Iop_Shr8x16: size = 0; break;
   4776                case Iop_Shr16x8: size = 1; break;
   4777                case Iop_Shr32x4: size = 2; break;
   4778                case Iop_Shr64x2: size = 3; break;
   4779                default: vassert(0);
   4780             }
   4781             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
   4782             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
   4783                                            argR2, zero, argR, size, True));
   4784             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   4785                                           res, argL, argR2, size, True));
   4786             return res;
   4787          }
   4788          case Iop_Shl8x16:
   4789          case Iop_Shl16x8:
   4790          case Iop_Shl32x4:
   4791          case Iop_Shl64x2: {
   4792             HReg res = newVRegV(env);
   4793             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4794             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4795             UInt size;
   4796             switch (e->Iex.Binop.op) {
   4797                case Iop_Shl8x16: size = 0; break;
   4798                case Iop_Shl16x8: size = 1; break;
   4799                case Iop_Shl32x4: size = 2; break;
   4800                case Iop_Shl64x2: size = 3; break;
   4801                default: vassert(0);
   4802             }
   4803             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   4804                                           res, argL, argR, size, True));
   4805             return res;
   4806          }
   4807          case Iop_QShl8x16:
   4808          case Iop_QShl16x8:
   4809          case Iop_QShl32x4:
   4810          case Iop_QShl64x2: {
   4811             HReg res = newVRegV(env);
   4812             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4813             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4814             UInt size;
   4815             switch (e->Iex.Binop.op) {
   4816                case Iop_QShl8x16: size = 0; break;
   4817                case Iop_QShl16x8: size = 1; break;
   4818                case Iop_QShl32x4: size = 2; break;
   4819                case Iop_QShl64x2: size = 3; break;
   4820                default: vassert(0);
   4821             }
   4822             addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
   4823                                           res, argL, argR, size, True));
   4824             return res;
   4825          }
   4826          case Iop_QSal8x16:
   4827          case Iop_QSal16x8:
   4828          case Iop_QSal32x4:
   4829          case Iop_QSal64x2: {
   4830             HReg res = newVRegV(env);
   4831             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4832             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4833             UInt size;
   4834             switch (e->Iex.Binop.op) {
   4835                case Iop_QSal8x16: size = 0; break;
   4836                case Iop_QSal16x8: size = 1; break;
   4837                case Iop_QSal32x4: size = 2; break;
   4838                case Iop_QSal64x2: size = 3; break;
   4839                default: vassert(0);
   4840             }
   4841             addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
   4842                                           res, argL, argR, size, True));
   4843             return res;
   4844          }
   4845          case Iop_QShlN8x16:
   4846          case Iop_QShlN16x8:
   4847          case Iop_QShlN32x4:
   4848          case Iop_QShlN64x2: {
   4849             HReg res = newVRegV(env);
   4850             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4851             UInt size, imm;
   4852             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   4853                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   4854                vpanic("ARM taget supports Iop_QShlNAxB with constant "
   4855                       "second argument only\n");
   4856             }
   4857             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   4858             switch (e->Iex.Binop.op) {
   4859                case Iop_QShlN8x16: size = 8 | imm; break;
   4860                case Iop_QShlN16x8: size = 16 | imm; break;
   4861                case Iop_QShlN32x4: size = 32 | imm; break;
   4862                case Iop_QShlN64x2: size = 64 | imm; break;
   4863                default: vassert(0);
   4864             }
   4865             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
   4866                                           res, argL, size, True));
   4867             return res;
   4868          }
   4869          case Iop_QShlN8Sx16:
   4870          case Iop_QShlN16Sx8:
   4871          case Iop_QShlN32Sx4:
   4872          case Iop_QShlN64Sx2: {
   4873             HReg res = newVRegV(env);
   4874             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4875             UInt size, imm;
   4876             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   4877                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   4878                vpanic("ARM taget supports Iop_QShlNASxB with constant "
   4879                       "second argument only\n");
   4880             }
   4881             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   4882             switch (e->Iex.Binop.op) {
   4883                case Iop_QShlN8Sx16: size = 8 | imm; break;
   4884                case Iop_QShlN16Sx8: size = 16 | imm; break;
   4885                case Iop_QShlN32Sx4: size = 32 | imm; break;
   4886                case Iop_QShlN64Sx2: size = 64 | imm; break;
   4887                default: vassert(0);
   4888             }
   4889             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
   4890                                           res, argL, size, True));
   4891             return res;
   4892          }
   4893          case Iop_QSalN8x16:
   4894          case Iop_QSalN16x8:
   4895          case Iop_QSalN32x4:
   4896          case Iop_QSalN64x2: {
   4897             HReg res = newVRegV(env);
   4898             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4899             UInt size, imm;
   4900             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   4901                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   4902                vpanic("ARM taget supports Iop_QShlNAxB with constant "
   4903                       "second argument only\n");
   4904             }
   4905             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   4906             switch (e->Iex.Binop.op) {
   4907                case Iop_QSalN8x16: size = 8 | imm; break;
   4908                case Iop_QSalN16x8: size = 16 | imm; break;
   4909                case Iop_QSalN32x4: size = 32 | imm; break;
   4910                case Iop_QSalN64x2: size = 64 | imm; break;
   4911                default: vassert(0);
   4912             }
   4913             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
   4914                                           res, argL, size, True));
   4915             return res;
   4916          }
   4917          case Iop_ShrN8x16:
   4918          case Iop_ShrN16x8:
   4919          case Iop_ShrN32x4:
   4920          case Iop_ShrN64x2: {
   4921             HReg res = newVRegV(env);
   4922             HReg tmp = newVRegV(env);
   4923             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4924             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   4925             HReg argR2 = newVRegI(env);
   4926             UInt size;
   4927             switch (e->Iex.Binop.op) {
   4928                case Iop_ShrN8x16: size = 0; break;
   4929                case Iop_ShrN16x8: size = 1; break;
   4930                case Iop_ShrN32x4: size = 2; break;
   4931                case Iop_ShrN64x2: size = 3; break;
   4932                default: vassert(0);
   4933             }
   4934             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
   4935             addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
   4936                                           tmp, argR2, 0, True));
   4937             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   4938                                           res, argL, tmp, size, True));
   4939             return res;
   4940          }
   4941          case Iop_ShlN8x16:
   4942          case Iop_ShlN16x8:
   4943          case Iop_ShlN32x4:
   4944          case Iop_ShlN64x2: {
   4945             HReg res = newVRegV(env);
   4946             HReg tmp = newVRegV(env);
   4947             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4948             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   4949             UInt size;
   4950             switch (e->Iex.Binop.op) {
   4951                case Iop_ShlN8x16: size = 0; break;
   4952                case Iop_ShlN16x8: size = 1; break;
   4953                case Iop_ShlN32x4: size = 2; break;
   4954                case Iop_ShlN64x2: size = 3; break;
   4955                default: vassert(0);
   4956             }
   4957             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
   4958             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
   4959                                           res, argL, tmp, size, True));
   4960             return res;
   4961          }
   4962          case Iop_SarN8x16:
   4963          case Iop_SarN16x8:
   4964          case Iop_SarN32x4:
   4965          case Iop_SarN64x2: {
   4966             HReg res = newVRegV(env);
   4967             HReg tmp = newVRegV(env);
   4968             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4969             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
   4970             HReg argR2 = newVRegI(env);
   4971             UInt size;
   4972             switch (e->Iex.Binop.op) {
   4973                case Iop_SarN8x16: size = 0; break;
   4974                case Iop_SarN16x8: size = 1; break;
   4975                case Iop_SarN32x4: size = 2; break;
   4976                case Iop_SarN64x2: size = 3; break;
   4977                default: vassert(0);
   4978             }
   4979             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
   4980             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
   4981             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
   4982                                           res, argL, tmp, size, True));
   4983             return res;
   4984          }
   4985          case Iop_CmpGT8Ux16:
   4986          case Iop_CmpGT16Ux8:
   4987          case Iop_CmpGT32Ux4: {
   4988             HReg res = newVRegV(env);
   4989             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   4990             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   4991             UInt size;
   4992             switch (e->Iex.Binop.op) {
   4993                case Iop_CmpGT8Ux16: size = 0; break;
   4994                case Iop_CmpGT16Ux8: size = 1; break;
   4995                case Iop_CmpGT32Ux4: size = 2; break;
   4996                default: vassert(0);
   4997             }
   4998             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
   4999                                            res, argL, argR, size, True));
   5000             return res;
   5001          }
   5002          case Iop_CmpGT8Sx16:
   5003          case Iop_CmpGT16Sx8:
   5004          case Iop_CmpGT32Sx4: {
   5005             HReg res = newVRegV(env);
   5006             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5007             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5008             UInt size;
   5009             switch (e->Iex.Binop.op) {
   5010                case Iop_CmpGT8Sx16: size = 0; break;
   5011                case Iop_CmpGT16Sx8: size = 1; break;
   5012                case Iop_CmpGT32Sx4: size = 2; break;
   5013                default: vassert(0);
   5014             }
   5015             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
   5016                                            res, argL, argR, size, True));
   5017             return res;
   5018          }
   5019          case Iop_CmpEQ8x16:
   5020          case Iop_CmpEQ16x8:
   5021          case Iop_CmpEQ32x4: {
   5022             HReg res = newVRegV(env);
   5023             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5024             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5025             UInt size;
   5026             switch (e->Iex.Binop.op) {
   5027                case Iop_CmpEQ8x16: size = 0; break;
   5028                case Iop_CmpEQ16x8: size = 1; break;
   5029                case Iop_CmpEQ32x4: size = 2; break;
   5030                default: vassert(0);
   5031             }
   5032             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
   5033                                            res, argL, argR, size, True));
   5034             return res;
   5035          }
   5036          case Iop_Mul8x16:
   5037          case Iop_Mul16x8:
   5038          case Iop_Mul32x4: {
   5039             HReg res = newVRegV(env);
   5040             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5041             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5042             UInt size = 0;
   5043             switch(e->Iex.Binop.op) {
   5044                case Iop_Mul8x16: size = 0; break;
   5045                case Iop_Mul16x8: size = 1; break;
   5046                case Iop_Mul32x4: size = 2; break;
   5047                default: vassert(0);
   5048             }
   5049             addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
   5050                                            res, argL, argR, size, True));
   5051             return res;
   5052          }
   5053          case Iop_Mull8Ux8:
   5054          case Iop_Mull16Ux4:
   5055          case Iop_Mull32Ux2: {
   5056             HReg res = newVRegV(env);
   5057             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   5058             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   5059             UInt size = 0;
   5060             switch(e->Iex.Binop.op) {
   5061                case Iop_Mull8Ux8: size = 0; break;
   5062                case Iop_Mull16Ux4: size = 1; break;
   5063                case Iop_Mull32Ux2: size = 2; break;
   5064                default: vassert(0);
   5065             }
   5066             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
   5067                                            res, argL, argR, size, True));
   5068             return res;
   5069          }
   5070 
   5071          case Iop_Mull8Sx8:
   5072          case Iop_Mull16Sx4:
   5073          case Iop_Mull32Sx2: {
   5074             HReg res = newVRegV(env);
   5075             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   5076             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   5077             UInt size = 0;
   5078             switch(e->Iex.Binop.op) {
   5079                case Iop_Mull8Sx8: size = 0; break;
   5080                case Iop_Mull16Sx4: size = 1; break;
   5081                case Iop_Mull32Sx2: size = 2; break;
   5082                default: vassert(0);
   5083             }
   5084             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
   5085                                            res, argL, argR, size, True));
   5086             return res;
   5087          }
   5088 
   5089          case Iop_QDMulHi16Sx8:
   5090          case Iop_QDMulHi32Sx4: {
   5091             HReg res = newVRegV(env);
   5092             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5093             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5094             UInt size = 0;
   5095             switch(e->Iex.Binop.op) {
   5096                case Iop_QDMulHi16Sx8: size = 1; break;
   5097                case Iop_QDMulHi32Sx4: size = 2; break;
   5098                default: vassert(0);
   5099             }
   5100             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
   5101                                            res, argL, argR, size, True));
   5102             return res;
   5103          }
   5104 
   5105          case Iop_QRDMulHi16Sx8:
   5106          case Iop_QRDMulHi32Sx4: {
   5107             HReg res = newVRegV(env);
   5108             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5109             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5110             UInt size = 0;
   5111             switch(e->Iex.Binop.op) {
   5112                case Iop_QRDMulHi16Sx8: size = 1; break;
   5113                case Iop_QRDMulHi32Sx4: size = 2; break;
   5114                default: vassert(0);
   5115             }
   5116             addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
   5117                                            res, argL, argR, size, True));
   5118             return res;
   5119          }
   5120 
   5121          case Iop_QDMulLong16Sx4:
   5122          case Iop_QDMulLong32Sx2: {
   5123             HReg res = newVRegV(env);
   5124             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   5125             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   5126             UInt size = 0;
   5127             switch(e->Iex.Binop.op) {
   5128                case Iop_QDMulLong16Sx4: size = 1; break;
   5129                case Iop_QDMulLong32Sx2: size = 2; break;
   5130                default: vassert(0);
   5131             }
   5132             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
   5133                                            res, argL, argR, size, True));
   5134             return res;
   5135          }
   5136          case Iop_PolynomialMul8x16: {
   5137             HReg res = newVRegV(env);
   5138             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5139             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5140             UInt size = 0;
   5141             addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
   5142                                            res, argL, argR, size, True));
   5143             return res;
   5144          }
   5145          case Iop_Max32Fx4: {
   5146             HReg res = newVRegV(env);
   5147             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5148             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5149             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
   5150                                            res, argL, argR, 2, True));
   5151             return res;
   5152          }
   5153          case Iop_Min32Fx4: {
   5154             HReg res = newVRegV(env);
   5155             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5156             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5157             addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
   5158                                            res, argL, argR, 2, True));
   5159             return res;
   5160          }
   5161          case Iop_PwMax32Fx4: {
   5162             HReg res = newVRegV(env);
   5163             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5164             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5165             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
   5166                                            res, argL, argR, 2, True));
   5167             return res;
   5168          }
   5169          case Iop_PwMin32Fx4: {
   5170             HReg res = newVRegV(env);
   5171             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5172             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5173             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
   5174                                            res, argL, argR, 2, True));
   5175             return res;
   5176          }
   5177          case Iop_CmpGT32Fx4: {
   5178             HReg res = newVRegV(env);
   5179             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5180             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5181             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
   5182                                            res, argL, argR, 2, True));
   5183             return res;
   5184          }
   5185          case Iop_CmpGE32Fx4: {
   5186             HReg res = newVRegV(env);
   5187             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5188             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5189             addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
   5190                                            res, argL, argR, 2, True));
   5191             return res;
   5192          }
   5193          case Iop_CmpEQ32Fx4: {
   5194             HReg res = newVRegV(env);
   5195             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5196             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5197             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
   5198                                            res, argL, argR, 2, True));
   5199             return res;
   5200          }
   5201 
   5202          case Iop_PolynomialMull8x8: {
   5203             HReg res = newVRegV(env);
   5204             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   5205             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
   5206             UInt size = 0;
   5207             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
   5208                                            res, argL, argR, size, True));
   5209             return res;
   5210          }
   5211          case Iop_F32ToFixed32Ux4_RZ:
   5212          case Iop_F32ToFixed32Sx4_RZ:
   5213          case Iop_Fixed32UToF32x4_RN:
   5214          case Iop_Fixed32SToF32x4_RN: {
   5215             HReg res = newVRegV(env);
   5216             HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
   5217             ARMNeonUnOp op;
   5218             UInt imm6;
   5219             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   5220                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   5221                   vpanic("ARM supports FP <-> Fixed conversion with constant "
   5222                          "second argument less than 33 only\n");
   5223             }
   5224             imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   5225             vassert(imm6 <= 32 && imm6 > 0);
   5226             imm6 = 64 - imm6;
   5227             switch(e->Iex.Binop.op) {
   5228                case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
   5229                case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
   5230                case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
   5231                case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
   5232                default: vassert(0);
   5233             }
   5234             addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
   5235             return res;
   5236          }
   5237          /*
   5238          FIXME remove if not used
   5239          case Iop_VDup8x16:
   5240          case Iop_VDup16x8:
   5241          case Iop_VDup32x4: {
   5242             HReg res = newVRegV(env);
   5243             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
   5244             UInt imm4;
   5245             UInt index;
   5246             if (e->Iex.Binop.arg2->tag != Iex_Const ||
   5247                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
   5248                   vpanic("ARM supports Iop_VDup with constant "
   5249                          "second argument less than 16 only\n");
   5250             }
   5251             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
   5252             switch(e->Iex.Binop.op) {
   5253                case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
   5254                case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
   5255                case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
   5256                default: vassert(0);
   5257             }
   5258             if (imm4 >= 16) {
   5259                vpanic("ARM supports Iop_VDup with constant "
   5260                       "second argument less than 16 only\n");
   5261             }
   5262             addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
   5263                                           res, argL, imm4, True));
   5264             return res;
   5265          }
   5266          */
   5267          case Iop_PwAdd8x16:
   5268          case Iop_PwAdd16x8:
   5269          case Iop_PwAdd32x4: {
   5270             HReg res = newVRegV(env);
   5271             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
   5272             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
   5273             UInt size = 0;
   5274             switch(e->Iex.Binop.op) {
   5275                case Iop_PwAdd8x16: size = 0; break;
   5276                case Iop_PwAdd16x8: size = 1; break;
   5277                case Iop_PwAdd32x4: size = 2; break;
   5278                default: vassert(0);
   5279             }
   5280             addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
   5281                                            res, argL, argR, size, True));
   5282             return res;
   5283          }
   5284          /* ... */
   5285          default:
   5286             break;
   5287       }
   5288    }
   5289 
   5290    if (e->tag == Iex_Triop) {
   5291       IRTriop *triop = e->Iex.Triop.details;
   5292 
   5293       switch (triop->op) {
   5294          case Iop_ExtractV128: {
   5295             HReg res = newVRegV(env);
   5296             HReg argL = iselNeonExpr(env, triop->arg1);
   5297             HReg argR = iselNeonExpr(env, triop->arg2);
   5298             UInt imm4;
   5299             if (triop->arg3->tag != Iex_Const ||
   5300                 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
   5301                vpanic("ARM target supports Iop_ExtractV128 with constant "
   5302                       "third argument less than 16 only\n");
   5303             }
   5304             imm4 = triop->arg3->Iex.Const.con->Ico.U8;
   5305             if (imm4 >= 16) {
   5306                vpanic("ARM target supports Iop_ExtractV128 with constant "
   5307                       "third argument less than 16 only\n");
   5308             }
   5309             addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
   5310                                            res, argL, argR, imm4, True));
   5311             return res;
   5312          }
   5313          case Iop_Mul32Fx4:
   5314          case Iop_Sub32Fx4:
   5315          case Iop_Add32Fx4: {
   5316             HReg res = newVRegV(env);
   5317             HReg argL = iselNeonExpr(env, triop->arg2);
   5318             HReg argR = iselNeonExpr(env, triop->arg3);
   5319             UInt size = 0;
   5320             ARMNeonBinOp op = ARMneon_INVALID;
   5321             switch (triop->op) {
   5322                case Iop_Mul32Fx4: op = ARMneon_VMULFP; break;
   5323                case Iop_Sub32Fx4: op = ARMneon_VSUBFP; break;
   5324                case Iop_Add32Fx4: op = ARMneon_VADDFP; break;
   5325                default: vassert(0);
   5326             }
   5327             addInstr(env, ARMInstr_NBinary(op, res, argL, argR, size, True));
   5328             return res;
   5329          }
   5330          default:
   5331             break;
   5332       }
   5333    }
   5334 
   5335    if (e->tag == Iex_ITE) { // VFD
   5336       ARMCondCode cc;
   5337       HReg r1  = iselNeonExpr(env, e->Iex.ITE.iftrue);
   5338       HReg r0  = iselNeonExpr(env, e->Iex.ITE.iffalse);
   5339       HReg dst = newVRegV(env);
   5340       addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
   5341       cc = iselCondCode(env, e->Iex.ITE.cond);
   5342       addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
   5343       return dst;
   5344    }
   5345 
   5346   neon_expr_bad:
   5347    ppIRExpr(e);
   5348    vpanic("iselNeonExpr_wrk");
   5349 }
   5350 
   5351 /*---------------------------------------------------------*/
   5352 /*--- ISEL: Floating point expressions (64 bit)         ---*/
   5353 /*---------------------------------------------------------*/
   5354 
   5355 /* Select code that evaluates the 64-bit floating point expression
   5356    |e| and hand back the register that holds the value.  This is a
   5357    checking wrapper around iselDblExpr_wrk: the register it yields is
   5358    asserted to be of class HRcFlt64 and to be virtual.  Code emitted
   5359    later by the caller must not modify the returned register.  */
   5360 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
   5361 {
   5362    HReg dreg = iselDblExpr_wrk( env, e );
   5363 #  if 0
   5364    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   5365 #  endif
   5366    vassert(hregClass(dreg) == HRcFlt64);
   5367    vassert(hregIsVirtual(dreg));
   5368    return dreg;
   5369 }
   5370 
   5371 /* DO NOT CALL THIS DIRECTLY */
   5372 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
   5373 {
   5374    IRType ty = typeOfIRExpr(env->type_env,e);
   5375    vassert(e);
   5376    vassert(ty == Ity_F64);
   5377 
   5378    if (e->tag == Iex_RdTmp) {
   5379       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   5380    }
   5381 
   5382    if (e->tag == Iex_Const) {
   5383       /* Just handle the zero case. */
   5384       IRConst* con = e->Iex.Const.con;
   5385       if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
   5386          HReg z32 = newVRegI(env);
   5387          HReg dst = newVRegD(env);
   5388          addInstr(env, ARMInstr_Imm32(z32, 0));
   5389          addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
   5390          return dst;
   5391       }
   5392    }
   5393 
   5394    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   5395       ARMAModeV* am;
   5396       HReg res = newVRegD(env);
   5397       vassert(e->Iex.Load.ty == Ity_F64);
   5398       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
   5399       addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
   5400       return res;
   5401    }
   5402 
   5403    if (e->tag == Iex_Get) {
   5404       // XXX This won't work if offset > 1020 or is not 0 % 4.
   5405       // In which case we'll have to generate more longwinded code.
   5406       ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
   5407       HReg       res = newVRegD(env);
   5408       addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
   5409       return res;
   5410    }
   5411 
   5412    if (e->tag == Iex_Unop) {
   5413       switch (e->Iex.Unop.op) {
   5414          case Iop_ReinterpI64asF64: {
   5415             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   5416                return iselNeon64Expr(env, e->Iex.Unop.arg);
   5417             } else {
   5418                HReg srcHi, srcLo;
   5419                HReg dst = newVRegD(env);
   5420                iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
   5421                addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
   5422                return dst;
   5423             }
   5424          }
   5425          case Iop_NegF64: {
   5426             HReg src = iselDblExpr(env, e->Iex.Unop.arg);
   5427             HReg dst = newVRegD(env);
   5428             addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
   5429             return dst;
   5430          }
   5431          case Iop_AbsF64: {
   5432             HReg src = iselDblExpr(env, e->Iex.Unop.arg);
   5433             HReg dst = newVRegD(env);
   5434             addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
   5435             return dst;
   5436          }
   5437          case Iop_F32toF64: {
   5438             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
   5439             HReg dst = newVRegD(env);
   5440             addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
   5441             return dst;
   5442          }
   5443          case Iop_I32UtoF64:
   5444          case Iop_I32StoF64: {
   5445             HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
   5446             HReg f32   = newVRegF(env);
   5447             HReg dst   = newVRegD(env);
   5448             Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
   5449             /* VMOV f32, src */
   5450             addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
   5451             /* FSITOD dst, f32 */
   5452             addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
   5453                                           dst, f32));
   5454             return dst;
   5455          }
   5456          default:
   5457             break;
   5458       }
   5459    }
   5460 
   5461    if (e->tag == Iex_Binop) {
   5462       switch (e->Iex.Binop.op) {
   5463          case Iop_SqrtF64: {
   5464             /* first arg is rounding mode; we ignore it. */
   5465             HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
   5466             HReg dst = newVRegD(env);
   5467             addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
   5468             return dst;
   5469          }
   5470          default:
   5471             break;
   5472       }
   5473    }
   5474 
   5475    if (e->tag == Iex_Triop) {
   5476       IRTriop *triop = e->Iex.Triop.details;
   5477 
   5478       switch (triop->op) {
   5479          case Iop_DivF64:
   5480          case Iop_MulF64:
   5481          case Iop_AddF64:
   5482          case Iop_SubF64: {
   5483             ARMVfpOp op = 0; /*INVALID*/
   5484             HReg argL = iselDblExpr(env, triop->arg2);
   5485             HReg argR = iselDblExpr(env, triop->arg3);
   5486             HReg dst  = newVRegD(env);
   5487             switch (triop->op) {
   5488                case Iop_DivF64: op = ARMvfp_DIV; break;
   5489                case Iop_MulF64: op = ARMvfp_MUL; break;
   5490                case Iop_AddF64: op = ARMvfp_ADD; break;
   5491                case Iop_SubF64: op = ARMvfp_SUB; break;
   5492                default: vassert(0);
   5493             }
   5494             addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
   5495             return dst;
   5496          }
   5497          default:
   5498             break;
   5499       }
   5500    }
   5501 
   5502    if (e->tag == Iex_ITE) { // VFD
   5503       if (ty == Ity_F64
   5504           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
   5505          HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
   5506          HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
   5507          HReg dst = newVRegD(env);
   5508          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
   5509          ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
   5510          addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
   5511          return dst;
   5512       }
   5513    }
   5514 
   5515    ppIRExpr(e);
   5516    vpanic("iselDblExpr_wrk");
   5517 }
   5518 
   5519 
   5520 /*---------------------------------------------------------*/
   5521 /*--- ISEL: Floating point expressions (32 bit)         ---*/
   5522 /*---------------------------------------------------------*/
   5523 
   5524 /* Compute a 32-bit floating point value into a register, the identity
   5525    of which is returned.  As with iselIntExpr_R, the reg may be either
   5526    real or virtual; in any case it must not be changed by subsequent
   5527    code emitted by the caller.  */
   5528 
   5529 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
   5530 {
   5531    HReg r = iselFltExpr_wrk( env, e );
   5532 #  if 0
   5533    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
   5534 #  endif
   5535    vassert(hregClass(r) == HRcFlt32);
   5536    vassert(hregIsVirtual(r));
   5537    return r;
   5538 }
   5539 
   5540 /* DO NOT CALL THIS DIRECTLY */
   5541 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
   5542 {
   5543    IRType ty = typeOfIRExpr(env->type_env,e);
   5544    vassert(e);
   5545    vassert(ty == Ity_F32);
   5546 
   5547    if (e->tag == Iex_RdTmp) {
   5548       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   5549    }
   5550 
   5551    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
   5552       ARMAModeV* am;
   5553       HReg res = newVRegF(env);
   5554       vassert(e->Iex.Load.ty == Ity_F32);
   5555       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
   5556       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
   5557       return res;
   5558    }
   5559 
   5560    if (e->tag == Iex_Get) {
   5561       // XXX This won't work if offset > 1020 or is not 0 % 4.
   5562       // In which case we'll have to generate more longwinded code.
   5563       ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
   5564       HReg       res = newVRegF(env);
   5565       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
   5566       return res;
   5567    }
   5568 
   5569    if (e->tag == Iex_Unop) {
   5570       switch (e->Iex.Unop.op) {
   5571          case Iop_ReinterpI32asF32: {
   5572             HReg dst = newVRegF(env);
   5573             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
   5574             addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
   5575             return dst;
   5576          }
   5577          case Iop_NegF32: {
   5578             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
   5579             HReg dst = newVRegF(env);
   5580             addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
   5581             return dst;
   5582          }
   5583          case Iop_AbsF32: {
   5584             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
   5585             HReg dst = newVRegF(env);
   5586             addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
   5587             return dst;
   5588          }
   5589          default:
   5590             break;
   5591       }
   5592    }
   5593 
   5594    if (e->tag == Iex_Binop) {
   5595       switch (e->Iex.Binop.op) {
   5596          case Iop_SqrtF32: {
   5597             /* first arg is rounding mode; we ignore it. */
   5598             HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
   5599             HReg dst = newVRegF(env);
   5600             addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
   5601             return dst;
   5602          }
   5603          case Iop_F64toF32: {
   5604             HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
   5605             set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
   5606             HReg valS = newVRegF(env);
   5607             /* FCVTSD valS, valD */
   5608             addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
   5609             set_VFP_rounding_default(env);
   5610             return valS;
   5611          }
   5612          default:
   5613             break;
   5614       }
   5615    }
   5616 
   5617    if (e->tag == Iex_Triop) {
   5618       IRTriop *triop = e->Iex.Triop.details;
   5619 
   5620       switch (triop->op) {
   5621          case Iop_DivF32:
   5622          case Iop_MulF32:
   5623          case Iop_AddF32:
   5624          case Iop_SubF32: {
   5625             ARMVfpOp op = 0; /*INVALID*/
   5626             HReg argL = iselFltExpr(env, triop->arg2);
   5627             HReg argR = iselFltExpr(env, triop->arg3);
   5628             HReg dst  = newVRegF(env);
   5629             switch (triop->op) {
   5630                case Iop_DivF32: op = ARMvfp_DIV; break;
   5631                case Iop_MulF32: op = ARMvfp_MUL; break;
   5632                case Iop_AddF32: op = ARMvfp_ADD; break;
   5633                case Iop_SubF32: op = ARMvfp_SUB; break;
   5634                default: vassert(0);
   5635             }
   5636             addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
   5637             return dst;
   5638          }
   5639          default:
   5640             break;
   5641       }
   5642    }
   5643 
   5644    if (e->tag == Iex_ITE) { // VFD
   5645       if (ty == Ity_F32
   5646           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
   5647          ARMCondCode cc;
   5648          HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
   5649          HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
   5650          HReg dst = newVRegF(env);
   5651          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
   5652          cc = iselCondCode(env, e->Iex.ITE.cond);
   5653          addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
   5654          return dst;
   5655       }
   5656    }
   5657 
   5658    ppIRExpr(e);
   5659    vpanic("iselFltExpr_wrk");
   5660 }
   5661 
   5662 
   5663 /*---------------------------------------------------------*/
   5664 /*--- ISEL: Statements                                  ---*/
   5665 /*---------------------------------------------------------*/
   5666 
   5667 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
   5668 {
   5669    if (vex_traceflags & VEX_TRACE_VCODE) {
   5670       vex_printf("\n-- ");
   5671       ppIRStmt(stmt);
   5672       vex_printf("\n");
   5673    }
   5674    switch (stmt->tag) {
   5675 
   5676    /* --------- STORE --------- */
   5677    /* little-endian write to memory */
   5678    case Ist_Store: {
   5679       IRType    tya  = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
   5680       IRType    tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
   5681       IREndness end  = stmt->Ist.Store.end;
   5682 
   5683       if (tya != Ity_I32 || end != Iend_LE)
   5684          goto stmt_fail;
   5685 
   5686       if (tyd == Ity_I32) {
   5687          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
   5688          ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
   5689          addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
   5690          return;
   5691       }
   5692       if (tyd == Ity_I16) {
   5693          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
   5694          ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
   5695          addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
   5696                                        False/*!isLoad*/,
   5697                                        False/*!isSignedLoad*/, rD, am));
   5698          return;
   5699       }
   5700       if (tyd == Ity_I8) {
   5701          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
   5702          ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
   5703          addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
   5704          return;
   5705       }
   5706       if (tyd == Ity_I64) {
   5707          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   5708             HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
   5709             ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
   5710             addInstr(env, ARMInstr_NLdStD(False, dD, am));
   5711          } else {
   5712             HReg rDhi, rDlo, rA;
   5713             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
   5714             rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
   5715             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
   5716                                           ARMAMode1_RI(rA,4)));
   5717             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
   5718                                           ARMAMode1_RI(rA,0)));
   5719          }
   5720          return;
   5721       }
   5722       if (tyd == Ity_F64) {
   5723          HReg       dD = iselDblExpr(env, stmt->Ist.Store.data);
   5724          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
   5725          addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
   5726          return;
   5727       }
   5728       if (tyd == Ity_F32) {
   5729          HReg       fD = iselFltExpr(env, stmt->Ist.Store.data);
   5730          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
   5731          addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
   5732          return;
   5733       }
   5734       if (tyd == Ity_V128) {
   5735          HReg       qD = iselNeonExpr(env, stmt->Ist.Store.data);
   5736          ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
   5737          addInstr(env, ARMInstr_NLdStQ(False, qD, am));
   5738          return;
   5739       }
   5740 
   5741       break;
   5742    }
   5743 
   5744    /* --------- CONDITIONAL STORE --------- */
   5745    /* conditional little-endian write to memory */
   5746    case Ist_StoreG: {
   5747       IRStoreG* sg   = stmt->Ist.StoreG.details;
   5748       IRType    tya  = typeOfIRExpr(env->type_env, sg->addr);
   5749       IRType    tyd  = typeOfIRExpr(env->type_env, sg->data);
   5750       IREndness end  = sg->end;
   5751 
   5752       if (tya != Ity_I32 || end != Iend_LE)
   5753          goto stmt_fail;
   5754 
   5755       switch (tyd) {
   5756          case Ity_I8:
   5757          case Ity_I32: {
   5758             HReg        rD = iselIntExpr_R(env, sg->data);
   5759             ARMAMode1*  am = iselIntExpr_AMode1(env, sg->addr);
   5760             ARMCondCode cc = iselCondCode(env, sg->guard);
   5761             addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
   5762                              (cc, False/*!isLoad*/, rD, am));
   5763             return;
   5764          }
   5765          case Ity_I16: {
   5766             HReg        rD = iselIntExpr_R(env, sg->data);
   5767             ARMAMode2*  am = iselIntExpr_AMode2(env, sg->addr);
   5768             ARMCondCode cc = iselCondCode(env, sg->guard);
   5769             addInstr(env, ARMInstr_LdSt16(cc,
   5770                                           False/*!isLoad*/,
   5771                                           False/*!isSignedLoad*/, rD, am));
   5772             return;
   5773          }
   5774          default:
   5775             break;
   5776       }
   5777       break;
   5778    }
   5779 
   5780    /* --------- CONDITIONAL LOAD --------- */
   5781    /* conditional little-endian load from memory */
   5782    case Ist_LoadG: {
   5783       IRLoadG*  lg   = stmt->Ist.LoadG.details;
   5784       IRType    tya  = typeOfIRExpr(env->type_env, lg->addr);
   5785       IREndness end  = lg->end;
   5786 
   5787       if (tya != Ity_I32 || end != Iend_LE)
   5788          goto stmt_fail;
   5789 
   5790       switch (lg->cvt) {
   5791          case ILGop_8Uto32:
   5792          case ILGop_Ident32: {
   5793             HReg        rAlt = iselIntExpr_R(env, lg->alt);
   5794             ARMAMode1*  am   = iselIntExpr_AMode1(env, lg->addr);
   5795             HReg        rD   = lookupIRTemp(env, lg->dst);
   5796             addInstr(env, mk_iMOVds_RR(rD, rAlt));
   5797             ARMCondCode cc   = iselCondCode(env, lg->guard);
   5798             addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
   5799                                                     : ARMInstr_LdSt8U)
   5800                              (cc, True/*isLoad*/, rD, am));
   5801             return;
   5802          }
   5803          case ILGop_16Sto32:
   5804          case ILGop_16Uto32:
   5805          case ILGop_8Sto32: {
   5806             HReg        rAlt = iselIntExpr_R(env, lg->alt);
   5807             ARMAMode2*  am   = iselIntExpr_AMode2(env, lg->addr);
   5808             HReg        rD   = lookupIRTemp(env, lg->dst);
   5809             addInstr(env, mk_iMOVds_RR(rD, rAlt));
   5810             ARMCondCode cc   = iselCondCode(env, lg->guard);
   5811             if (lg->cvt == ILGop_8Sto32) {
   5812                addInstr(env, ARMInstr_Ld8S(cc, rD, am));
   5813             } else {
   5814                vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
   5815                Bool sx = lg->cvt == ILGop_16Sto32;
   5816                addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
   5817             }
   5818             return;
   5819          }
   5820          default:
   5821             break;
   5822       }
   5823       break;
   5824    }
   5825 
   5826    /* --------- PUT --------- */
   5827    /* write guest state, fixed offset */
   5828    case Ist_Put: {
   5829        IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
   5830 
   5831        if (tyd == Ity_I32) {
   5832            HReg       rD = iselIntExpr_R(env, stmt->Ist.Put.data);
   5833            ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
   5834            addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
   5835            return;
   5836        }
   5837        if (tyd == Ity_I64) {
   5838           if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   5839              HReg addr = newVRegI(env);
   5840              HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
   5841              addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
   5842                                                 stmt->Ist.Put.offset));
   5843              addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
   5844           } else {
   5845              HReg rDhi, rDlo;
   5846              ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
   5847                                            stmt->Ist.Put.offset + 0);
   5848              ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
   5849                                            stmt->Ist.Put.offset + 4);
   5850              iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
   5851              addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
   5852                                            rDhi, am4));
   5853              addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
   5854                                            rDlo, am0));
   5855           }
   5856           return;
   5857        }
   5858        if (tyd == Ity_F64) {
   5859           // XXX This won't work if offset > 1020 or is not 0 % 4.
   5860           // In which case we'll have to generate more longwinded code.
   5861           ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
   5862           HReg       rD = iselDblExpr(env, stmt->Ist.Put.data);
   5863           addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
   5864           return;
   5865        }
   5866        if (tyd == Ity_F32) {
   5867           // XXX This won't work if offset > 1020 or is not 0 % 4.
   5868           // In which case we'll have to generate more longwinded code.
   5869           ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
   5870           HReg       rD = iselFltExpr(env, stmt->Ist.Put.data);
   5871           addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
   5872           return;
   5873        }
   5874        if (tyd == Ity_V128) {
   5875           HReg addr = newVRegI(env);
   5876           HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
   5877           addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
   5878                                        stmt->Ist.Put.offset));
   5879           addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
   5880           return;
   5881        }
   5882        break;
   5883    }
   5884 
   5885    /* --------- TMP --------- */
   5886    /* assign value to temporary */
   5887    case Ist_WrTmp: {
   5888       IRTemp tmp = stmt->Ist.WrTmp.tmp;
   5889       IRType ty = typeOfIRTemp(env->type_env, tmp);
   5890 
   5891       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
   5892          ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
   5893                                           env, stmt->Ist.WrTmp.data);
   5894          HReg     dst  = lookupIRTemp(env, tmp);
   5895          addInstr(env, ARMInstr_Mov(dst,ri84));
   5896          return;
   5897       }
   5898       if (ty == Ity_I1) {
   5899          /* Here, we are generating a I1 value into a 32 bit register.
   5900             Make sure the value in the register is only zero or one,
   5901             but no other.  This allows optimisation of the
   5902             1Uto32(tmp:I1) case, by making it simply a copy of the
   5903             register holding 'tmp'.  The point being that the value in
   5904             the register holding 'tmp' can only have been created
   5905             here. */
   5906          HReg        dst  = lookupIRTemp(env, tmp);
   5907          ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
   5908          addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
   5909          addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
   5910          return;
   5911       }
   5912       if (ty == Ity_I64) {
   5913          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   5914             HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
   5915             HReg dst = lookupIRTemp(env, tmp);
   5916             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
   5917          } else {
   5918             HReg rHi, rLo, dstHi, dstLo;
   5919             iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
   5920             lookupIRTemp64( &dstHi, &dstLo, env, tmp);
   5921             addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
   5922             addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
   5923          }
   5924          return;
   5925       }
   5926       if (ty == Ity_F64) {
   5927          HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
   5928          HReg dst = lookupIRTemp(env, tmp);
   5929          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
   5930          return;
   5931       }
   5932       if (ty == Ity_F32) {
   5933          HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
   5934          HReg dst = lookupIRTemp(env, tmp);
   5935          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
   5936          return;
   5937       }
   5938       if (ty == Ity_V128) {
   5939          HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
   5940          HReg dst = lookupIRTemp(env, tmp);
   5941          addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
   5942          return;
   5943       }
   5944       break;
   5945    }
   5946 
   5947    /* --------- Call to DIRTY helper --------- */
   5948    /* call complex ("dirty") helper function */
   5949    case Ist_Dirty: {
   5950       IRDirty* d = stmt->Ist.Dirty.details;
   5951 
   5952       /* Figure out the return type, if any. */
   5953       IRType retty = Ity_INVALID;
   5954       if (d->tmp != IRTemp_INVALID)
   5955          retty = typeOfIRTemp(env->type_env, d->tmp);
   5956 
   5957       Bool retty_ok = False;
   5958       switch (retty) {
   5959          case Ity_INVALID: /* function doesn't return anything */
   5960          case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
   5961          //case Ity_V128: //ATC
   5962             retty_ok = True; break;
   5963          default:
   5964             break;
   5965       }
   5966       if (!retty_ok)
   5967          break; /* will go to stmt_fail: */
   5968 
   5969       /* Marshal args, do the call, and set the return value to 0x555..555
   5970          if this is a conditional call that returns a value and the
   5971          call is skipped. */
   5972       UInt   addToSp = 0;
   5973       RetLoc rloc    = mk_RetLoc_INVALID();
   5974       doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
   5975       vassert(is_sane_RetLoc(rloc));
   5976 
   5977       /* Now figure out what to do with the returned value, if any. */
   5978       switch (retty) {
   5979          case Ity_INVALID: {
   5980             /* No return value.  Nothing to do. */
   5981             vassert(d->tmp == IRTemp_INVALID);
   5982             vassert(rloc.pri == RLPri_None);
   5983             vassert(addToSp == 0);
   5984             return;
   5985          }
   5986          case Ity_I64: {
   5987             vassert(rloc.pri == RLPri_2Int);
   5988             vassert(addToSp == 0);
   5989             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   5990                HReg tmp = lookupIRTemp(env, d->tmp);
   5991                addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
   5992                                                         hregARM_R0()));
   5993             } else {
   5994                HReg dstHi, dstLo;
   5995                /* The returned value is in r1:r0.  Park it in the
   5996                   register-pair associated with tmp. */
   5997                lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
   5998                addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
   5999                addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
   6000             }
   6001             return;
   6002          }
   6003          case Ity_I32: case Ity_I16: case Ity_I8: {
   6004             vassert(rloc.pri == RLPri_Int);
   6005             vassert(addToSp == 0);
   6006             /* The returned value is in r0.  Park it in the register
   6007                associated with tmp. */
   6008             HReg dst = lookupIRTemp(env, d->tmp);
   6009             addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
   6010             return;
   6011          }
   6012          case Ity_V128: {
   6013             vassert(0); // ATC.  The code that this produces really
   6014             // needs to be looked at, to verify correctness.
   6015             // I don't think this can ever happen though, since the
   6016             // ARM front end never produces 128-bit loads/stores.
   6017             // Hence the following is mostly theoretical.
   6018             /* The returned value is on the stack, and *retloc tells
   6019                us where.  Fish it off the stack and then move the
   6020                stack pointer upwards to clear it, as directed by
   6021                doHelperCall. */
   6022             vassert(rloc.pri == RLPri_V128SpRel);
   6023             vassert(rloc.spOff < 256); // else ARMRI84_I84(_,0) can't encode it
   6024             vassert(addToSp >= 16);
   6025             vassert(addToSp < 256); // ditto reason as for rloc.spOff
   6026             HReg dst = lookupIRTemp(env, d->tmp);
   6027             HReg tmp = newVRegI(env);
   6028             HReg r13 = hregARM_R13(); // sp
   6029             addInstr(env, ARMInstr_Alu(ARMalu_ADD,
   6030                                        tmp, r13, ARMRI84_I84(rloc.spOff,0)));
   6031             ARMAModeN* am = mkARMAModeN_R(tmp);
   6032             addInstr(env, ARMInstr_NLdStQ(True/*load*/, dst, am));
   6033             addInstr(env, ARMInstr_Alu(ARMalu_ADD,
   6034                                        r13, r13, ARMRI84_I84(addToSp,0)));
   6035             return;
   6036          }
   6037          default:
   6038             /*NOTREACHED*/
   6039             vassert(0);
   6040       }
   6041       break;
   6042    }
   6043 
   6044    /* --------- Load Linked and Store Conditional --------- */
   6045    case Ist_LLSC: {
   6046       if (stmt->Ist.LLSC.storedata == NULL) {
   6047          /* LL */
   6048          IRTemp res = stmt->Ist.LLSC.result;
   6049          IRType ty  = typeOfIRTemp(env->type_env, res);
   6050          if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
   6051             Int  szB   = 0;
   6052             HReg r_dst = lookupIRTemp(env, res);
   6053             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
   6054             switch (ty) {
   6055                case Ity_I8:  szB = 1; break;
   6056                case Ity_I16: szB = 2; break;
   6057                case Ity_I32: szB = 4; break;
   6058                default:      vassert(0);
   6059             }
   6060             addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
   6061             addInstr(env, ARMInstr_LdrEX(szB));
   6062             addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
   6063             return;
   6064          }
   6065          if (ty == Ity_I64) {
   6066             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
   6067             addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
   6068             addInstr(env, ARMInstr_LdrEX(8));
   6069             /* Result is in r3:r2.  On a non-NEON capable CPU, we must
   6070                move it into a result register pair.  On a NEON capable
   6071                CPU, the result register will be a 64 bit NEON
   6072                register, so we must move it there instead. */
   6073             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
   6074                HReg dst = lookupIRTemp(env, res);
   6075                addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
   6076                                                         hregARM_R2()));
   6077             } else {
   6078                HReg r_dst_hi, r_dst_lo;
   6079                lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
   6080                addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
   6081                addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
   6082             }
   6083             return;
   6084          }
   6085          /*NOTREACHED*/
   6086          vassert(0);
   6087       } else {
   6088          /* SC */
   6089          IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
   6090          if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
   6091             Int  szB = 0;
   6092             HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
   6093             HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
   6094             switch (tyd) {
   6095                case Ity_I8:  szB = 1; break;
   6096                case Ity_I16: szB = 2; break;
   6097                case Ity_I32: szB = 4; break;
   6098                default:      vassert(0);
   6099             }
   6100             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
   6101             addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
   6102             addInstr(env, ARMInstr_StrEX(szB));
   6103          } else {
   6104             vassert(tyd == Ity_I64);
   6105             /* This is really ugly.  There is no is/is-not NEON
   6106                decision akin to the case for LL, because iselInt64Expr
   6107                fudges this for us, and always gets the result into two
   6108                GPRs even if this means moving it from a NEON
   6109                register. */
   6110             HReg rDhi, rDlo;
   6111             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
   6112             HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
   6113             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
   6114             addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
   6115             addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
   6116             addInstr(env, ARMInstr_StrEX(8));
   6117          }
   6118          /* now r0 is 1 if failed, 0 if success.  Change to IR
   6119             conventions (0 is fail, 1 is success).  Also transfer
   6120             result to r_res. */
   6121          IRTemp   res   = stmt->Ist.LLSC.result;
   6122          IRType   ty    = typeOfIRTemp(env->type_env, res);
   6123          HReg     r_res = lookupIRTemp(env, res);
   6124          ARMRI84* one   = ARMRI84_I84(1,0);
   6125          vassert(ty == Ity_I1);
   6126          addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
   6127          /* And be conservative -- mask off all but the lowest bit */
   6128          addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
   6129          return;
   6130       }
   6131       break;
   6132    }
   6133 
   6134    /* --------- MEM FENCE --------- */
   6135    case Ist_MBE:
   6136       switch (stmt->Ist.MBE.event) {
   6137          case Imbe_Fence:
   6138             addInstr(env, ARMInstr_MFence());
   6139             return;
   6140          case Imbe_CancelReservation:
   6141             addInstr(env, ARMInstr_CLREX());
   6142             return;
   6143          default:
   6144             break;
   6145       }
   6146       break;
   6147 
   6148    /* --------- INSTR MARK --------- */
   6149    /* Doesn't generate any executable code ... */
   6150    case Ist_IMark:
   6151        return;
   6152 
   6153    /* --------- NO-OP --------- */
   6154    case Ist_NoOp:
   6155        return;
   6156 
   6157    /* --------- EXIT --------- */
   6158    case Ist_Exit: {
   6159       if (stmt->Ist.Exit.dst->tag != Ico_U32)
   6160          vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
   6161 
   6162       ARMCondCode cc     = iselCondCode(env, stmt->Ist.Exit.guard);
   6163       ARMAMode1*  amR15T = ARMAMode1_RI(hregARM_R8(),
   6164                                         stmt->Ist.Exit.offsIP);
   6165 
   6166       /* Case: boring transfer to known address */
   6167       if (stmt->Ist.Exit.jk == Ijk_Boring
   6168           || stmt->Ist.Exit.jk == Ijk_Call
   6169           || stmt->Ist.Exit.jk == Ijk_Ret) {
   6170          if (env->chainingAllowed) {
   6171             /* .. almost always true .. */
   6172             /* Skip the event check at the dst if this is a forwards
   6173                edge. */
   6174             Bool toFastEP
   6175                = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
   6176             if (0) vex_printf("%s", toFastEP ? "Y" : ",");
   6177             addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
   6178                                            amR15T, cc, toFastEP));
   6179          } else {
   6180             /* .. very occasionally .. */
   6181             /* We can't use chaining, so ask for an assisted transfer,
   6182                as that's the only alternative that is allowable. */
   6183             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
   6184             addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
   6185          }
   6186          return;
   6187       }
   6188 
   6189       /* Case: assisted transfer to arbitrary address */
   6190       switch (stmt->Ist.Exit.jk) {
   6191          /* Keep this list in sync with that in iselNext below */
   6192          case Ijk_ClientReq:
   6193          case Ijk_NoDecode:
   6194          case Ijk_NoRedir:
   6195          case Ijk_Sys_syscall:
   6196          case Ijk_InvalICache:
   6197          case Ijk_Yield:
   6198          {
   6199             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
   6200             addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
   6201                                              stmt->Ist.Exit.jk));
   6202             return;
   6203          }
   6204          default:
   6205             break;
   6206       }
   6207 
   6208       /* Do we ever expect to see any other kind? */
   6209       goto stmt_fail;
   6210    }
   6211 
   6212    default: break;
   6213    }
   6214   stmt_fail:
   6215    ppIRStmt(stmt);
   6216    vpanic("iselStmt");
   6217 }
   6218 
   6219 
   6220 /*---------------------------------------------------------*/
   6221 /*--- ISEL: Basic block terminators (Nexts)             ---*/
   6222 /*---------------------------------------------------------*/
   6223 
   6224 static void iselNext ( ISelEnv* env,
   6225                        IRExpr* next, IRJumpKind jk, Int offsIP )
   6226 {
   6227    if (vex_traceflags & VEX_TRACE_VCODE) {
   6228       vex_printf( "\n-- PUT(%d) = ", offsIP);
   6229       ppIRExpr( next );
   6230       vex_printf( "; exit-");
   6231       ppIRJumpKind(jk);
   6232       vex_printf( "\n");
   6233    }
   6234 
   6235    /* Case: boring transfer to known address */
   6236    if (next->tag == Iex_Const) {
   6237       IRConst* cdst = next->Iex.Const.con;
   6238       vassert(cdst->tag == Ico_U32);
   6239       if (jk == Ijk_Boring || jk == Ijk_Call) {
   6240          /* Boring transfer to known address */
   6241          ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
   6242          if (env->chainingAllowed) {
   6243             /* .. almost always true .. */
   6244             /* Skip the event check at the dst if this is a forwards
   6245                edge. */
   6246             Bool toFastEP
   6247                = ((Addr64)cdst->Ico.U32) > env->max_ga;
   6248             if (0) vex_printf("%s", toFastEP ? "X" : ".");
   6249             addInstr(env, ARMInstr_XDirect(cdst->Ico.U32,
   6250                                            amR15T, ARMcc_AL,
   6251                                            toFastEP));
   6252          } else {
   6253             /* .. very occasionally .. */
   6254             /* We can't use chaining, so ask for an assisted transfer,
   6255                as that's the only alternative that is allowable. */
   6256             HReg r = iselIntExpr_R(env, next);
   6257             addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
   6258                                              Ijk_Boring));
   6259          }
   6260          return;
   6261       }
   6262    }
   6263 
   6264    /* Case: call/return (==boring) transfer to any address */
   6265    switch (jk) {
   6266       case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
   6267          HReg       r      = iselIntExpr_R(env, next);
   6268          ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
   6269          if (env->chainingAllowed) {
   6270             addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL));
   6271          } else {
   6272             addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
   6273                                                 Ijk_Boring));
   6274          }
   6275          return;
   6276       }
   6277       default:
   6278          break;
   6279    }
   6280 
   6281    /* Case: assisted transfer to arbitrary address */
   6282    switch (jk) {
   6283       /* Keep this list in sync with that for Ist_Exit above */
   6284       case Ijk_ClientReq:
   6285       case Ijk_NoDecode:
   6286       case Ijk_NoRedir:
   6287       case Ijk_Sys_syscall:
   6288       case Ijk_InvalICache:
   6289       case Ijk_Yield:
   6290       {
   6291          HReg       r      = iselIntExpr_R(env, next);
   6292          ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
   6293          addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk));
   6294          return;
   6295       }
   6296       default:
   6297          break;
   6298    }
   6299 
   6300    vex_printf( "\n-- PUT(%d) = ", offsIP);
   6301    ppIRExpr( next );
   6302    vex_printf( "; exit-");
   6303    ppIRJumpKind(jk);
   6304    vex_printf( "\n");
   6305    vassert(0); // are we expecting any other kind?
   6306 }
   6307 
   6308 
   6309 /*---------------------------------------------------------*/
   6310 /*--- Insn selector top-level                           ---*/
   6311 /*---------------------------------------------------------*/
   6312 
   6313 /* Translate an entire SB to arm code. */
   6314 
   6315 HInstrArray* iselSB_ARM ( IRSB* bb,
   6316                           VexArch      arch_host,
   6317                           VexArchInfo* archinfo_host,
   6318                           VexAbiInfo*  vbi/*UNUSED*/,
   6319                           Int offs_Host_EvC_Counter,
   6320                           Int offs_Host_EvC_FailAddr,
   6321                           Bool chainingAllowed,
   6322                           Bool addProfInc,
   6323                           Addr64 max_ga )
   6324 {
   6325    Int       i, j;
   6326    HReg      hreg, hregHI;
   6327    ISelEnv*  env;
   6328    UInt      hwcaps_host = archinfo_host->hwcaps;
   6329    ARMAMode1 *amCounter, *amFailAddr;
   6330 
   6331    /* sanity ... */
   6332    vassert(arch_host == VexArchARM);
   6333 
   6334    /* guard against unexpected space regressions */
   6335    vassert(sizeof(ARMInstr) <= 28);
   6336 
   6337    /* hwcaps should not change from one ISEL call to another. */
   6338    arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
   6339 
   6340    /* Make up an initial environment to use. */
   6341    env = LibVEX_Alloc(sizeof(ISelEnv));
   6342    env->vreg_ctr = 0;
   6343 
   6344    /* Set up output code array. */
   6345    env->code = newHInstrArray();
   6346 
   6347    /* Copy BB's type env. */
   6348    env->type_env = bb->tyenv;
   6349 
   6350    /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
   6351       change as we go along. */
   6352    env->n_vregmap = bb->tyenv->types_used;
   6353    env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   6354    env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   6355 
   6356    /* and finally ... */
   6357    env->chainingAllowed = chainingAllowed;
   6358    env->hwcaps          = hwcaps_host;
   6359    env->max_ga          = max_ga;
   6360 
   6361    /* For each IR temporary, allocate a suitably-kinded virtual
   6362       register. */
   6363    j = 0;
   6364    for (i = 0; i < env->n_vregmap; i++) {
   6365       hregHI = hreg = INVALID_HREG;
   6366       switch (bb->tyenv->types[i]) {
   6367          case Ity_I1:
   6368          case Ity_I8:
   6369          case Ity_I16:
   6370          case Ity_I32:  hreg   = mkHReg(j++, HRcInt32, True); break;
   6371          case Ity_I64:
   6372             if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
   6373                hreg = mkHReg(j++, HRcFlt64, True);
   6374             } else {
   6375                hregHI = mkHReg(j++, HRcInt32, True);
   6376                hreg   = mkHReg(j++, HRcInt32, True);
   6377             }
   6378             break;
   6379          case Ity_F32:  hreg   = mkHReg(j++, HRcFlt32, True); break;
   6380          case Ity_F64:  hreg   = mkHReg(j++, HRcFlt64, True); break;
   6381          case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
   6382          default: ppIRType(bb->tyenv->types[i]);
   6383                   vpanic("iselBB: IRTemp type");
   6384       }
   6385       env->vregmap[i]   = hreg;
   6386       env->vregmapHI[i] = hregHI;
   6387    }
   6388    env->vreg_ctr = j;
   6389 
   6390    /* The very first instruction must be an event check. */
   6391    amCounter  = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
   6392    amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
   6393    addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
   6394 
   6395    /* Possibly a block counter increment (for profiling).  At this
   6396       point we don't know the address of the counter, so just pretend
   6397       it is zero.  It will have to be patched later, but before this
   6398       translation is used, by a call to LibVEX_patchProfCtr. */
   6399    if (addProfInc) {
   6400       addInstr(env, ARMInstr_ProfInc());
   6401    }
   6402 
   6403    /* Ok, finally we can iterate over the statements. */
   6404    for (i = 0; i < bb->stmts_used; i++)
   6405       iselStmt(env, bb->stmts[i]);
   6406 
   6407    iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
   6408 
   6409    /* record the number of vregs we used. */
   6410    env->code->n_vregs = env->vreg_ctr;
   6411    return env->code;
   6412 }
   6413 
   6414 
   6415 /*---------------------------------------------------------------*/
   6416 /*--- end                                     host_arm_isel.c ---*/
   6417 /*---------------------------------------------------------------*/
   6418