Home | History | Annotate | Download | only in priv
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- begin                                       guest_arm_toIR.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2013 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    NEON support is
     14    Copyright (C) 2010-2013 Samsung Electronics
     15    contributed by Dmitry Zhurikhin <zhur (at) ispras.ru>
     16               and Kirill Batuzov <batuzovk (at) ispras.ru>
     17 
     18    This program is free software; you can redistribute it and/or
     19    modify it under the terms of the GNU General Public License as
     20    published by the Free Software Foundation; either version 2 of the
     21    License, or (at your option) any later version.
     22 
     23    This program is distributed in the hope that it will be useful, but
     24    WITHOUT ANY WARRANTY; without even the implied warranty of
     25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     26    General Public License for more details.
     27 
     28    You should have received a copy of the GNU General Public License
     29    along with this program; if not, write to the Free Software
     30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     31    02110-1301, USA.
     32 
     33    The GNU General Public License is contained in the file COPYING.
     34 */
     35 
     36 /* XXXX thumb to check:
     37    that all cases where putIRegT writes r15, we generate a jump.
     38 
     39    All uses of newTemp assign to an IRTemp and not a UInt
     40 
     41    For all thumb loads and stores, including VFP ones, new-ITSTATE is
     42    backed out before the memory op, and restored afterwards.  This
     43    needs to happen even after we go uncond.  (and for sure it doesn't
     44    happen for VFP loads/stores right now).
     45 
     46    VFP on thumb: check that we exclude all r13/r15 cases that we
     47    should.
     48 
     49    XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
     50    taking into account the number of insns guarded by an IT.
     51 
   remove the nasty hack, in the spechelper, of looking for Or32(...,
   0xE0) as the first arg to armg_calculate_condition, and instead
   use Slice44 as specified in comments in the spechelper.
     55 
     56    add specialisations for armg_calculate_flag_c and _v, as they
     57    are moderately often needed in Thumb code.
     58 
     59    Correctness: ITSTATE handling in Thumb SVCs is wrong.
     60 
     61    Correctness (obscure): in m_transtab, when invalidating code
     62    address ranges, invalidate up to 18 bytes after the end of the
     63    range.  This is because the ITSTATE optimisation at the top of
     64    _THUMB_WRK below analyses up to 18 bytes before the start of any
     65    given instruction, and so might depend on the invalidated area.
     66 */
     67 
     68 /* Limitations, etc
     69 
     70    - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
     71      These instructions are non-restartable in the case where the
     72      transfer(s) fault.
     73 
     74    - SWP: the restart jump back is Ijk_Boring; it should be
     75      Ijk_NoRedir but that's expensive.  See comments on casLE() in
     76      guest_x86_toIR.c.
     77 */
     78 
     79 /* "Special" instructions.
     80 
     81    This instruction decoder can decode four special instructions
     82    which mean nothing natively (are no-ops as far as regs/mem are
     83    concerned) but have meaning for supporting Valgrind.  A special
     84    instruction is flagged by a 16-byte preamble:
     85 
     86       E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
     87       (mov r12, r12, ROR #3;   mov r12, r12, ROR #13;
     88        mov r12, r12, ROR #29;  mov r12, r12, ROR #19)
     89 
   Following that, one of the following 4 is allowed
   (standard interpretation in parentheses):
     92 
     93       E18AA00A (orr r10,r10,r10)   R3 = client_request ( R4 )
     94       E18BB00B (orr r11,r11,r11)   R3 = guest_NRADDR
     95       E18CC00C (orr r12,r12,r12)   branch-and-link-to-noredir R4
     96       E1899009 (orr r9,r9,r9)      IR injection
     97 
     98    Any other bytes following the 16-byte preamble are illegal and
     99    constitute a failure in instruction decoding.  This all assumes
    100    that the preamble will never occur except in specific code
    101    fragments designed for Valgrind to catch.
    102 */
    103 
    104 /* Translates ARM(v5) code to IR. */
    105 
    106 #include "libvex_basictypes.h"
    107 #include "libvex_ir.h"
    108 #include "libvex.h"
    109 #include "libvex_guest_arm.h"
    110 
    111 #include "main_util.h"
    112 #include "main_globals.h"
    113 #include "guest_generic_bb_to_IR.h"
    114 #include "guest_arm_defs.h"
    115 
    116 
    117 /*------------------------------------------------------------*/
    118 /*--- Globals                                              ---*/
    119 /*------------------------------------------------------------*/
    120 
/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction.
*/
    125 
/* CONST: is the host bigendian?  This has to do with float vs double
   register accesses on VFP, but it's complex and not properly thought
   out. */
static Bool host_is_bigendian;

/* CONST: The guest address for the instruction currently being
   translated.  This is the real, "decoded" address (not subject
   to the CPSR.T kludge).  getIRegA and getIRegT use it to produce
   the architected r15 value (this address plus 8, respectively
   plus 4) as a constant. */
static Addr32 guest_R15_curr_instr_notENC;

/* CONST, FOR ASSERTIONS ONLY.  Indicates whether currently processed
   insn is Thumb (True) or ARM (False).  Checked by the
   ASSERT_IS_THUMB and ASSERT_IS_ARM macros. */
static Bool __curr_is_Thumb;

/* MOD: The IRSB* into which we're generating code.  stmt() appends
   statements to it and newTemp() allocates temporaries in its type
   environment. */
static IRSB* irsb;

/* These are to do with handling writes to r15.  They are initially
   set at the start of disInstr_ARM_WRK to indicate no update,
   possibly updated during the routine, and examined again at the end.
   If they have been set to indicate a r15 update then a jump is
   generated.  Note, "explicit" jumps (b, bx, etc) are generated
   directly, not using this mechanism -- this is intended to handle
   the implicit-style jumps resulting from (eg) assigning to r15 as
   the result of insns we wouldn't normally consider branchy. */

/* MOD.  Initially False; set to True iff abovementioned handling is
   required.  putIRegA sets it when asked to write r15. */
static Bool r15written;

/* MOD.  Initially IRTemp_INVALID.  If the r15 branch to be generated
   is conditional, this holds the gating IRTemp :: Ity_I32.  If the
   branch to be generated is unconditional, this remains
   IRTemp_INVALID.  putIRegA copies its guardT argument here when
   asked to write r15. */
static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
    161 
    162 /* MOD.  Initially Ijk_Boring.  If an r15 branch is to be generated,
    163    this holds the jump kind. */
    164 static IRTemp r15kind;
    165 
    166 
    167 /*------------------------------------------------------------*/
    168 /*--- Debugging output                                     ---*/
    169 /*------------------------------------------------------------*/
    170 
    171 #define DIP(format, args...)           \
    172    if (vex_traceflags & VEX_TRACE_FE)  \
    173       vex_printf(format, ## args)
    174 
    175 #define DIS(buf, format, args...)      \
    176    if (vex_traceflags & VEX_TRACE_FE)  \
    177       vex_sprintf(buf, format, ## args)
    178 
/* Assert that the instruction currently being translated is a Thumb
   instruction, as recorded in __curr_is_Thumb.  Expands to a single
   statement. */
#define ASSERT_IS_THUMB \
   do { vassert(__curr_is_Thumb); } while (0)

/* Assert that the instruction currently being translated is an ARM
   (non-Thumb) instruction, as recorded in __curr_is_Thumb.  Expands
   to a single statement. */
#define ASSERT_IS_ARM \
   do { vassert(! __curr_is_Thumb); } while (0)
    184 
    185 
    186 /*------------------------------------------------------------*/
    187 /*--- Helper bits and pieces for deconstructing the        ---*/
    188 /*--- arm insn stream.                                     ---*/
    189 /*------------------------------------------------------------*/
    190 
    191 /* Do a little-endian load of a 32-bit word, regardless of the
    192    endianness of the underlying host. */
    193 static inline UInt getUIntLittleEndianly ( UChar* p )
    194 {
    195    UInt w = 0;
    196    w = (w << 8) | p[3];
    197    w = (w << 8) | p[2];
    198    w = (w << 8) | p[1];
    199    w = (w << 8) | p[0];
    200    return w;
    201 }
    202 
    203 /* Do a little-endian load of a 16-bit word, regardless of the
    204    endianness of the underlying host. */
    205 static inline UShort getUShortLittleEndianly ( UChar* p )
    206 {
    207    UShort w = 0;
    208    w = (w << 8) | p[1];
    209    w = (w << 8) | p[0];
    210    return w;
    211 }
    212 
    213 static UInt ROR32 ( UInt x, UInt sh ) {
    214    vassert(sh >= 0 && sh < 32);
    215    if (sh == 0)
    216       return x;
    217    else
    218       return (x << (32-sh)) | (x >> sh);
    219 }
    220 
    221 static Int popcount32 ( UInt x )
    222 {
    223    Int res = 0, i;
    224    for (i = 0; i < 32; i++) {
    225       res += (x & 1);
    226       x >>= 1;
    227    }
    228    return res;
    229 }
    230 
    231 static UInt setbit32 ( UInt x, Int ix, UInt b )
    232 {
    233    UInt mask = 1 << ix;
    234    x &= ~mask;
    235    x |= ((b << ix) & mask);
    236    return x;
    237 }
    238 
    239 #define BITS2(_b1,_b0) \
    240    (((_b1) << 1) | (_b0))
    241 
    242 #define BITS3(_b2,_b1,_b0)                      \
    243   (((_b2) << 2) | ((_b1) << 1) | (_b0))
    244 
    245 #define BITS4(_b3,_b2,_b1,_b0) \
    246    (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
    247 
    248 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    249    ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    250     | BITS4((_b3),(_b2),(_b1),(_b0)))
    251 
    252 #define BITS5(_b4,_b3,_b2,_b1,_b0)  \
    253    (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
    254 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
    255    (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    256 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    257    (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    258 
    259 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)      \
    260    (((_b8) << 8) \
    261     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    262 
    263 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    264    (((_b9) << 9) | ((_b8) << 8)                                \
    265     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    266 
    267 /* produces _uint[_bMax:_bMin] */
    268 #define SLICE_UInt(_uint,_bMax,_bMin) \
    269    (( ((UInt)(_uint)) >> (_bMin)) \
    270     & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
    271 
    272 
    273 /*------------------------------------------------------------*/
    274 /*--- Helper bits and pieces for creating IR fragments.    ---*/
    275 /*------------------------------------------------------------*/
    276 
    277 static IRExpr* mkU64 ( ULong i )
    278 {
    279    return IRExpr_Const(IRConst_U64(i));
    280 }
    281 
    282 static IRExpr* mkU32 ( UInt i )
    283 {
    284    return IRExpr_Const(IRConst_U32(i));
    285 }
    286 
    287 static IRExpr* mkU8 ( UInt i )
    288 {
    289    vassert(i < 256);
    290    return IRExpr_Const(IRConst_U8( (UChar)i ));
    291 }
    292 
    293 static IRExpr* mkexpr ( IRTemp tmp )
    294 {
    295    return IRExpr_RdTmp(tmp);
    296 }
    297 
    298 static IRExpr* unop ( IROp op, IRExpr* a )
    299 {
    300    return IRExpr_Unop(op, a);
    301 }
    302 
    303 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
    304 {
    305    return IRExpr_Binop(op, a1, a2);
    306 }
    307 
    308 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
    309 {
    310    return IRExpr_Triop(op, a1, a2, a3);
    311 }
    312 
    313 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
    314 {
    315    return IRExpr_Load(Iend_LE, ty, addr);
    316 }
    317 
/* Append the statement st to the superblock held in the file-level
   variable "irsb", which is the IRSB currently being generated. */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}
    323 
    324 static void assign ( IRTemp dst, IRExpr* e )
    325 {
    326    stmt( IRStmt_WrTmp(dst, e) );
    327 }
    328 
    329 static void storeLE ( IRExpr* addr, IRExpr* data )
    330 {
    331    stmt( IRStmt_Store(Iend_LE, addr, data) );
    332 }
    333 
    334 static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
    335 {
    336    if (guardT == IRTemp_INVALID) {
    337       /* unconditional */
    338       storeLE(addr, data);
    339    } else {
    340       stmt( IRStmt_StoreG(Iend_LE, addr, data,
    341                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
    342    }
    343 }
    344 
    345 static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
    346                             IRExpr* addr, IRExpr* alt,
    347                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
    348 {
    349    if (guardT == IRTemp_INVALID) {
    350       /* unconditional */
    351       IRExpr* loaded = NULL;
    352       switch (cvt) {
    353          case ILGop_Ident32:
    354             loaded = loadLE(Ity_I32, addr); break;
    355          case ILGop_8Uto32:
    356             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
    357          case ILGop_8Sto32:
    358             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
    359          case ILGop_16Uto32:
    360             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
    361          case ILGop_16Sto32:
    362             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
    363          default:
    364             vassert(0);
    365       }
    366       vassert(loaded != NULL);
    367       assign(dst, loaded);
    368    } else {
    369       /* Generate a guarded load into 'dst', but apply 'cvt' to the
    370          loaded data before putting the data in 'dst'.  If the load
    371          does not take place, 'alt' is placed directly in 'dst'. */
    372       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
    373                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
    374    }
    375 }
    376 
    377 /* Generate a new temporary of the given type. */
    378 static IRTemp newTemp ( IRType ty )
    379 {
    380    vassert(isPlausibleIRType(ty));
    381    return newIRTemp( irsb->tyenv, ty );
    382 }
    383 
    384 /* Produces a value in 0 .. 3, which is encoded as per the type
    385    IRRoundingMode. */
    386 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
    387 {
    388    return mkU32(Irrm_NEAREST);
    389 }
    390 
    391 /* Generate an expression for SRC rotated right by ROT. */
    392 static IRExpr* genROR32( IRTemp src, Int rot )
    393 {
    394    vassert(rot >= 0 && rot < 32);
    395    if (rot == 0)
    396       return mkexpr(src);
    397    return
    398       binop(Iop_Or32,
    399             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
    400             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
    401 }
    402 
    403 static IRExpr* mkU128 ( ULong i )
    404 {
    405    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
    406 }
    407 
    408 /* Generate a 4-aligned version of the given expression if
    409    the given condition is true.  Else return it unchanged. */
    410 static IRExpr* align4if ( IRExpr* e, Bool b )
    411 {
    412    if (b)
    413       return binop(Iop_And32, e, mkU32(~3));
    414    else
    415       return e;
    416 }
    417 
    418 
    419 /*------------------------------------------------------------*/
    420 /*--- Helpers for accessing guest registers.               ---*/
    421 /*------------------------------------------------------------*/
    422 
/* Byte offsets of fields within VexGuestARMState. */

/* Integer registers r0 .. r14, and r15 (held in guest_R15T).  These
   are what integerGuestRegOffset returns. */
#define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
#define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
#define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
#define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
#define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
#define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
#define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
#define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
#define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
#define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
#define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
#define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
#define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
#define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
#define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
#define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)

/* Condition-code thunk fields, and the guest_NRADDR field. */
#define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
#define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)

/* Double registers d0 .. d31.  These are what doubleGuestRegOffset
   returns; quadGuestRegOffset returns the even-numbered ones. */
#define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
#define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
#define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
#define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
#define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
#define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
#define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
#define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
#define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
#define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
#define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
#define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
#define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
#define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
#define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
#define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
#define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
#define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
#define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
#define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
#define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
#define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
#define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
#define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
#define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
#define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
#define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
#define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
#define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
#define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
#define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
#define OFFB_D31      offsetof(VexGuestARMState,guest_D31)

/* Assorted further guest state fields: FPSCR, TPIDRURO, ITSTATE, the
   Q flag and the four GE flags. */
#define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
#define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
#define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
#define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
#define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
#define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
#define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
#define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)

/* The guest_CMSTART and guest_CMLEN fields. */
#define OFFB_CMSTART  offsetof(VexGuestARMState,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARMState,guest_CMLEN)
    490 
    491 
    492 /* ---------------- Integer registers ---------------- */
    493 
    494 static Int integerGuestRegOffset ( UInt iregNo )
    495 {
    496    /* Do we care about endianness here?  We do if sub-parts of integer
    497       registers are accessed, but I don't think that ever happens on
    498       ARM. */
    499    switch (iregNo) {
    500       case 0:  return OFFB_R0;
    501       case 1:  return OFFB_R1;
    502       case 2:  return OFFB_R2;
    503       case 3:  return OFFB_R3;
    504       case 4:  return OFFB_R4;
    505       case 5:  return OFFB_R5;
    506       case 6:  return OFFB_R6;
    507       case 7:  return OFFB_R7;
    508       case 8:  return OFFB_R8;
    509       case 9:  return OFFB_R9;
    510       case 10: return OFFB_R10;
    511       case 11: return OFFB_R11;
    512       case 12: return OFFB_R12;
    513       case 13: return OFFB_R13;
    514       case 14: return OFFB_R14;
    515       case 15: return OFFB_R15T;
    516       default: vassert(0);
    517    }
    518 }
    519 
    520 /* Plain ("low level") read from a reg; no +8 offset magic for r15. */
    521 static IRExpr* llGetIReg ( UInt iregNo )
    522 {
    523    vassert(iregNo < 16);
    524    return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
    525 }
    526 
    527 /* Architected read from a reg in ARM mode.  This automagically adds 8
    528    to all reads of r15. */
    529 static IRExpr* getIRegA ( UInt iregNo )
    530 {
    531    IRExpr* e;
    532    ASSERT_IS_ARM;
    533    vassert(iregNo < 16);
    534    if (iregNo == 15) {
    535       /* If asked for r15, don't read the guest state value, as that
    536          may not be up to date in the case where loop unrolling has
    537          happened, because the first insn's write to the block is
    538          omitted; hence in the 2nd and subsequent unrollings we don't
    539          have a correct value in guest r15.  Instead produce the
    540          constant that we know would be produced at this point. */
    541       vassert(0 == (guest_R15_curr_instr_notENC & 3));
    542       e = mkU32(guest_R15_curr_instr_notENC + 8);
    543    } else {
    544       e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
    545    }
    546    return e;
    547 }
    548 
    549 /* Architected read from a reg in Thumb mode.  This automagically adds
    550    4 to all reads of r15. */
    551 static IRExpr* getIRegT ( UInt iregNo )
    552 {
    553    IRExpr* e;
    554    ASSERT_IS_THUMB;
    555    vassert(iregNo < 16);
    556    if (iregNo == 15) {
    557       /* Ditto comment in getIReg. */
    558       vassert(0 == (guest_R15_curr_instr_notENC & 1));
    559       e = mkU32(guest_R15_curr_instr_notENC + 4);
    560    } else {
    561       e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
    562    }
    563    return e;
    564 }
    565 
    566 /* Plain ("low level") write to a reg; no jump or alignment magic for
    567    r15. */
    568 static void llPutIReg ( UInt iregNo, IRExpr* e )
    569 {
    570    vassert(iregNo < 16);
    571    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
    572    stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
    573 }
    574 
    575 /* Architected write to an integer register in ARM mode.  If it is to
    576    r15, record info so at the end of this insn's translation, a branch
    577    to it can be made.  Also handles conditional writes to the
    578    register: if guardT == IRTemp_INVALID then the write is
    579    unconditional.  If writing r15, also 4-align it. */
    580 static void putIRegA ( UInt       iregNo,
    581                        IRExpr*    e,
    582                        IRTemp     guardT /* :: Ity_I32, 0 or 1 */,
    583                        IRJumpKind jk /* if a jump is generated */ )
    584 {
    585    /* if writing r15, force e to be 4-aligned. */
    586    // INTERWORKING FIXME.  this needs to be relaxed so that
    587    // puts caused by LDMxx which load r15 interwork right.
    588    // but is no aligned too relaxed?
    589    //if (iregNo == 15)
    590    //   e = binop(Iop_And32, e, mkU32(~3));
    591    ASSERT_IS_ARM;
    592    /* So, generate either an unconditional or a conditional write to
    593       the reg. */
    594    if (guardT == IRTemp_INVALID) {
    595       /* unconditional write */
    596       llPutIReg( iregNo, e );
    597    } else {
    598       llPutIReg( iregNo,
    599                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    600                              e, llGetIReg(iregNo) ));
    601    }
    602    if (iregNo == 15) {
    603       // assert against competing r15 updates.  Shouldn't
    604       // happen; should be ruled out by the instr matching
    605       // logic.
    606       vassert(r15written == False);
    607       vassert(r15guard   == IRTemp_INVALID);
    608       vassert(r15kind    == Ijk_Boring);
    609       r15written = True;
    610       r15guard   = guardT;
    611       r15kind    = jk;
    612    }
    613 }
    614 
    615 
    616 /* Architected write to an integer register in Thumb mode.  Writes to
    617    r15 are not allowed.  Handles conditional writes to the register:
    618    if guardT == IRTemp_INVALID then the write is unconditional. */
    619 static void putIRegT ( UInt       iregNo,
    620                        IRExpr*    e,
    621                        IRTemp     guardT /* :: Ity_I32, 0 or 1 */ )
    622 {
    623    /* So, generate either an unconditional or a conditional write to
    624       the reg. */
    625    ASSERT_IS_THUMB;
    626    vassert(iregNo >= 0 && iregNo <= 14);
    627    if (guardT == IRTemp_INVALID) {
    628       /* unconditional write */
    629       llPutIReg( iregNo, e );
    630    } else {
    631       llPutIReg( iregNo,
    632                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    633                              e, llGetIReg(iregNo) ));
    634    }
    635 }
    636 
    637 
    638 /* Thumb16 and Thumb32 only.
    639    Returns true if reg is 13 or 15.  Implements the BadReg
    640    predicate in the ARM ARM. */
    641 static Bool isBadRegT ( UInt r )
    642 {
    643    vassert(r <= 15);
    644    ASSERT_IS_THUMB;
    645    return r == 13 || r == 15;
    646 }
    647 
    648 
    649 /* ---------------- Double registers ---------------- */
    650 
    651 static Int doubleGuestRegOffset ( UInt dregNo )
    652 {
    653    /* Do we care about endianness here?  Probably do if we ever get
    654       into the situation of dealing with the single-precision VFP
    655       registers. */
    656    switch (dregNo) {
    657       case 0:  return OFFB_D0;
    658       case 1:  return OFFB_D1;
    659       case 2:  return OFFB_D2;
    660       case 3:  return OFFB_D3;
    661       case 4:  return OFFB_D4;
    662       case 5:  return OFFB_D5;
    663       case 6:  return OFFB_D6;
    664       case 7:  return OFFB_D7;
    665       case 8:  return OFFB_D8;
    666       case 9:  return OFFB_D9;
    667       case 10: return OFFB_D10;
    668       case 11: return OFFB_D11;
    669       case 12: return OFFB_D12;
    670       case 13: return OFFB_D13;
    671       case 14: return OFFB_D14;
    672       case 15: return OFFB_D15;
    673       case 16: return OFFB_D16;
    674       case 17: return OFFB_D17;
    675       case 18: return OFFB_D18;
    676       case 19: return OFFB_D19;
    677       case 20: return OFFB_D20;
    678       case 21: return OFFB_D21;
    679       case 22: return OFFB_D22;
    680       case 23: return OFFB_D23;
    681       case 24: return OFFB_D24;
    682       case 25: return OFFB_D25;
    683       case 26: return OFFB_D26;
    684       case 27: return OFFB_D27;
    685       case 28: return OFFB_D28;
    686       case 29: return OFFB_D29;
    687       case 30: return OFFB_D30;
    688       case 31: return OFFB_D31;
    689       default: vassert(0);
    690    }
    691 }
    692 
    693 /* Plain ("low level") read from a VFP Dreg. */
    694 static IRExpr* llGetDReg ( UInt dregNo )
    695 {
    696    vassert(dregNo < 32);
    697    return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
    698 }
    699 
    700 /* Architected read from a VFP Dreg. */
    701 static IRExpr* getDReg ( UInt dregNo ) {
    702    return llGetDReg( dregNo );
    703 }
    704 
    705 /* Plain ("low level") write to a VFP Dreg. */
    706 static void llPutDReg ( UInt dregNo, IRExpr* e )
    707 {
    708    vassert(dregNo < 32);
    709    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
    710    stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
    711 }
    712 
    713 /* Architected write to a VFP Dreg.  Handles conditional writes to the
    714    register: if guardT == IRTemp_INVALID then the write is
    715    unconditional. */
    716 static void putDReg ( UInt    dregNo,
    717                       IRExpr* e,
    718                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    719 {
    720    /* So, generate either an unconditional or a conditional write to
    721       the reg. */
    722    if (guardT == IRTemp_INVALID) {
    723       /* unconditional write */
    724       llPutDReg( dregNo, e );
    725    } else {
    726       llPutDReg( dregNo,
    727                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    728                              e, llGetDReg(dregNo) ));
    729    }
    730 }
    731 
    732 /* And now exactly the same stuff all over again, but this time
    733    taking/returning I64 rather than F64, to support 64-bit Neon
    734    ops. */
    735 
    736 /* Plain ("low level") read from a Neon Integer Dreg. */
    737 static IRExpr* llGetDRegI64 ( UInt dregNo )
    738 {
    739    vassert(dregNo < 32);
    740    return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
    741 }
    742 
    743 /* Architected read from a Neon Integer Dreg. */
    744 static IRExpr* getDRegI64 ( UInt dregNo ) {
    745    return llGetDRegI64( dregNo );
    746 }
    747 
    748 /* Plain ("low level") write to a Neon Integer Dreg. */
    749 static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
    750 {
    751    vassert(dregNo < 32);
    752    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
    753    stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
    754 }
    755 
    756 /* Architected write to a Neon Integer Dreg.  Handles conditional
    757    writes to the register: if guardT == IRTemp_INVALID then the write
    758    is unconditional. */
    759 static void putDRegI64 ( UInt    dregNo,
    760                          IRExpr* e,
    761                          IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    762 {
    763    /* So, generate either an unconditional or a conditional write to
    764       the reg. */
    765    if (guardT == IRTemp_INVALID) {
    766       /* unconditional write */
    767       llPutDRegI64( dregNo, e );
    768    } else {
    769       llPutDRegI64( dregNo,
    770                     IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    771                                 e, llGetDRegI64(dregNo) ));
    772    }
    773 }
    774 
    775 /* ---------------- Quad registers ---------------- */
    776 
    777 static Int quadGuestRegOffset ( UInt qregNo )
    778 {
    779    /* Do we care about endianness here?  Probably do if we ever get
    780       into the situation of dealing with the 64 bit Neon registers. */
    781    switch (qregNo) {
    782       case 0:  return OFFB_D0;
    783       case 1:  return OFFB_D2;
    784       case 2:  return OFFB_D4;
    785       case 3:  return OFFB_D6;
    786       case 4:  return OFFB_D8;
    787       case 5:  return OFFB_D10;
    788       case 6:  return OFFB_D12;
    789       case 7:  return OFFB_D14;
    790       case 8:  return OFFB_D16;
    791       case 9:  return OFFB_D18;
    792       case 10: return OFFB_D20;
    793       case 11: return OFFB_D22;
    794       case 12: return OFFB_D24;
    795       case 13: return OFFB_D26;
    796       case 14: return OFFB_D28;
    797       case 15: return OFFB_D30;
    798       default: vassert(0);
    799    }
    800 }
    801 
    802 /* Plain ("low level") read from a Neon Qreg. */
    803 static IRExpr* llGetQReg ( UInt qregNo )
    804 {
    805    vassert(qregNo < 16);
    806    return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
    807 }
    808 
    809 /* Architected read from a Neon Qreg. */
    810 static IRExpr* getQReg ( UInt qregNo ) {
    811    return llGetQReg( qregNo );
    812 }
    813 
    814 /* Plain ("low level") write to a Neon Qreg. */
    815 static void llPutQReg ( UInt qregNo, IRExpr* e )
    816 {
    817    vassert(qregNo < 16);
    818    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
    819    stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
    820 }
    821 
    822 /* Architected write to a Neon Qreg.  Handles conditional writes to the
    823    register: if guardT == IRTemp_INVALID then the write is
    824    unconditional. */
    825 static void putQReg ( UInt    qregNo,
    826                       IRExpr* e,
    827                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    828 {
    829    /* So, generate either an unconditional or a conditional write to
    830       the reg. */
    831    if (guardT == IRTemp_INVALID) {
    832       /* unconditional write */
    833       llPutQReg( qregNo, e );
    834    } else {
    835       llPutQReg( qregNo,
    836                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    837                              e, llGetQReg(qregNo) ));
    838    }
    839 }
    840 
    841 
    842 /* ---------------- Float registers ---------------- */
    843 
    844 static Int floatGuestRegOffset ( UInt fregNo )
    845 {
    846    /* Start with the offset of the containing double, and then correct
    847       for endianness.  Actually this is completely bogus and needs
    848       careful thought. */
    849    Int off;
    850    vassert(fregNo < 32);
    851    off = doubleGuestRegOffset(fregNo >> 1);
    852    if (host_is_bigendian) {
    853       vassert(0);
    854    } else {
    855       if (fregNo & 1)
    856          off += 4;
    857    }
    858    return off;
    859 }
    860 
    861 /* Plain ("low level") read from a VFP Freg. */
    862 static IRExpr* llGetFReg ( UInt fregNo )
    863 {
    864    vassert(fregNo < 32);
    865    return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
    866 }
    867 
    868 /* Architected read from a VFP Freg. */
    869 static IRExpr* getFReg ( UInt fregNo ) {
    870    return llGetFReg( fregNo );
    871 }
    872 
    873 /* Plain ("low level") write to a VFP Freg. */
    874 static void llPutFReg ( UInt fregNo, IRExpr* e )
    875 {
    876    vassert(fregNo < 32);
    877    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
    878    stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
    879 }
    880 
    881 /* Architected write to a VFP Freg.  Handles conditional writes to the
    882    register: if guardT == IRTemp_INVALID then the write is
    883    unconditional. */
    884 static void putFReg ( UInt    fregNo,
    885                       IRExpr* e,
    886                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    887 {
    888    /* So, generate either an unconditional or a conditional write to
    889       the reg. */
    890    if (guardT == IRTemp_INVALID) {
    891       /* unconditional write */
    892       llPutFReg( fregNo, e );
    893    } else {
    894       llPutFReg( fregNo,
    895                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    896                              e, llGetFReg(fregNo) ));
    897    }
    898 }
    899 
    900 
    901 /* ---------------- Misc registers ---------------- */
    902 
    903 static void putMiscReg32 ( UInt    gsoffset,
    904                            IRExpr* e, /* :: Ity_I32 */
    905                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    906 {
    907    switch (gsoffset) {
    908       case OFFB_FPSCR:   break;
    909       case OFFB_QFLAG32: break;
    910       case OFFB_GEFLAG0: break;
    911       case OFFB_GEFLAG1: break;
    912       case OFFB_GEFLAG2: break;
    913       case OFFB_GEFLAG3: break;
    914       default: vassert(0); /* awaiting more cases */
    915    }
    916    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
    917 
    918    if (guardT == IRTemp_INVALID) {
    919       /* unconditional write */
    920       stmt(IRStmt_Put(gsoffset, e));
    921    } else {
    922       stmt(IRStmt_Put(
    923          gsoffset,
    924          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    925                      e, IRExpr_Get(gsoffset, Ity_I32) )
    926       ));
    927    }
    928 }
    929 
    930 static IRTemp get_ITSTATE ( void )
    931 {
    932    ASSERT_IS_THUMB;
    933    IRTemp t = newTemp(Ity_I32);
    934    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
    935    return t;
    936 }
    937 
    938 static void put_ITSTATE ( IRTemp t )
    939 {
    940    ASSERT_IS_THUMB;
    941    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
    942 }
    943 
    944 static IRTemp get_QFLAG32 ( void )
    945 {
    946    IRTemp t = newTemp(Ity_I32);
    947    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
    948    return t;
    949 }
    950 
    951 static void put_QFLAG32 ( IRTemp t, IRTemp condT )
    952 {
    953    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
    954 }
    955 
    956 /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
    957    Status Register) to indicate that overflow or saturation occurred.
    958    Nb: t must be zero to denote no saturation, and any nonzero
    959    value to indicate saturation. */
    960 static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
    961 {
    962    IRTemp old = get_QFLAG32();
    963    IRTemp nyu = newTemp(Ity_I32);
    964    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
    965    put_QFLAG32(nyu, condT);
    966 }
    967 
    968 /* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
    969    flagNo: which flag bit to set [3...0]
    970    lowbits_to_ignore:  0 = look at all 32 bits
    971                        8 = look at top 24 bits only
    972                       16 = look at top 16 bits only
    973                       31 = look at the top bit only
    974    e: input value to be evaluated.
    975    The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
    976    masked out.  If the resulting value is zero then the GE flag is
    977    set to 0; any other value sets the flag to 1. */
    978 static void put_GEFLAG32 ( Int flagNo,            /* 0, 1, 2 or 3 */
    979                            Int lowbits_to_ignore, /* 0, 8, 16 or 31   */
    980                            IRExpr* e,             /* Ity_I32 */
    981                            IRTemp condT )
    982 {
    983    vassert( flagNo >= 0 && flagNo <= 3 );
    984    vassert( lowbits_to_ignore == 0  ||
    985             lowbits_to_ignore == 8  ||
    986             lowbits_to_ignore == 16 ||
    987             lowbits_to_ignore == 31 );
    988    IRTemp masked = newTemp(Ity_I32);
    989    assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
    990 
    991    switch (flagNo) {
    992       case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
    993       case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
    994       case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
    995       case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
    996       default: vassert(0);
    997    }
    998 }
    999 
   1000 /* Return the (32-bit, zero-or-nonzero representation scheme) of
   1001    the specified GE flag. */
   1002 static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
   1003 {
   1004    switch (flagNo) {
   1005       case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
   1006       case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
   1007       case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
   1008       case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
   1009       default: vassert(0);
   1010    }
   1011 }
   1012 
   1013 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
   1014    2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
   1015    15 of the value.  All other bits are ignored. */
   1016 static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
   1017 {
   1018    IRTemp ge10 = newTemp(Ity_I32);
   1019    IRTemp ge32 = newTemp(Ity_I32);
   1020    assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
   1021    assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
   1022    put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
   1023    put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
   1024    put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
   1025    put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
   1026 }
   1027 
   1028 
   1029 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3
   1030    from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
   1031    bit 7.  All other bits are ignored. */
   1032 static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
   1033 {
   1034    IRTemp ge0 = newTemp(Ity_I32);
   1035    IRTemp ge1 = newTemp(Ity_I32);
   1036    IRTemp ge2 = newTemp(Ity_I32);
   1037    IRTemp ge3 = newTemp(Ity_I32);
   1038    assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
   1039    assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
   1040    assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
   1041    assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
   1042    put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
   1043    put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
   1044    put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
   1045    put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
   1046 }
   1047 
   1048 
   1049 /* ---------------- FPSCR stuff ---------------- */
   1050 
   1051 /* Generate IR to get hold of the rounding mode bits in FPSCR, and
   1052    convert them to IR format.  Bind the final result to the
   1053    returned temp. */
   1054 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
   1055 {
   1056    /* The ARMvfp encoding for rounding mode bits is:
   1057          00  to nearest
   1058          01  to +infinity
   1059          10  to -infinity
   1060          11  to zero
   1061       We need to convert that to the IR encoding:
   1062          00  to nearest (the default)
   1063          10  to +infinity
   1064          01  to -infinity
   1065          11  to zero
   1066       Which can be done by swapping bits 0 and 1.
   1067       The rmode bits are at 23:22 in FPSCR.
   1068    */
   1069    IRTemp armEncd = newTemp(Ity_I32);
   1070    IRTemp swapped = newTemp(Ity_I32);
   1071    /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
   1072       we don't zero out bits 24 and above, since the assignment to
   1073       'swapped' will mask them out anyway. */
   1074    assign(armEncd,
   1075           binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
   1076    /* Now swap them. */
   1077    assign(swapped,
   1078           binop(Iop_Or32,
   1079                 binop(Iop_And32,
   1080                       binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
   1081                       mkU32(2)),
   1082                 binop(Iop_And32,
   1083                       binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
   1084                       mkU32(1))
   1085          ));
   1086    return swapped;
   1087 }
   1088 
   1089 
   1090 /*------------------------------------------------------------*/
   1091 /*--- Helpers for flag handling and conditional insns      ---*/
   1092 /*------------------------------------------------------------*/
   1093 
   1094 static const HChar* name_ARMCondcode ( ARMCondcode cond )
   1095 {
   1096    switch (cond) {
   1097       case ARMCondEQ:  return "{eq}";
   1098       case ARMCondNE:  return "{ne}";
   1099       case ARMCondHS:  return "{hs}";  // or 'cs'
   1100       case ARMCondLO:  return "{lo}";  // or 'cc'
   1101       case ARMCondMI:  return "{mi}";
   1102       case ARMCondPL:  return "{pl}";
   1103       case ARMCondVS:  return "{vs}";
   1104       case ARMCondVC:  return "{vc}";
   1105       case ARMCondHI:  return "{hi}";
   1106       case ARMCondLS:  return "{ls}";
   1107       case ARMCondGE:  return "{ge}";
   1108       case ARMCondLT:  return "{lt}";
   1109       case ARMCondGT:  return "{gt}";
   1110       case ARMCondLE:  return "{le}";
   1111       case ARMCondAL:  return ""; // {al}: is the default
   1112       case ARMCondNV:  return "{nv}";
   1113       default: vpanic("name_ARMCondcode");
   1114    }
   1115 }
   1116 /* and a handy shorthand for it */
   1117 static const HChar* nCC ( ARMCondcode cond ) {
   1118    return name_ARMCondcode(cond);
   1119 }
   1120 
   1121 
   1122 /* Build IR to calculate some particular condition from stored
   1123    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   1124    Ity_I32, suitable for narrowing.  Although the return type is
   1125    Ity_I32, the returned value is either 0 or 1.  'cond' must be
   1126    :: Ity_I32 and must denote the condition to compute in
   1127    bits 7:4, and be zero everywhere else.
   1128 */
   1129 static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
   1130 {
   1131    vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
   1132    /* And 'cond' had better produce a value in which only bits 7:4 are
   1133       nonzero.  However, obviously we can't assert for that. */
   1134 
   1135    /* So what we're constructing for the first argument is
   1136       "(cond << 4) | stored-operation".
   1137       However, as per comments above, 'cond' must be supplied
   1138       pre-shifted to this function.
   1139 
   1140       This pairing scheme requires that the ARM_CC_OP_ values all fit
   1141       in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
   1142       8 bits of the first argument. */
   1143    IRExpr** args
   1144       = mkIRExprVec_4(
   1145            binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
   1146            IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1147            IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1148            IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
   1149         );
   1150    IRExpr* call
   1151       = mkIRExprCCall(
   1152            Ity_I32,
   1153            0/*regparm*/,
   1154            "armg_calculate_condition", &armg_calculate_condition,
   1155            args
   1156         );
   1157 
   1158    /* Exclude the requested condition, OP and NDEP from definedness
   1159       checking.  We're only interested in DEP1 and DEP2. */
   1160    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1161    return call;
   1162 }
   1163 
   1164 
   1165 /* Build IR to calculate some particular condition from stored
   1166    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   1167    Ity_I32, suitable for narrowing.  Although the return type is
   1168    Ity_I32, the returned value is either 0 or 1.
   1169 */
   1170 static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
   1171 {
   1172   /* First arg is "(cond << 4) | condition".  This requires that the
   1173      ARM_CC_OP_ values all fit in 4 bits.  Hence we are passing a
   1174      (COND, OP) pair in the lowest 8 bits of the first argument. */
   1175    vassert(cond >= 0 && cond <= 15);
   1176    return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
   1177 }
   1178 
   1179 
   1180 /* Build IR to calculate just the carry flag from stored
   1181    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1182    Ity_I32. */
   1183 static IRExpr* mk_armg_calculate_flag_c ( void )
   1184 {
   1185    IRExpr** args
   1186       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1187                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1188                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1189                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1190    IRExpr* call
   1191       = mkIRExprCCall(
   1192            Ity_I32,
   1193            0/*regparm*/,
   1194            "armg_calculate_flag_c", &armg_calculate_flag_c,
   1195            args
   1196         );
   1197    /* Exclude OP and NDEP from definedness checking.  We're only
   1198       interested in DEP1 and DEP2. */
   1199    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1200    return call;
   1201 }
   1202 
   1203 
   1204 /* Build IR to calculate just the overflow flag from stored
   1205    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1206    Ity_I32. */
   1207 static IRExpr* mk_armg_calculate_flag_v ( void )
   1208 {
   1209    IRExpr** args
   1210       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1211                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1212                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1213                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1214    IRExpr* call
   1215       = mkIRExprCCall(
   1216            Ity_I32,
   1217            0/*regparm*/,
   1218            "armg_calculate_flag_v", &armg_calculate_flag_v,
   1219            args
   1220         );
   1221    /* Exclude OP and NDEP from definedness checking.  We're only
   1222       interested in DEP1 and DEP2. */
   1223    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1224    return call;
   1225 }
   1226 
   1227 
   1228 /* Build IR to calculate N Z C V in bits 31:28 of the
   1229    returned word. */
   1230 static IRExpr* mk_armg_calculate_flags_nzcv ( void )
   1231 {
   1232    IRExpr** args
   1233       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1234                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1235                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1236                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1237    IRExpr* call
   1238       = mkIRExprCCall(
   1239            Ity_I32,
   1240            0/*regparm*/,
   1241            "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
   1242            args
   1243         );
   1244    /* Exclude OP and NDEP from definedness checking.  We're only
   1245       interested in DEP1 and DEP2. */
   1246    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1247    return call;
   1248 }
   1249 
   1250 static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
   1251 {
   1252    IRExpr** args1;
   1253    IRExpr** args2;
   1254    IRExpr *call1, *call2, *res;
   1255 
   1256    if (Q) {
   1257       args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
   1258                               binop(Iop_GetElem32x4, resL, mkU8(1)),
   1259                               binop(Iop_GetElem32x4, resR, mkU8(0)),
   1260                               binop(Iop_GetElem32x4, resR, mkU8(1)) );
   1261       args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
   1262                               binop(Iop_GetElem32x4, resL, mkU8(3)),
   1263                               binop(Iop_GetElem32x4, resR, mkU8(2)),
   1264                               binop(Iop_GetElem32x4, resR, mkU8(3)) );
   1265    } else {
   1266       args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
   1267                               binop(Iop_GetElem32x2, resL, mkU8(1)),
   1268                               binop(Iop_GetElem32x2, resR, mkU8(0)),
   1269                               binop(Iop_GetElem32x2, resR, mkU8(1)) );
   1270    }
   1271 
   1272    call1 = mkIRExprCCall(
   1273              Ity_I32,
   1274              0/*regparm*/,
   1275              "armg_calculate_flag_qc", &armg_calculate_flag_qc,
   1276              args1
   1277           );
   1278    if (Q) {
   1279       call2 = mkIRExprCCall(
   1280                 Ity_I32,
   1281                 0/*regparm*/,
   1282                 "armg_calculate_flag_qc", &armg_calculate_flag_qc,
   1283                 args2
   1284              );
   1285    }
   1286    if (Q) {
   1287       res = binop(Iop_Or32, call1, call2);
   1288    } else {
   1289       res = call1;
   1290    }
   1291    return res;
   1292 }
   1293 
   1294 // FIXME: this is named wrongly .. looks like a sticky set of
   1295 // QC, not a write to it.
   1296 static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
   1297                          IRTemp condT )
   1298 {
   1299    putMiscReg32 (OFFB_FPSCR,
   1300                  binop(Iop_Or32,
   1301                        IRExpr_Get(OFFB_FPSCR, Ity_I32),
   1302                        binop(Iop_Shl32,
   1303                              mk_armg_calculate_flag_qc(resL, resR, Q),
   1304                              mkU8(27))),
   1305                  condT);
   1306 }
   1307 
   1308 /* Build IR to conditionally set the flags thunk.  As with putIReg, if
   1309    guard is IRTemp_INVALID then it's unconditional, else it holds a
   1310    condition :: Ity_I32. */
   1311 static
   1312 void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
   1313                          IRTemp t_dep2, IRTemp t_ndep,
   1314                          IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1315 {
   1316    vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
   1317    vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
   1318    vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
   1319    vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
   1320    if (guardT == IRTemp_INVALID) {
   1321       /* unconditional */
   1322       stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(cc_op) ));
   1323       stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   1324       stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   1325       stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
   1326    } else {
   1327       /* conditional */
   1328       IRTemp c1 = newTemp(Ity_I1);
   1329       assign( c1, binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)) );
   1330       stmt( IRStmt_Put(
   1331                OFFB_CC_OP,
   1332                IRExpr_ITE( mkexpr(c1),
   1333                            mkU32(cc_op),
   1334                            IRExpr_Get(OFFB_CC_OP, Ity_I32) ) ));
   1335       stmt( IRStmt_Put(
   1336                OFFB_CC_DEP1,
   1337                IRExpr_ITE( mkexpr(c1),
   1338                            mkexpr(t_dep1),
   1339                            IRExpr_Get(OFFB_CC_DEP1, Ity_I32) ) ));
   1340       stmt( IRStmt_Put(
   1341                OFFB_CC_DEP2,
   1342                IRExpr_ITE( mkexpr(c1),
   1343                            mkexpr(t_dep2),
   1344                            IRExpr_Get(OFFB_CC_DEP2, Ity_I32) ) ));
   1345       stmt( IRStmt_Put(
   1346                OFFB_CC_NDEP,
   1347                IRExpr_ITE( mkexpr(c1),
   1348                            mkexpr(t_ndep),
   1349                            IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ) ));
   1350    }
   1351 }
   1352 
   1353 
   1354 /* Minor variant of the above that sets NDEP to zero (if it
   1355    sets it at all) */
   1356 static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
   1357                              IRTemp t_dep2,
   1358                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1359 {
   1360    IRTemp z32 = newTemp(Ity_I32);
   1361    assign( z32, mkU32(0) );
   1362    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
   1363 }
   1364 
   1365 
   1366 /* Minor variant of the above that sets DEP2 to zero (if it
   1367    sets it at all) */
   1368 static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
   1369                              IRTemp t_ndep,
   1370                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1371 {
   1372    IRTemp z32 = newTemp(Ity_I32);
   1373    assign( z32, mkU32(0) );
   1374    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
   1375 }
   1376 
   1377 
   1378 /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
   1379    sets them at all) */
   1380 static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
   1381                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1382 {
   1383    IRTemp z32 = newTemp(Ity_I32);
   1384    assign( z32, mkU32(0) );
   1385    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
   1386 }
   1387 
   1388 
   1389 /* ARM only */
   1390 /* Generate a side-exit to the next instruction, if the given guard
   1391    expression :: Ity_I32 is 0 (note!  the side exit is taken if the
   1392    condition is false!)  This is used to skip over conditional
   1393    instructions which we can't generate straight-line code for, either
   1394    because they are too complex or (more likely) they potentially
   1395    generate exceptions.
   1396 */
   1397 static void mk_skip_over_A32_if_cond_is_false (
   1398                IRTemp guardT /* :: Ity_I32, 0 or 1 */
   1399             )
   1400 {
   1401    ASSERT_IS_ARM;
   1402    vassert(guardT != IRTemp_INVALID);
   1403    vassert(0 == (guest_R15_curr_instr_notENC & 3));
   1404    stmt( IRStmt_Exit(
   1405             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
   1406             Ijk_Boring,
   1407             IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
   1408             OFFB_R15T
   1409        ));
   1410 }
   1411 
   1412 /* Thumb16 only */
   1413 /* ditto, but jump over a 16-bit thumb insn */
   1414 static void mk_skip_over_T16_if_cond_is_false (
   1415                IRTemp guardT /* :: Ity_I32, 0 or 1 */
   1416             )
   1417 {
   1418    ASSERT_IS_THUMB;
   1419    vassert(guardT != IRTemp_INVALID);
   1420    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   1421    stmt( IRStmt_Exit(
   1422             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
   1423             Ijk_Boring,
   1424             IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
   1425             OFFB_R15T
   1426        ));
   1427 }
   1428 
   1429 
   1430 /* Thumb32 only */
   1431 /* ditto, but jump over a 32-bit thumb insn */
   1432 static void mk_skip_over_T32_if_cond_is_false (
   1433                IRTemp guardT /* :: Ity_I32, 0 or 1 */
   1434             )
   1435 {
   1436    ASSERT_IS_THUMB;
   1437    vassert(guardT != IRTemp_INVALID);
   1438    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   1439    stmt( IRStmt_Exit(
   1440             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
   1441             Ijk_Boring,
   1442             IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
   1443             OFFB_R15T
   1444        ));
   1445 }
   1446 
   1447 
   1448 /* Thumb16 and Thumb32 only
   1449    Generate a SIGILL followed by a restart of the current instruction
   1450    if the given temp is nonzero. */
   1451 static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
   1452 {
   1453    ASSERT_IS_THUMB;
   1454    vassert(t != IRTemp_INVALID);
   1455    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   1456    stmt(
   1457       IRStmt_Exit(
   1458          binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
   1459          Ijk_NoDecode,
   1460          IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
   1461          OFFB_R15T
   1462       )
   1463    );
   1464 }
   1465 
   1466 
   1467 /* Inspect the old_itstate, and generate a SIGILL if it indicates that
   1468    we are currently in an IT block and are not the last in the block.
   1469    This also rolls back guest_ITSTATE to its old value before the exit
   1470    and restores it to its new value afterwards.  This is so that if
   1471    the exit is taken, we have an up to date version of ITSTATE
   1472    available.  Without doing that, we have no hope of making precise
   1473    exceptions work. */
   1474 static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
   1475                IRTemp old_itstate /* :: Ity_I32 */,
   1476                IRTemp new_itstate /* :: Ity_I32 */
   1477             )
   1478 {
   1479    ASSERT_IS_THUMB;
   1480    put_ITSTATE(old_itstate); // backout
   1481    IRTemp guards_for_next3 = newTemp(Ity_I32);
   1482    assign(guards_for_next3,
   1483           binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   1484    gen_SIGILL_T_if_nonzero(guards_for_next3);
   1485    put_ITSTATE(new_itstate); //restore
   1486 }
   1487 
   1488 
   1489 /* Simpler version of the above, which generates a SIGILL if
   1490    we're anywhere within an IT block. */
   1491 static void gen_SIGILL_T_if_in_ITBlock (
   1492                IRTemp old_itstate /* :: Ity_I32 */,
   1493                IRTemp new_itstate /* :: Ity_I32 */
   1494             )
   1495 {
   1496    put_ITSTATE(old_itstate); // backout
   1497    gen_SIGILL_T_if_nonzero(old_itstate);
   1498    put_ITSTATE(new_itstate); //restore
   1499 }
   1500 
   1501 
   1502 /* Generate an APSR value, from the NZCV thunk, and
   1503    from QFLAG32 and GEFLAG0 .. GEFLAG3. */
   1504 static IRTemp synthesise_APSR ( void )
   1505 {
   1506    IRTemp res1 = newTemp(Ity_I32);
   1507    // Get NZCV
   1508    assign( res1, mk_armg_calculate_flags_nzcv() );
   1509    // OR in the Q value
   1510    IRTemp res2 = newTemp(Ity_I32);
   1511    assign(
   1512       res2,
   1513       binop(Iop_Or32,
   1514             mkexpr(res1),
   1515             binop(Iop_Shl32,
   1516                   unop(Iop_1Uto32,
   1517                        binop(Iop_CmpNE32,
   1518                              mkexpr(get_QFLAG32()),
   1519                              mkU32(0))),
   1520                   mkU8(ARMG_CC_SHIFT_Q)))
   1521    );
   1522    // OR in GE0 .. GE3
   1523    IRExpr* ge0
   1524       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
   1525    IRExpr* ge1
   1526       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
   1527    IRExpr* ge2
   1528       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
   1529    IRExpr* ge3
   1530       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
   1531    IRTemp res3 = newTemp(Ity_I32);
   1532    assign(res3,
   1533           binop(Iop_Or32,
   1534                 mkexpr(res2),
   1535                 binop(Iop_Or32,
   1536                       binop(Iop_Or32,
   1537                             binop(Iop_Shl32, ge0, mkU8(16)),
   1538                             binop(Iop_Shl32, ge1, mkU8(17))),
   1539                       binop(Iop_Or32,
   1540                             binop(Iop_Shl32, ge2, mkU8(18)),
   1541                             binop(Iop_Shl32, ge3, mkU8(19))) )));
   1542    return res3;
   1543 }
   1544 
   1545 
   1546 /* and the inverse transformation: given an APSR value,
   1547    set the NZCV thunk, the Q flag, and the GE flags. */
   1548 static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
   1549                                 IRTemp apsrT, IRTemp condT )
   1550 {
   1551    vassert(write_nzcvq || write_ge);
   1552    if (write_nzcvq) {
   1553       // Do NZCV
   1554       IRTemp immT = newTemp(Ity_I32);
   1555       assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
   1556       setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
   1557       // Do Q
   1558       IRTemp qnewT = newTemp(Ity_I32);
   1559       assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
   1560       put_QFLAG32(qnewT, condT);
   1561    }
   1562    if (write_ge) {
   1563       // Do GE3..0
   1564       put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
   1565                    condT);
   1566       put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
   1567                    condT);
   1568       put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
   1569                    condT);
   1570       put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
   1571                    condT);
   1572    }
   1573 }
   1574 
   1575 
   1576 /*------------------------------------------------------------*/
   1577 /*--- Helpers for saturation                               ---*/
   1578 /*------------------------------------------------------------*/
   1579 
/* FIXME: absolutely the only difference between (a) armUnsignedSatQ
   and (b) armSignedSatQ is that in (a) the floor is set to 0, whereas
   in (b) the floor is computed from the value of imm5.  These two
   functions should be commoned up. */
   1584 
   1585 /* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
   1586    Optionally return flag resQ saying whether saturation occurred.
   1587    See definition in manual, section A2.2.1, page 41
   1588    (bits(N), boolean) UnsignedSatQ( integer i, integer N )
   1589    {
   1590      if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
   1591      elsif ( i < 0 )    { result = 0; saturated = TRUE; }
   1592      else               { result = i; saturated = FALSE; }
   1593      return ( result<N-1:0>, saturated );
   1594    }
   1595 */
   1596 static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
   1597                              IRTemp* resQ, /* OUT - Ity_I32  */
   1598                              IRTemp regT,  /* value to clamp - Ity_I32 */
   1599                              UInt imm5 )   /* saturation ceiling */
   1600 {
   1601    UInt ceil  = (1 << imm5) - 1;    // (2^imm5)-1
   1602    UInt floor = 0;
   1603 
   1604    IRTemp nd0 = newTemp(Ity_I32);
   1605    IRTemp nd1 = newTemp(Ity_I32);
   1606    IRTemp nd2 = newTemp(Ity_I1);
   1607    IRTemp nd3 = newTemp(Ity_I32);
   1608    IRTemp nd4 = newTemp(Ity_I32);
   1609    IRTemp nd5 = newTemp(Ity_I1);
   1610    IRTemp nd6 = newTemp(Ity_I32);
   1611 
   1612    assign( nd0, mkexpr(regT) );
   1613    assign( nd1, mkU32(ceil) );
   1614    assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
   1615    assign( nd3, IRExpr_ITE(mkexpr(nd2), mkexpr(nd1), mkexpr(nd0)) );
   1616    assign( nd4, mkU32(floor) );
   1617    assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
   1618    assign( nd6, IRExpr_ITE(mkexpr(nd5), mkexpr(nd4), mkexpr(nd3)) );
   1619    assign( *res, mkexpr(nd6) );
   1620 
   1621    /* if saturation occurred, then resQ is set to some nonzero value
   1622       if sat did not occur, resQ is guaranteed to be zero. */
   1623    if (resQ) {
   1624       assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   1625    }
   1626 }
   1627 
   1628 
   1629 /* SignedSatQ(): 'clamp' each value so it lies between  -2^N <= x <= (2^N) - 1
   1630    Optionally return flag resQ saying whether saturation occurred.
   1631    - see definition in manual, section A2.2.1, page 41
   1632    (bits(N), boolean ) SignedSatQ( integer i, integer N )
   1633    {
   1634      if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
   1635      elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = FALSE; }
   1636      else                      { result = i;           saturated = FALSE; }
   1637      return ( result[N-1:0], saturated );
   1638    }
   1639 */
   1640 static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
   1641                            UInt imm5,      /* saturation ceiling */
   1642                            IRTemp* res,    /* OUT - Ity_I32 */
   1643                            IRTemp* resQ )  /* OUT - Ity_I32  */
   1644 {
   1645    Int ceil  =  (1 << (imm5-1)) - 1;  //  (2^(imm5-1))-1
   1646    Int floor = -(1 << (imm5-1));      // -(2^(imm5-1))
   1647 
   1648    IRTemp nd0 = newTemp(Ity_I32);
   1649    IRTemp nd1 = newTemp(Ity_I32);
   1650    IRTemp nd2 = newTemp(Ity_I1);
   1651    IRTemp nd3 = newTemp(Ity_I32);
   1652    IRTemp nd4 = newTemp(Ity_I32);
   1653    IRTemp nd5 = newTemp(Ity_I1);
   1654    IRTemp nd6 = newTemp(Ity_I32);
   1655 
   1656    assign( nd0, mkexpr(regT) );
   1657    assign( nd1, mkU32(ceil) );
   1658    assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
   1659    assign( nd3, IRExpr_ITE( mkexpr(nd2), mkexpr(nd1), mkexpr(nd0) ) );
   1660    assign( nd4, mkU32(floor) );
   1661    assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
   1662    assign( nd6, IRExpr_ITE( mkexpr(nd5), mkexpr(nd4), mkexpr(nd3) ) );
   1663    assign( *res, mkexpr(nd6) );
   1664 
   1665    /* if saturation occurred, then resQ is set to some nonzero value
   1666       if sat did not occur, resQ is guaranteed to be zero. */
   1667    if (resQ) {
   1668      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   1669    }
   1670 }
   1671 
   1672 
   1673 /* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
   1674    overflow occurred for 32-bit addition.  Needs both args and the
   1675    result.  HD p27. */
   1676 static
   1677 IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
   1678                                       IRTemp argL, IRTemp argR )
   1679 {
   1680    IRTemp res = newTemp(Ity_I32);
   1681    assign(res, resE);
   1682    return
   1683       binop( Iop_Shr32,
   1684              binop( Iop_And32,
   1685                     binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
   1686                     binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
   1687              mkU8(31) );
   1688 }
   1689 
   1690 /* Similarly .. also from HD p27 .. */
   1691 static
   1692 IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
   1693                                       IRTemp argL, IRTemp argR )
   1694 {
   1695    IRTemp res = newTemp(Ity_I32);
   1696    assign(res, resE);
   1697    return
   1698       binop( Iop_Shr32,
   1699              binop( Iop_And32,
   1700                     binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
   1701                     binop( Iop_Xor32, mkexpr(res),  mkexpr(argL) )),
   1702              mkU8(31) );
   1703 }
   1704 
   1705 
   1706 /*------------------------------------------------------------*/
   1707 /*--- Larger helpers                                       ---*/
   1708 /*------------------------------------------------------------*/
   1709 
/* Compute both the result and new C flag value for a LSL by an imm5
   or by a register operand.  May generate reads of the old C value
   (hence only safe to use before any writes to guest state happen).
   These functions are factored out so that they can be used by both
   ARM and Thumb.

   Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by_{imm5,reg},
   "res" (the result)  is a.k.a. "shop", shifter operand
   "newC" (the new C)  is a.k.a. "shco", shifter carry out

   The calling convention for res and newC is a bit funny.  They could
   be passed by value, but instead are passed by ref.

   The C (shco) value computed must be zero in bits 31:1, as the IR
   optimisations for flag handling (guest_arm_spechelper) rely on
   that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
   for it.  The same applies to all of these functions that compute
   shco after a shift or rotate, not just this one.
*/
   1728 
   1729 static void compute_result_and_C_after_LSL_by_imm5 (
   1730                /*OUT*/HChar* buf,
   1731                IRTemp* res,
   1732                IRTemp* newC,
   1733                IRTemp rMt, UInt shift_amt, /* operands */
   1734                UInt rM      /* only for debug printing */
   1735             )
   1736 {
   1737    if (shift_amt == 0) {
   1738       if (newC) {
   1739          assign( *newC, mk_armg_calculate_flag_c() );
   1740       }
   1741       assign( *res, mkexpr(rMt) );
   1742       DIS(buf, "r%u", rM);
   1743    } else {
   1744       vassert(shift_amt >= 1 && shift_amt <= 31);
   1745       if (newC) {
   1746          assign( *newC,
   1747                  binop(Iop_And32,
   1748                        binop(Iop_Shr32, mkexpr(rMt),
   1749                                         mkU8(32 - shift_amt)),
   1750                        mkU32(1)));
   1751       }
   1752       assign( *res,
   1753               binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
   1754       DIS(buf, "r%u, LSL #%u", rM, shift_amt);
   1755    }
   1756 }
   1757 
   1758 
   1759 static void compute_result_and_C_after_LSL_by_reg (
   1760                /*OUT*/HChar* buf,
   1761                IRTemp* res,
   1762                IRTemp* newC,
   1763                IRTemp rMt, IRTemp rSt,  /* operands */
   1764                UInt rM,    UInt rS      /* only for debug printing */
   1765             )
   1766 {
   1767    // shift left in range 0 .. 255
   1768    // amt  = rS & 255
   1769    // res  = amt < 32 ?  Rm << amt  : 0
   1770    // newC = amt == 0     ? oldC  :
   1771    //        amt in 1..32 ?  Rm[32-amt]  : 0
   1772    IRTemp amtT = newTemp(Ity_I32);
   1773    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   1774    if (newC) {
   1775       /* mux0X(amt == 0,
   1776                mux0X(amt < 32,
   1777                      0,
   1778                      Rm[(32-amt) & 31]),
   1779                oldC)
   1780       */
   1781       /* About the best you can do is pray that iropt is able
   1782          to nuke most or all of the following junk. */
   1783       IRTemp oldC = newTemp(Ity_I32);
   1784       assign(oldC, mk_armg_calculate_flag_c() );
   1785       assign(
   1786          *newC,
   1787          IRExpr_ITE(
   1788             binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
   1789             mkexpr(oldC),
   1790             IRExpr_ITE(
   1791                binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
   1792                binop(Iop_And32,
   1793                      binop(Iop_Shr32,
   1794                            mkexpr(rMt),
   1795                            unop(Iop_32to8,
   1796                                 binop(Iop_And32,
   1797                                       binop(Iop_Sub32,
   1798                                             mkU32(32),
   1799                                             mkexpr(amtT)),
   1800                                       mkU32(31)
   1801                                 )
   1802                            )
   1803                      ),
   1804                      mkU32(1)
   1805                      ),
   1806                mkU32(0)
   1807             )
   1808          )
   1809       );
   1810    }
   1811    // (Rm << (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
   1812    // Lhs of the & limits the shift to 31 bits, so as to
   1813    // give known IR semantics.  Rhs of the & is all 1s for
   1814    // Rs <= 31 and all 0s for Rs >= 32.
   1815    assign(
   1816       *res,
   1817       binop(
   1818          Iop_And32,
   1819          binop(Iop_Shl32,
   1820                mkexpr(rMt),
   1821                unop(Iop_32to8,
   1822                     binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
   1823          binop(Iop_Sar32,
   1824                binop(Iop_Sub32,
   1825                      mkexpr(amtT),
   1826                      mkU32(32)),
   1827                mkU8(31))));
   1828     DIS(buf, "r%u, LSL r%u", rM, rS);
   1829 }
   1830 
   1831 
   1832 static void compute_result_and_C_after_LSR_by_imm5 (
   1833                /*OUT*/HChar* buf,
   1834                IRTemp* res,
   1835                IRTemp* newC,
   1836                IRTemp rMt, UInt shift_amt, /* operands */
   1837                UInt rM      /* only for debug printing */
   1838             )
   1839 {
   1840    if (shift_amt == 0) {
   1841       // conceptually a 32-bit shift, however:
   1842       // res  = 0
   1843       // newC = Rm[31]
   1844       if (newC) {
   1845          assign( *newC,
   1846                  binop(Iop_And32,
   1847                        binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
   1848                        mkU32(1)));
   1849       }
   1850       assign( *res, mkU32(0) );
   1851       DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
   1852    } else {
   1853       // shift in range 1..31
   1854       // res  = Rm >>u shift_amt
   1855       // newC = Rm[shift_amt - 1]
   1856       vassert(shift_amt >= 1 && shift_amt <= 31);
   1857       if (newC) {
   1858          assign( *newC,
   1859                  binop(Iop_And32,
   1860                        binop(Iop_Shr32, mkexpr(rMt),
   1861                                         mkU8(shift_amt - 1)),
   1862                        mkU32(1)));
   1863       }
   1864       assign( *res,
   1865               binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
   1866       DIS(buf, "r%u, LSR #%u", rM, shift_amt);
   1867    }
   1868 }
   1869 
   1870 
   1871 static void compute_result_and_C_after_LSR_by_reg (
   1872                /*OUT*/HChar* buf,
   1873                IRTemp* res,
   1874                IRTemp* newC,
   1875                IRTemp rMt, IRTemp rSt,  /* operands */
   1876                UInt rM,    UInt rS      /* only for debug printing */
   1877             )
   1878 {
   1879    // shift right in range 0 .. 255
   1880    // amt = rS & 255
   1881    // res  = amt < 32 ?  Rm >>u amt  : 0
   1882    // newC = amt == 0     ? oldC  :
   1883    //        amt in 1..32 ?  Rm[amt-1]  : 0
   1884    IRTemp amtT = newTemp(Ity_I32);
   1885    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   1886    if (newC) {
   1887       /* mux0X(amt == 0,
   1888                mux0X(amt < 32,
   1889                      0,
   1890                      Rm[(amt-1) & 31]),
   1891                oldC)
   1892       */
   1893       IRTemp oldC = newTemp(Ity_I32);
   1894       assign(oldC, mk_armg_calculate_flag_c() );
   1895       assign(
   1896          *newC,
   1897          IRExpr_ITE(
   1898             binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
   1899             mkexpr(oldC),
   1900             IRExpr_ITE(
   1901                binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
   1902                binop(Iop_And32,
   1903                      binop(Iop_Shr32,
   1904                            mkexpr(rMt),
   1905                            unop(Iop_32to8,
   1906                                 binop(Iop_And32,
   1907                                       binop(Iop_Sub32,
   1908                                             mkexpr(amtT),
   1909                                             mkU32(1)),
   1910                                       mkU32(31)
   1911                                 )
   1912                            )
   1913                      ),
   1914                      mkU32(1)
   1915                      ),
   1916                mkU32(0)
   1917             )
   1918          )
   1919       );
   1920    }
   1921    // (Rm >>u (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
   1922    // Lhs of the & limits the shift to 31 bits, so as to
   1923    // give known IR semantics.  Rhs of the & is all 1s for
   1924    // Rs <= 31 and all 0s for Rs >= 32.
   1925    assign(
   1926       *res,
   1927       binop(
   1928          Iop_And32,
   1929          binop(Iop_Shr32,
   1930                mkexpr(rMt),
   1931                unop(Iop_32to8,
   1932                     binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
   1933          binop(Iop_Sar32,
   1934                binop(Iop_Sub32,
   1935                      mkexpr(amtT),
   1936                      mkU32(32)),
   1937                mkU8(31))));
   1938     DIS(buf, "r%u, LSR r%u", rM, rS);
   1939 }
   1940 
   1941 
   1942 static void compute_result_and_C_after_ASR_by_imm5 (
   1943                /*OUT*/HChar* buf,
   1944                IRTemp* res,
   1945                IRTemp* newC,
   1946                IRTemp rMt, UInt shift_amt, /* operands */
   1947                UInt rM      /* only for debug printing */
   1948             )
   1949 {
   1950    if (shift_amt == 0) {
   1951       // conceptually a 32-bit shift, however:
   1952       // res  = Rm >>s 31
   1953       // newC = Rm[31]
   1954       if (newC) {
   1955          assign( *newC,
   1956                  binop(Iop_And32,
   1957                        binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
   1958                        mkU32(1)));
   1959       }
   1960       assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
   1961       DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
   1962    } else {
   1963       // shift in range 1..31
   1964       // res = Rm >>s shift_amt
   1965       // newC = Rm[shift_amt - 1]
   1966       vassert(shift_amt >= 1 && shift_amt <= 31);
   1967       if (newC) {
   1968          assign( *newC,
   1969                  binop(Iop_And32,
   1970                        binop(Iop_Shr32, mkexpr(rMt),
   1971                                         mkU8(shift_amt - 1)),
   1972                        mkU32(1)));
   1973       }
   1974       assign( *res,
   1975               binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
   1976       DIS(buf, "r%u, ASR #%u", rM, shift_amt);
   1977    }
   1978 }
   1979 
   1980 
   1981 static void compute_result_and_C_after_ASR_by_reg (
   1982                /*OUT*/HChar* buf,
   1983                IRTemp* res,
   1984                IRTemp* newC,
   1985                IRTemp rMt, IRTemp rSt,  /* operands */
   1986                UInt rM,    UInt rS      /* only for debug printing */
   1987             )
   1988 {
   1989    // arithmetic shift right in range 0 .. 255
   1990    // amt = rS & 255
   1991    // res  = amt < 32 ?  Rm >>s amt  : Rm >>s 31
   1992    // newC = amt == 0     ? oldC  :
   1993    //        amt in 1..32 ?  Rm[amt-1]  : Rm[31]
   1994    IRTemp amtT = newTemp(Ity_I32);
   1995    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   1996    if (newC) {
   1997       /* mux0X(amt == 0,
   1998                mux0X(amt < 32,
   1999                      Rm[31],
   2000                      Rm[(amt-1) & 31])
   2001                oldC)
   2002       */
   2003       IRTemp oldC = newTemp(Ity_I32);
   2004       assign(oldC, mk_armg_calculate_flag_c() );
   2005       assign(
   2006          *newC,
   2007          IRExpr_ITE(
   2008             binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
   2009             mkexpr(oldC),
   2010             IRExpr_ITE(
   2011                binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
   2012                binop(Iop_And32,
   2013                      binop(Iop_Shr32,
   2014                            mkexpr(rMt),
   2015                            unop(Iop_32to8,
   2016                                 binop(Iop_And32,
   2017                                       binop(Iop_Sub32,
   2018                                             mkexpr(amtT),
   2019                                             mkU32(1)),
   2020                                       mkU32(31)
   2021                                 )
   2022                            )
   2023                      ),
   2024                      mkU32(1)
   2025                      ),
   2026                binop(Iop_And32,
   2027                      binop(Iop_Shr32,
   2028                            mkexpr(rMt),
   2029                            mkU8(31)
   2030                      ),
   2031                      mkU32(1)
   2032                )
   2033             )
   2034          )
   2035       );
   2036    }
   2037    // (Rm >>s (amt <u 32 ? amt : 31))
   2038    assign(
   2039       *res,
   2040       binop(
   2041          Iop_Sar32,
   2042          mkexpr(rMt),
   2043          unop(
   2044             Iop_32to8,
   2045             IRExpr_ITE(
   2046                binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32)),
   2047                mkexpr(amtT),
   2048                mkU32(31)))));
   2049     DIS(buf, "r%u, ASR r%u", rM, rS);
   2050 }
   2051 
   2052 
   2053 static void compute_result_and_C_after_ROR_by_reg (
   2054                /*OUT*/HChar* buf,
   2055                IRTemp* res,
   2056                IRTemp* newC,
   2057                IRTemp rMt, IRTemp rSt,  /* operands */
   2058                UInt rM,    UInt rS      /* only for debug printing */
   2059             )
   2060 {
   2061    // rotate right in range 0 .. 255
   2062    // amt = rS & 255
   2063    // shop =  Rm `ror` (amt & 31)
   2064    // shco =  amt == 0 ? oldC : Rm[(amt-1) & 31]
   2065    IRTemp amtT = newTemp(Ity_I32);
   2066    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   2067    IRTemp amt5T = newTemp(Ity_I32);
   2068    assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
   2069    IRTemp oldC = newTemp(Ity_I32);
   2070    assign(oldC, mk_armg_calculate_flag_c() );
   2071    if (newC) {
   2072       assign(
   2073          *newC,
   2074          IRExpr_ITE(
   2075             binop(Iop_CmpNE32, mkexpr(amtT), mkU32(0)),
   2076             binop(Iop_And32,
   2077                   binop(Iop_Shr32,
   2078                         mkexpr(rMt),
   2079                         unop(Iop_32to8,
   2080                              binop(Iop_And32,
   2081                                    binop(Iop_Sub32,
   2082                                          mkexpr(amtT),
   2083                                          mkU32(1)
   2084                                    ),
   2085                                    mkU32(31)
   2086                              )
   2087                         )
   2088                   ),
   2089                   mkU32(1)
   2090             ),
   2091             mkexpr(oldC)
   2092          )
   2093       );
   2094    }
   2095    assign(
   2096       *res,
   2097       IRExpr_ITE(
   2098          binop(Iop_CmpNE32, mkexpr(amt5T), mkU32(0)),
   2099          binop(Iop_Or32,
   2100                binop(Iop_Shr32,
   2101                      mkexpr(rMt),
   2102                      unop(Iop_32to8, mkexpr(amt5T))
   2103                ),
   2104                binop(Iop_Shl32,
   2105                      mkexpr(rMt),
   2106                      unop(Iop_32to8,
   2107                           binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
   2108                      )
   2109                )
   2110                ),
   2111          mkexpr(rMt)
   2112       )
   2113    );
   2114    DIS(buf, "r%u, ROR r#%u", rM, rS);
   2115 }
   2116 
   2117 
   2118 /* Generate an expression corresponding to the immediate-shift case of
   2119    a shifter operand.  This is used both for ARM and Thumb2.
   2120 
   2121    Bind it to a temporary, and return that via *res.  If newC is
   2122    non-NULL, also compute a value for the shifter's carry out (in the
   2123    LSB of a word), bind it to a temporary, and return that via *shco.
   2124 
   2125    Generates GETs from the guest state and is therefore not safe to
   2126    use once we start doing PUTs to it, for any given instruction.
   2127 
   2128    'how' is encoded thusly:
   2129       00b LSL,  01b LSR,  10b ASR,  11b ROR
   2130    Most but not all ARM and Thumb integer insns use this encoding.
   2131    Be careful to ensure the right value is passed here.
   2132 */
   2133 static void compute_result_and_C_after_shift_by_imm5 (
   2134                /*OUT*/HChar* buf,
   2135                /*OUT*/IRTemp* res,
   2136                /*OUT*/IRTemp* newC,
   2137                IRTemp  rMt,       /* reg to shift */
   2138                UInt    how,       /* what kind of shift */
   2139                UInt    shift_amt, /* shift amount (0..31) */
   2140                UInt    rM         /* only for debug printing */
   2141             )
   2142 {
   2143    vassert(shift_amt < 32);
   2144    vassert(how < 4);
   2145 
   2146    switch (how) {
   2147 
   2148       case 0:
   2149          compute_result_and_C_after_LSL_by_imm5(
   2150             buf, res, newC, rMt, shift_amt, rM
   2151          );
   2152          break;
   2153 
   2154       case 1:
   2155          compute_result_and_C_after_LSR_by_imm5(
   2156             buf, res, newC, rMt, shift_amt, rM
   2157          );
   2158          break;
   2159 
   2160       case 2:
   2161          compute_result_and_C_after_ASR_by_imm5(
   2162             buf, res, newC, rMt, shift_amt, rM
   2163          );
   2164          break;
   2165 
   2166       case 3:
   2167          if (shift_amt == 0) {
   2168             IRTemp oldcT = newTemp(Ity_I32);
   2169             // rotate right 1 bit through carry (?)
   2170             // RRX -- described at ARM ARM A5-17
   2171             // res  = (oldC << 31) | (Rm >>u 1)
   2172             // newC = Rm[0]
   2173             if (newC) {
   2174                assign( *newC,
   2175                        binop(Iop_And32, mkexpr(rMt), mkU32(1)));
   2176             }
   2177             assign( oldcT, mk_armg_calculate_flag_c() );
   2178             assign( *res,
   2179                     binop(Iop_Or32,
   2180                           binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
   2181                           binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
   2182             DIS(buf, "r%u, RRX", rM);
   2183          } else {
   2184             // rotate right in range 1..31
   2185             // res  = Rm `ror` shift_amt
   2186             // newC = Rm[shift_amt - 1]
   2187             vassert(shift_amt >= 1 && shift_amt <= 31);
   2188             if (newC) {
   2189                assign( *newC,
   2190                        binop(Iop_And32,
   2191                              binop(Iop_Shr32, mkexpr(rMt),
   2192                                               mkU8(shift_amt - 1)),
   2193                              mkU32(1)));
   2194             }
   2195             assign( *res,
   2196                     binop(Iop_Or32,
   2197                           binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
   2198                           binop(Iop_Shl32, mkexpr(rMt),
   2199                                            mkU8(32-shift_amt))));
   2200             DIS(buf, "r%u, ROR #%u", rM, shift_amt);
   2201          }
   2202          break;
   2203 
   2204       default:
   2205          /*NOTREACHED*/
   2206          vassert(0);
   2207    }
   2208 }
   2209 
   2210 
   2211 /* Generate an expression corresponding to the register-shift case of
   2212    a shifter operand.  This is used both for ARM and Thumb2.
   2213 
   2214    Bind it to a temporary, and return that via *res.  If newC is
   2215    non-NULL, also compute a value for the shifter's carry out (in the
   2216    LSB of a word), bind it to a temporary, and return that via *shco.
   2217 
   2218    Generates GETs from the guest state and is therefore not safe to
   2219    use once we start doing PUTs to it, for any given instruction.
   2220 
   2221    'how' is encoded thusly:
   2222       00b LSL,  01b LSR,  10b ASR,  11b ROR
   2223    Most but not all ARM and Thumb integer insns use this encoding.
   2224    Be careful to ensure the right value is passed here.
   2225 */
   2226 static void compute_result_and_C_after_shift_by_reg (
   2227                /*OUT*/HChar*  buf,
   2228                /*OUT*/IRTemp* res,
   2229                /*OUT*/IRTemp* newC,
   2230                IRTemp  rMt,       /* reg to shift */
   2231                UInt    how,       /* what kind of shift */
   2232                IRTemp  rSt,       /* shift amount */
   2233                UInt    rM,        /* only for debug printing */
   2234                UInt    rS         /* only for debug printing */
   2235             )
   2236 {
   2237    vassert(how < 4);
   2238    switch (how) {
   2239       case 0: { /* LSL */
   2240          compute_result_and_C_after_LSL_by_reg(
   2241             buf, res, newC, rMt, rSt, rM, rS
   2242          );
   2243          break;
   2244       }
   2245       case 1: { /* LSR */
   2246          compute_result_and_C_after_LSR_by_reg(
   2247             buf, res, newC, rMt, rSt, rM, rS
   2248          );
   2249          break;
   2250       }
   2251       case 2: { /* ASR */
   2252          compute_result_and_C_after_ASR_by_reg(
   2253             buf, res, newC, rMt, rSt, rM, rS
   2254          );
   2255          break;
   2256       }
   2257       case 3: { /* ROR */
   2258          compute_result_and_C_after_ROR_by_reg(
   2259              buf, res, newC, rMt, rSt, rM, rS
   2260          );
   2261          break;
   2262       }
   2263       default:
   2264          /*NOTREACHED*/
   2265          vassert(0);
   2266    }
   2267 }
   2268 
   2269 
   2270 /* Generate an expression corresponding to a shifter_operand, bind it
   2271    to a temporary, and return that via *shop.  If shco is non-NULL,
   2272    also compute a value for the shifter's carry out (in the LSB of a
   2273    word), bind it to a temporary, and return that via *shco.
   2274 
   2275    If for some reason we can't come up with a shifter operand (missing
   2276    case?  not really a shifter operand?) return False.
   2277 
   2278    Generates GETs from the guest state and is therefore not safe to
   2279    use once we start doing PUTs to it, for any given instruction.
   2280 
   2281    For ARM insns only; not for Thumb.
   2282 */
   2283 static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
   2284                                  /*OUT*/IRTemp* shop,
   2285                                  /*OUT*/IRTemp* shco,
   2286                                  /*OUT*/HChar* buf )
   2287 {
   2288    UInt insn_4 = (insn_11_0 >> 4) & 1;
   2289    UInt insn_7 = (insn_11_0 >> 7) & 1;
   2290    vassert(insn_25 <= 0x1);
   2291    vassert(insn_11_0 <= 0xFFF);
   2292 
   2293    vassert(shop && *shop == IRTemp_INVALID);
   2294    *shop = newTemp(Ity_I32);
   2295 
   2296    if (shco) {
   2297       vassert(*shco == IRTemp_INVALID);
   2298       *shco = newTemp(Ity_I32);
   2299    }
   2300 
   2301    /* 32-bit immediate */
   2302 
   2303    if (insn_25 == 1) {
   2304       /* immediate: (7:0) rotated right by 2 * (11:8) */
   2305       UInt imm = (insn_11_0 >> 0) & 0xFF;
   2306       UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
   2307       vassert(rot <= 30);
   2308       imm = ROR32(imm, rot);
   2309       if (shco) {
   2310          if (rot == 0) {
   2311             assign( *shco, mk_armg_calculate_flag_c() );
   2312          } else {
   2313             assign( *shco, mkU32( (imm >> 31) & 1 ) );
   2314          }
   2315       }
   2316       DIS(buf, "#0x%x", imm);
   2317       assign( *shop, mkU32(imm) );
   2318       return True;
   2319    }
   2320 
   2321    /* Shift/rotate by immediate */
   2322 
   2323    if (insn_25 == 0 && insn_4 == 0) {
   2324       /* Rm (3:0) shifted (6:5) by immediate (11:7) */
   2325       UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
   2326       UInt rM        = (insn_11_0 >> 0) & 0xF;
   2327       UInt how       = (insn_11_0 >> 5) & 3;
   2328       /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
   2329       IRTemp rMt = newTemp(Ity_I32);
   2330       assign(rMt, getIRegA(rM));
   2331 
   2332       vassert(shift_amt <= 31);
   2333 
   2334       compute_result_and_C_after_shift_by_imm5(
   2335          buf, shop, shco, rMt, how, shift_amt, rM
   2336       );
   2337       return True;
   2338    }
   2339 
   2340    /* Shift/rotate by register */
   2341    if (insn_25 == 0 && insn_4 == 1) {
   2342       /* Rm (3:0) shifted (6:5) by Rs (11:8) */
   2343       UInt rM  = (insn_11_0 >> 0) & 0xF;
   2344       UInt rS  = (insn_11_0 >> 8) & 0xF;
   2345       UInt how = (insn_11_0 >> 5) & 3;
   2346       /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
   2347       IRTemp rMt = newTemp(Ity_I32);
   2348       IRTemp rSt = newTemp(Ity_I32);
   2349 
   2350       if (insn_7 == 1)
   2351          return False; /* not really a shifter operand */
   2352 
   2353       assign(rMt, getIRegA(rM));
   2354       assign(rSt, getIRegA(rS));
   2355 
   2356       compute_result_and_C_after_shift_by_reg(
   2357          buf, shop, shco, rMt, how, rSt, rM, rS
   2358       );
   2359       return True;
   2360    }
   2361 
   2362    vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
   2363    return False;
   2364 }
   2365 
   2366 
   2367 /* ARM only */
   2368 static
   2369 IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
   2370                                     /*OUT*/HChar* buf )
   2371 {
   2372    vassert(rN < 16);
   2373    vassert(bU < 2);
   2374    vassert(imm12 < 0x1000);
   2375    HChar opChar = bU == 1 ? '+' : '-';
   2376    DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
   2377    return
   2378       binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
   2379              getIRegA(rN),
   2380              mkU32(imm12) );
   2381 }
   2382 
   2383 
   2384 /* ARM only.
   2385    NB: This is "DecodeImmShift" in newer versions of the the ARM ARM.
   2386 */
   2387 static
   2388 IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
   2389                                           UInt sh2, UInt imm5,
   2390                                           /*OUT*/HChar* buf )
   2391 {
   2392    vassert(rN < 16);
   2393    vassert(bU < 2);
   2394    vassert(rM < 16);
   2395    vassert(sh2 < 4);
   2396    vassert(imm5 < 32);
   2397    HChar   opChar = bU == 1 ? '+' : '-';
   2398    IRExpr* index  = NULL;
   2399    switch (sh2) {
   2400       case 0: /* LSL */
   2401          /* imm5 can be in the range 0 .. 31 inclusive. */
   2402          index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
   2403          DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
   2404          break;
   2405       case 1: /* LSR */
   2406          if (imm5 == 0) {
   2407             index = mkU32(0);
   2408             vassert(0); // ATC
   2409          } else {
   2410             index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
   2411          }
   2412          DIS(buf, "[r%u, %cr%u, LSR #%u]",
   2413                   rN, opChar, rM, imm5 == 0 ? 32 : imm5);
   2414          break;
   2415       case 2: /* ASR */
   2416          /* Doesn't this just mean that the behaviour with imm5 == 0
   2417             is the same as if it had been 31 ? */
   2418          if (imm5 == 0) {
   2419             index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
   2420             vassert(0); // ATC
   2421          } else {
   2422             index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
   2423          }
   2424          DIS(buf, "[r%u, %cr%u, ASR #%u]",
   2425                   rN, opChar, rM, imm5 == 0 ? 32 : imm5);
   2426          break;
   2427       case 3: /* ROR or RRX */
   2428          if (imm5 == 0) {
   2429             IRTemp rmT    = newTemp(Ity_I32);
   2430             IRTemp cflagT = newTemp(Ity_I32);
   2431             assign(rmT, getIRegA(rM));
   2432             assign(cflagT, mk_armg_calculate_flag_c());
   2433             index = binop(Iop_Or32,
   2434                           binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
   2435                           binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
   2436             DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
   2437          } else {
   2438             IRTemp rmT = newTemp(Ity_I32);
   2439             assign(rmT, getIRegA(rM));
   2440             vassert(imm5 >= 1 && imm5 <= 31);
   2441             index = binop(Iop_Or32,
   2442                           binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
   2443                           binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
   2444             DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
   2445          }
   2446          break;
   2447       default:
   2448          vassert(0);
   2449    }
   2450    vassert(index);
   2451    return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   2452                 getIRegA(rN), index);
   2453 }
   2454 
   2455 
   2456 /* ARM only */
   2457 static
   2458 IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
   2459                                    /*OUT*/HChar* buf )
   2460 {
   2461    vassert(rN < 16);
   2462    vassert(bU < 2);
   2463    vassert(imm8 < 0x100);
   2464    HChar opChar = bU == 1 ? '+' : '-';
   2465    DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
   2466    return
   2467       binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
   2468              getIRegA(rN),
   2469              mkU32(imm8) );
   2470 }
   2471 
   2472 
   2473 /* ARM only */
   2474 static
   2475 IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
   2476                                   /*OUT*/HChar* buf )
   2477 {
   2478    vassert(rN < 16);
   2479    vassert(bU < 2);
   2480    vassert(rM < 16);
   2481    HChar   opChar = bU == 1 ? '+' : '-';
   2482    IRExpr* index  = getIRegA(rM);
   2483    DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
   2484    return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   2485                 getIRegA(rN), index);
   2486 }
   2487 
   2488 
   2489 /* irRes :: Ity_I32 holds a floating point comparison result encoded
   2490    as an IRCmpF64Result.  Generate code to convert it to an
   2491    ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
   2492    Assign a new temp to hold that value, and return the temp. */
   2493 static
   2494 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
   2495 {
   2496    IRTemp ix       = newTemp(Ity_I32);
   2497    IRTemp termL    = newTemp(Ity_I32);
   2498    IRTemp termR    = newTemp(Ity_I32);
   2499    IRTemp nzcv     = newTemp(Ity_I32);
   2500 
   2501    /* This is where the fun starts.  We have to convert 'irRes' from
   2502       an IR-convention return result (IRCmpF64Result) to an
   2503       ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
   2504       4 bits of 'nzcv'. */
   2505    /* Map compare result from IR to ARM(nzcv) */
   2506    /*
   2507       FP cmp result | IR   | ARM(nzcv)
   2508       --------------------------------
   2509       UN              0x45   0011
   2510       LT              0x01   1000
   2511       GT              0x00   0010
   2512       EQ              0x40   0110
   2513    */
   2514    /* Now since you're probably wondering WTF ..
   2515 
   2516       ix fishes the useful bits out of the IR value, bits 6 and 0, and
   2517       places them side by side, giving a number which is 0, 1, 2 or 3.
   2518 
   2519       termL is a sequence cooked up by GNU superopt.  It converts ix
   2520          into an almost correct value NZCV value (incredibly), except
   2521          for the case of UN, where it produces 0100 instead of the
   2522          required 0011.
   2523 
   2524       termR is therefore a correction term, also computed from ix.  It
   2525          is 1 in the UN case and 0 for LT, GT and UN.  Hence, to get
   2526          the final correct value, we subtract termR from termL.
   2527 
   2528       Don't take my word for it.  There's a test program at the bottom
   2529       of this file, to try this out with.
   2530    */
   2531    assign(
   2532       ix,
   2533       binop(Iop_Or32,
   2534             binop(Iop_And32,
   2535                   binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
   2536                   mkU32(3)),
   2537             binop(Iop_And32, mkexpr(irRes), mkU32(1))));
   2538 
   2539    assign(
   2540       termL,
   2541       binop(Iop_Add32,
   2542             binop(Iop_Shr32,
   2543                   binop(Iop_Sub32,
   2544                         binop(Iop_Shl32,
   2545                               binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
   2546                               mkU8(30)),
   2547                         mkU32(1)),
   2548                   mkU8(29)),
   2549             mkU32(1)));
   2550 
   2551    assign(
   2552       termR,
   2553       binop(Iop_And32,
   2554             binop(Iop_And32,
   2555                   mkexpr(ix),
   2556                   binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
   2557             mkU32(1)));
   2558 
   2559    assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
   2560    return nzcv;
   2561 }
   2562 
   2563 
   2564 /* Thumb32 only.  This is "ThumbExpandImm" in the ARM ARM.  If
   2565    updatesC is non-NULL, a boolean is written to it indicating whether
   2566    or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
   2567 */
   2568 static UInt thumbExpandImm ( Bool* updatesC,
   2569                              UInt imm1, UInt imm3, UInt imm8 )
   2570 {
   2571    vassert(imm1 < (1<<1));
   2572    vassert(imm3 < (1<<3));
   2573    vassert(imm8 < (1<<8));
   2574    UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
   2575    UInt abcdefgh = imm8;
   2576    UInt lbcdefgh = imm8 | 0x80;
   2577    if (updatesC) {
   2578       *updatesC = i_imm3_a >= 8;
   2579    }
   2580    switch (i_imm3_a) {
   2581       case 0: case 1:
   2582          return abcdefgh;
   2583       case 2: case 3:
   2584          return (abcdefgh << 16) | abcdefgh;
   2585       case 4: case 5:
   2586          return (abcdefgh << 24) | (abcdefgh << 8);
   2587       case 6: case 7:
   2588          return (abcdefgh << 24) | (abcdefgh << 16)
   2589                 | (abcdefgh << 8) | abcdefgh;
   2590       case 8 ... 31:
   2591          return lbcdefgh << (32 - i_imm3_a);
   2592       default:
   2593          break;
   2594    }
   2595    /*NOTREACHED*/vassert(0);
   2596 }
   2597 
   2598 
   2599 /* Version of thumbExpandImm where we simply feed it the
   2600    instruction halfwords (the lowest addressed one is I0). */
   2601 static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
   2602                                         UShort i0s, UShort i1s )
   2603 {
   2604    UInt i0    = (UInt)i0s;
   2605    UInt i1    = (UInt)i1s;
   2606    UInt imm1  = SLICE_UInt(i0,10,10);
   2607    UInt imm3  = SLICE_UInt(i1,14,12);
   2608    UInt imm8  = SLICE_UInt(i1,7,0);
   2609    return thumbExpandImm(updatesC, imm1, imm3, imm8);
   2610 }
   2611 
   2612 
   2613 /* Thumb16 only.  Given the firstcond and mask fields from an IT
   2614    instruction, compute the 32-bit ITSTATE value implied, as described
   2615    in libvex_guest_arm.h.  This is not the ARM ARM representation.
   2616    Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
   2617    disassembly printing.  Returns False if firstcond or mask
   2618    denote something invalid.
   2619 
   2620    The number and conditions for the instructions to be
   2621    conditionalised depend on firstcond and mask:
   2622 
   2623    mask      cond 1    cond 2      cond 3      cond 4
   2624 
   2625    1000      fc[3:0]
   2626    x100      fc[3:0]   fc[3:1]:x
   2627    xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
   2628    xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z
   2629 
   2630    The condition fields are assembled in *itstate backwards (cond 4 at
   2631    the top, cond 1 at the bottom).  Conditions are << 4'd and then
   2632    ^0xE'd, and those fields that correspond to instructions in the IT
   2633    block are tagged with a 1 bit.
   2634 */
   2635 static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
   2636                               /*OUT*/HChar* ch1,
   2637                               /*OUT*/HChar* ch2,
   2638                               /*OUT*/HChar* ch3,
   2639                               UInt firstcond, UInt mask )
   2640 {
   2641    vassert(firstcond <= 0xF);
   2642    vassert(mask <= 0xF);
   2643    *itstate = 0;
   2644    *ch1 = *ch2 = *ch3 = '.';
   2645    if (mask == 0)
   2646       return False; /* the logic below actually ensures this anyway,
   2647                        but clearer to make it explicit. */
   2648    if (firstcond == 0xF)
   2649       return False; /* NV is not allowed */
   2650    if (firstcond == 0xE && popcount32(mask) != 1)
   2651       return False; /* if firstcond is AL then all the rest must be too */
   2652 
   2653    UInt m3 = (mask >> 3) & 1;
   2654    UInt m2 = (mask >> 2) & 1;
   2655    UInt m1 = (mask >> 1) & 1;
   2656    UInt m0 = (mask >> 0) & 1;
   2657 
   2658    UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
   2659    UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;
   2660 
   2661    if (m3 == 1 && (m2|m1|m0) == 0) {
   2662       *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
   2663       *itstate ^= 0xE0E0E0E0;
   2664       return True;
   2665    }
   2666 
   2667    if (m2 == 1 && (m1|m0) == 0) {
   2668       *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
   2669       *itstate ^= 0xE0E0E0E0;
   2670       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
   2671       return True;
   2672    }
   2673 
   2674    if (m1 == 1 && m0 == 0) {
   2675       *itstate = (ni << 24)
   2676                  | (setbit32(fc, 4, m2) << 16)
   2677                  | (setbit32(fc, 4, m3) << 8) | fc;
   2678       *itstate ^= 0xE0E0E0E0;
   2679       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
   2680       *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
   2681       return True;
   2682    }
   2683 
   2684    if (m0 == 1) {
   2685       *itstate = (setbit32(fc, 4, m1) << 24)
   2686                  | (setbit32(fc, 4, m2) << 16)
   2687                  | (setbit32(fc, 4, m3) << 8) | fc;
   2688       *itstate ^= 0xE0E0E0E0;
   2689       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
   2690       *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
   2691       *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
   2692       return True;
   2693    }
   2694 
   2695    return False;
   2696 }
   2697 
   2698 
   2699 /* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
   2700    Chapter 7 Section 1. */
   2701 static IRTemp gen_BITREV ( IRTemp x0 )
   2702 {
   2703    IRTemp x1 = newTemp(Ity_I32);
   2704    IRTemp x2 = newTemp(Ity_I32);
   2705    IRTemp x3 = newTemp(Ity_I32);
   2706    IRTemp x4 = newTemp(Ity_I32);
   2707    IRTemp x5 = newTemp(Ity_I32);
   2708    UInt   c1 = 0x55555555;
   2709    UInt   c2 = 0x33333333;
   2710    UInt   c3 = 0x0F0F0F0F;
   2711    UInt   c4 = 0x00FF00FF;
   2712    UInt   c5 = 0x0000FFFF;
   2713    assign(x1,
   2714           binop(Iop_Or32,
   2715                 binop(Iop_Shl32,
   2716                       binop(Iop_And32, mkexpr(x0), mkU32(c1)),
   2717                       mkU8(1)),
   2718                 binop(Iop_Shr32,
   2719                       binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
   2720                       mkU8(1))
   2721    ));
   2722    assign(x2,
   2723           binop(Iop_Or32,
   2724                 binop(Iop_Shl32,
   2725                       binop(Iop_And32, mkexpr(x1), mkU32(c2)),
   2726                       mkU8(2)),
   2727                 binop(Iop_Shr32,
   2728                       binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
   2729                       mkU8(2))
   2730    ));
   2731    assign(x3,
   2732           binop(Iop_Or32,
   2733                 binop(Iop_Shl32,
   2734                       binop(Iop_And32, mkexpr(x2), mkU32(c3)),
   2735                       mkU8(4)),
   2736                 binop(Iop_Shr32,
   2737                       binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
   2738                       mkU8(4))
   2739    ));
   2740    assign(x4,
   2741           binop(Iop_Or32,
   2742                 binop(Iop_Shl32,
   2743                       binop(Iop_And32, mkexpr(x3), mkU32(c4)),
   2744                       mkU8(8)),
   2745                 binop(Iop_Shr32,
   2746                       binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
   2747                       mkU8(8))
   2748    ));
   2749    assign(x5,
   2750           binop(Iop_Or32,
   2751                 binop(Iop_Shl32,
   2752                       binop(Iop_And32, mkexpr(x4), mkU32(c5)),
   2753                       mkU8(16)),
   2754                 binop(Iop_Shr32,
   2755                       binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
   2756                       mkU8(16))
   2757    ));
   2758    return x5;
   2759 }
   2760 
   2761 
   2762 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   2763    0:1:2:3 (aka byte-swap). */
   2764 static IRTemp gen_REV ( IRTemp arg )
   2765 {
   2766    IRTemp res = newTemp(Ity_I32);
   2767    assign(res,
   2768           binop(Iop_Or32,
   2769                 binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
   2770           binop(Iop_Or32,
   2771                 binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
   2772                                  mkU32(0x00FF0000)),
   2773           binop(Iop_Or32,
   2774                 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
   2775                                        mkU32(0x0000FF00)),
   2776                 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
   2777                                        mkU32(0x000000FF) )
   2778    ))));
   2779    return res;
   2780 }
   2781 
   2782 
   2783 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   2784    2:3:0:1 (swap within lo and hi halves). */
   2785 static IRTemp gen_REV16 ( IRTemp arg )
   2786 {
   2787    IRTemp res = newTemp(Ity_I32);
   2788    assign(res,
   2789           binop(Iop_Or32,
   2790                 binop(Iop_And32,
   2791                       binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
   2792                       mkU32(0xFF00FF00)),
   2793                 binop(Iop_And32,
   2794                       binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
   2795                       mkU32(0x00FF00FF))));
   2796    return res;
   2797 }
   2798 
   2799 
   2800 /*------------------------------------------------------------*/
   2801 /*--- Advanced SIMD (NEON) instructions                    ---*/
   2802 /*------------------------------------------------------------*/
   2803 
   2804 /*------------------------------------------------------------*/
   2805 /*--- NEON data processing                                 ---*/
   2806 /*------------------------------------------------------------*/
   2807 
   2808 /* For all NEON DP ops, we use the normal scheme to handle conditional
   2809    writes to registers -- pass in condT and hand that on to the
   2810    put*Reg functions.  In ARM mode condT is always IRTemp_INVALID
   2811    since NEON is unconditional for ARM.  In Thumb mode condT is
   2812    derived from the ITSTATE shift register in the normal way. */
   2813 
   2814 static
   2815 UInt get_neon_d_regno(UInt theInstr)
   2816 {
   2817    UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   2818    if (theInstr & 0x40) {
   2819       if (x & 1) {
   2820          x = x + 0x100;
   2821       } else {
   2822          x = x >> 1;
   2823       }
   2824    }
   2825    return x;
   2826 }
   2827 
   2828 static
   2829 UInt get_neon_n_regno(UInt theInstr)
   2830 {
   2831    UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
   2832    if (theInstr & 0x40) {
   2833       if (x & 1) {
   2834          x = x + 0x100;
   2835       } else {
   2836          x = x >> 1;
   2837       }
   2838    }
   2839    return x;
   2840 }
   2841 
   2842 static
   2843 UInt get_neon_m_regno(UInt theInstr)
   2844 {
   2845    UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   2846    if (theInstr & 0x40) {
   2847       if (x & 1) {
   2848          x = x + 0x100;
   2849       } else {
   2850          x = x >> 1;
   2851       }
   2852    }
   2853    return x;
   2854 }
   2855 
   2856 static
   2857 Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
   2858 {
   2859    UInt dreg = get_neon_d_regno(theInstr);
   2860    UInt mreg = get_neon_m_regno(theInstr);
   2861    UInt nreg = get_neon_n_regno(theInstr);
   2862    UInt imm4 = (theInstr >> 8) & 0xf;
   2863    UInt Q = (theInstr >> 6) & 1;
   2864    HChar reg_t = Q ? 'q' : 'd';
   2865 
   2866    if (Q) {
   2867       putQReg(dreg, triop(Iop_ExtractV128, getQReg(nreg),
   2868                getQReg(mreg), mkU8(imm4)), condT);
   2869    } else {
   2870       putDRegI64(dreg, triop(Iop_Extract64, getDRegI64(nreg),
   2871                  getDRegI64(mreg), mkU8(imm4)), condT);
   2872    }
   2873    DIP("vext.8 %c%d, %c%d, %c%d, #%d\n", reg_t, dreg, reg_t, nreg,
   2874                                          reg_t, mreg, imm4);
   2875    return True;
   2876 }
   2877 
   2878 /* Generate specific vector FP binary ops, possibly with a fake
   2879    rounding mode as required by the primop. */
   2880 static
   2881 IRExpr* binop_w_fake_RM ( IROp op, IRExpr* argL, IRExpr* argR )
   2882 {
   2883    switch (op) {
   2884       case Iop_Add32Fx4:
   2885       case Iop_Sub32Fx4:
   2886       case Iop_Mul32Fx4:
   2887          return triop(op, get_FAKE_roundingmode(), argL, argR );
   2888       case Iop_Add32x4: case Iop_Add16x8:
   2889       case Iop_Sub32x4: case Iop_Sub16x8:
   2890       case Iop_Mul32x4: case Iop_Mul16x8:
   2891       case Iop_Mul32x2: case Iop_Mul16x4:
   2892       case Iop_Add32Fx2:
   2893       case Iop_Sub32Fx2:
   2894       case Iop_Mul32Fx2:
   2895       case Iop_PwAdd32Fx2:
   2896          return binop(op, argL, argR);
   2897       default:
   2898         ppIROp(op);
   2899         vassert(0);
   2900    }
   2901 }
   2902 
   2903 /* VTBL, VTBX */
   2904 static
   2905 Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
   2906 {
   2907    UInt op = (theInstr >> 6) & 1;
   2908    UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
   2909    UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
   2910    UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
   2911    UInt len = (theInstr >> 8) & 3;
   2912    Int i;
   2913    IROp cmp;
   2914    ULong imm;
   2915    IRTemp arg_l;
   2916    IRTemp old_mask, new_mask, cur_mask;
   2917    IRTemp old_res, new_res;
   2918    IRTemp old_arg, new_arg;
   2919 
   2920    if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
   2921       return False;
   2922    if (nreg + len > 31)
   2923       return False;
   2924 
   2925    cmp = Iop_CmpGT8Ux8;
   2926 
   2927    old_mask = newTemp(Ity_I64);
   2928    old_res = newTemp(Ity_I64);
   2929    old_arg = newTemp(Ity_I64);
   2930    assign(old_mask, mkU64(0));
   2931    assign(old_res, mkU64(0));
   2932    assign(old_arg, getDRegI64(mreg));
   2933    imm = 8;
   2934    imm = (imm <<  8) | imm;
   2935    imm = (imm << 16) | imm;
   2936    imm = (imm << 32) | imm;
   2937 
   2938    for (i = 0; i <= len; i++) {
   2939       arg_l = newTemp(Ity_I64);
   2940       new_mask = newTemp(Ity_I64);
   2941       cur_mask = newTemp(Ity_I64);
   2942       new_res = newTemp(Ity_I64);
   2943       new_arg = newTemp(Ity_I64);
   2944       assign(arg_l, getDRegI64(nreg+i));
   2945       assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
   2946       assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
   2947       assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
   2948       assign(new_res, binop(Iop_Or64,
   2949                             mkexpr(old_res),
   2950                             binop(Iop_And64,
   2951                                   binop(Iop_Perm8x8,
   2952                                         mkexpr(arg_l),
   2953                                         binop(Iop_And64,
   2954                                               mkexpr(old_arg),
   2955                                               mkexpr(cur_mask))),
   2956                                   mkexpr(cur_mask))));
   2957 
   2958       old_arg = new_arg;
   2959       old_mask = new_mask;
   2960       old_res = new_res;
   2961    }
   2962    if (op) {
   2963       new_res = newTemp(Ity_I64);
   2964       assign(new_res, binop(Iop_Or64,
   2965                             binop(Iop_And64,
   2966                                   getDRegI64(dreg),
   2967                                   unop(Iop_Not64, mkexpr(old_mask))),
   2968                             mkexpr(old_res)));
   2969       old_res = new_res;
   2970    }
   2971 
   2972    putDRegI64(dreg, mkexpr(old_res), condT);
   2973    DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
   2974    if (len > 0) {
   2975       DIP("d%u-d%u", nreg, nreg + len);
   2976    } else {
   2977       DIP("d%u", nreg);
   2978    }
   2979    DIP("}, d%u\n", mreg);
   2980    return True;
   2981 }
   2982 
   2983 /* VDUP (scalar)  */
   2984 static
   2985 Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
   2986 {
   2987    UInt Q = (theInstr >> 6) & 1;
   2988    UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   2989    UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   2990    UInt imm4 = (theInstr >> 16) & 0xF;
   2991    UInt index;
   2992    UInt size;
   2993    IRTemp arg_m;
   2994    IRTemp res;
   2995    IROp op, op2;
   2996 
   2997    if ((imm4 == 0) || (imm4 == 8))
   2998       return False;
   2999    if ((Q == 1) && ((dreg & 1) == 1))
   3000       return False;
   3001    if (Q)
   3002       dreg >>= 1;
   3003    arg_m = newTemp(Ity_I64);
   3004    assign(arg_m, getDRegI64(mreg));
   3005    if (Q)
   3006       res = newTemp(Ity_V128);
   3007    else
   3008       res = newTemp(Ity_I64);
   3009    if ((imm4 & 1) == 1) {
   3010       op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
   3011       op2 = Iop_GetElem8x8;
   3012       index = imm4 >> 1;
   3013       size = 8;
   3014    } else if ((imm4 & 3) == 2) {
   3015       op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
   3016       op2 = Iop_GetElem16x4;
   3017       index = imm4 >> 2;
   3018       size = 16;
   3019    } else if ((imm4 & 7) == 4) {
   3020       op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
   3021       op2 = Iop_GetElem32x2;
   3022       index = imm4 >> 3;
   3023       size = 32;
   3024    } else {
   3025       return False; // can this ever happen?
   3026    }
   3027    assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
   3028    if (Q) {
   3029       putQReg(dreg, mkexpr(res), condT);
   3030    } else {
   3031       putDRegI64(dreg, mkexpr(res), condT);
   3032    }
   3033    DIP("vdup.%d %c%d, d%d[%d]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
   3034    return True;
   3035 }
   3036 
   3037 /* A7.4.1 Three registers of the same length */
   3038 static
   3039 Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
   3040 {
   3041    UInt Q = (theInstr >> 6) & 1;
   3042    UInt dreg = get_neon_d_regno(theInstr);
   3043    UInt nreg = get_neon_n_regno(theInstr);
   3044    UInt mreg = get_neon_m_regno(theInstr);
   3045    UInt A = (theInstr >> 8) & 0xF;
   3046    UInt B = (theInstr >> 4) & 1;
   3047    UInt C = (theInstr >> 20) & 0x3;
   3048    UInt U = (theInstr >> 24) & 1;
   3049    UInt size = C;
   3050 
   3051    IRTemp arg_n;
   3052    IRTemp arg_m;
   3053    IRTemp res;
   3054 
   3055    if (Q) {
   3056       arg_n = newTemp(Ity_V128);
   3057       arg_m = newTemp(Ity_V128);
   3058       res = newTemp(Ity_V128);
   3059       assign(arg_n, getQReg(nreg));
   3060       assign(arg_m, getQReg(mreg));
   3061    } else {
   3062       arg_n = newTemp(Ity_I64);
   3063       arg_m = newTemp(Ity_I64);
   3064       res = newTemp(Ity_I64);
   3065       assign(arg_n, getDRegI64(nreg));
   3066       assign(arg_m, getDRegI64(mreg));
   3067    }
   3068 
   3069    switch(A) {
   3070       case 0:
   3071          if (B == 0) {
   3072             /* VHADD */
   3073             ULong imm = 0;
   3074             IRExpr *imm_val;
   3075             IROp addOp;
   3076             IROp andOp;
   3077             IROp shOp;
   3078             HChar regType = Q ? 'q' : 'd';
   3079 
   3080             if (size == 3)
   3081                return False;
   3082             switch(size) {
   3083                case 0: imm = 0x101010101010101LL; break;
   3084                case 1: imm = 0x1000100010001LL; break;
   3085                case 2: imm = 0x100000001LL; break;
   3086                default: vassert(0);
   3087             }
   3088             if (Q) {
   3089                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   3090                andOp = Iop_AndV128;
   3091             } else {
   3092                imm_val = mkU64(imm);
   3093                andOp = Iop_And64;
   3094             }
   3095             if (U) {
   3096                switch(size) {
   3097                   case 0:
   3098                      addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
   3099                      shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3100                      break;
   3101                   case 1:
   3102                      addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
   3103                      shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3104                      break;
   3105                   case 2:
   3106                      addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
   3107                      shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3108                      break;
   3109                   default:
   3110                      vassert(0);
   3111                }
   3112             } else {
   3113                switch(size) {
   3114                   case 0:
   3115                      addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
   3116                      shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
   3117                      break;
   3118                   case 1:
   3119                      addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
   3120                      shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
   3121                      break;
   3122                   case 2:
   3123                      addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
   3124                      shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
   3125                      break;
   3126                   default:
   3127                      vassert(0);
   3128                }
   3129             }
   3130             assign(res,
   3131                    binop(addOp,
   3132                          binop(addOp,
   3133                                binop(shOp, mkexpr(arg_m), mkU8(1)),
   3134                                binop(shOp, mkexpr(arg_n), mkU8(1))),
   3135                          binop(shOp,
   3136                                binop(addOp,
   3137                                      binop(andOp, mkexpr(arg_m), imm_val),
   3138                                      binop(andOp, mkexpr(arg_n), imm_val)),
   3139                                mkU8(1))));
   3140             DIP("vhadd.%c%d %c%d, %c%d, %c%d\n",
   3141                 U ? 'u' : 's', 8 << size, regType,
   3142                 dreg, regType, nreg, regType, mreg);
   3143          } else {
   3144             /* VQADD */
   3145             IROp op, op2;
   3146             IRTemp tmp;
   3147             HChar reg_t = Q ? 'q' : 'd';
   3148             if (Q) {
   3149                switch (size) {
   3150                   case 0:
   3151                      op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
   3152                      op2 = Iop_Add8x16;
   3153                      break;
   3154                   case 1:
   3155                      op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
   3156                      op2 = Iop_Add16x8;
   3157                      break;
   3158                   case 2:
   3159                      op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
   3160                      op2 = Iop_Add32x4;
   3161                      break;
   3162                   case 3:
   3163                      op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
   3164                      op2 = Iop_Add64x2;
   3165                      break;
   3166                   default:
   3167                      vassert(0);
   3168                }
   3169             } else {
   3170                switch (size) {
   3171                   case 0:
   3172                      op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
   3173                      op2 = Iop_Add8x8;
   3174                      break;
   3175                   case 1:
   3176                      op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
   3177                      op2 = Iop_Add16x4;
   3178                      break;
   3179                   case 2:
   3180                      op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
   3181                      op2 = Iop_Add32x2;
   3182                      break;
   3183                   case 3:
   3184                      op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
   3185                      op2 = Iop_Add64;
   3186                      break;
   3187                   default:
   3188                      vassert(0);
   3189                }
   3190             }
   3191             if (Q) {
   3192                tmp = newTemp(Ity_V128);
   3193             } else {
   3194                tmp = newTemp(Ity_I64);
   3195             }
   3196             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   3197             assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
   3198             setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
   3199             DIP("vqadd.%c%d %c%d, %c%d, %c%d\n",
   3200                 U ? 'u' : 's',
   3201                 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3202          }
   3203          break;
   3204       case 1:
   3205          if (B == 0) {
   3206             /* VRHADD */
   3207             /* VRHADD C, A, B ::=
   3208                  C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
   3209             IROp shift_op, add_op;
   3210             IRTemp cc;
   3211             ULong one = 1;
   3212             HChar reg_t = Q ? 'q' : 'd';
   3213             switch (size) {
   3214                case 0: one = (one <<  8) | one; /* fall through */
   3215                case 1: one = (one << 16) | one; /* fall through */
   3216                case 2: one = (one << 32) | one; break;
   3217                case 3: return False;
   3218                default: vassert(0);
   3219             }
   3220             if (Q) {
   3221                switch (size) {
   3222                   case 0:
   3223                      shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
   3224                      add_op = Iop_Add8x16;
   3225                      break;
   3226                   case 1:
   3227                      shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   3228                      add_op = Iop_Add16x8;
   3229                      break;
   3230                   case 2:
   3231                      shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   3232                      add_op = Iop_Add32x4;
   3233                      break;
   3234                   case 3:
   3235                      return False;
   3236                   default:
   3237                      vassert(0);
   3238                }
   3239             } else {
   3240                switch (size) {
   3241                   case 0:
   3242                      shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
   3243                      add_op = Iop_Add8x8;
   3244                      break;
   3245                   case 1:
   3246                      shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
   3247                      add_op = Iop_Add16x4;
   3248                      break;
   3249                   case 2:
   3250                      shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
   3251                      add_op = Iop_Add32x2;
   3252                      break;
   3253                   case 3:
   3254                      return False;
   3255                   default:
   3256                      vassert(0);
   3257                }
   3258             }
   3259             if (Q) {
   3260                cc = newTemp(Ity_V128);
   3261                assign(cc, binop(shift_op,
   3262                                 binop(add_op,
   3263                                       binop(add_op,
   3264                                             binop(Iop_AndV128,
   3265                                                   mkexpr(arg_n),
   3266                                                   binop(Iop_64HLtoV128,
   3267                                                         mkU64(one),
   3268                                                         mkU64(one))),
   3269                                             binop(Iop_AndV128,
   3270                                                   mkexpr(arg_m),
   3271                                                   binop(Iop_64HLtoV128,
   3272                                                         mkU64(one),
   3273                                                         mkU64(one)))),
   3274                                       binop(Iop_64HLtoV128,
   3275                                             mkU64(one),
   3276                                             mkU64(one))),
   3277                                 mkU8(1)));
   3278                assign(res, binop(add_op,
   3279                                  binop(add_op,
   3280                                        binop(shift_op,
   3281                                              mkexpr(arg_n),
   3282                                              mkU8(1)),
   3283                                        binop(shift_op,
   3284                                              mkexpr(arg_m),
   3285                                              mkU8(1))),
   3286                                  mkexpr(cc)));
   3287             } else {
   3288                cc = newTemp(Ity_I64);
   3289                assign(cc, binop(shift_op,
   3290                                 binop(add_op,
   3291                                       binop(add_op,
   3292                                             binop(Iop_And64,
   3293                                                   mkexpr(arg_n),
   3294                                                   mkU64(one)),
   3295                                             binop(Iop_And64,
   3296                                                   mkexpr(arg_m),
   3297                                                   mkU64(one))),
   3298                                       mkU64(one)),
   3299                                 mkU8(1)));
   3300                assign(res, binop(add_op,
   3301                                  binop(add_op,
   3302                                        binop(shift_op,
   3303                                              mkexpr(arg_n),
   3304                                              mkU8(1)),
   3305                                        binop(shift_op,
   3306                                              mkexpr(arg_m),
   3307                                              mkU8(1))),
   3308                                  mkexpr(cc)));
   3309             }
   3310             DIP("vrhadd.%c%d %c%d, %c%d, %c%d\n",
   3311                 U ? 'u' : 's',
   3312                 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3313          } else {
   3314             if (U == 0)  {
   3315                switch(C) {
   3316                   case 0: {
   3317                      /* VAND  */
   3318                      HChar reg_t = Q ? 'q' : 'd';
   3319                      if (Q) {
   3320                         assign(res, binop(Iop_AndV128, mkexpr(arg_n),
   3321                                                        mkexpr(arg_m)));
   3322                      } else {
   3323                         assign(res, binop(Iop_And64, mkexpr(arg_n),
   3324                                                      mkexpr(arg_m)));
   3325                      }
   3326                      DIP("vand %c%d, %c%d, %c%d\n",
   3327                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3328                      break;
   3329                   }
   3330                   case 1: {
   3331                      /* VBIC  */
   3332                      HChar reg_t = Q ? 'q' : 'd';
   3333                      if (Q) {
   3334                         assign(res, binop(Iop_AndV128,mkexpr(arg_n),
   3335                                unop(Iop_NotV128, mkexpr(arg_m))));
   3336                      } else {
   3337                         assign(res, binop(Iop_And64, mkexpr(arg_n),
   3338                                unop(Iop_Not64, mkexpr(arg_m))));
   3339                      }
   3340                      DIP("vbic %c%d, %c%d, %c%d\n",
   3341                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3342                      break;
   3343                   }
   3344                   case 2:
   3345                      if ( nreg != mreg) {
   3346                         /* VORR  */
   3347                         HChar reg_t = Q ? 'q' : 'd';
   3348                         if (Q) {
   3349                            assign(res, binop(Iop_OrV128, mkexpr(arg_n),
   3350                                                          mkexpr(arg_m)));
   3351                         } else {
   3352                            assign(res, binop(Iop_Or64, mkexpr(arg_n),
   3353                                                        mkexpr(arg_m)));
   3354                         }
   3355                         DIP("vorr %c%d, %c%d, %c%d\n",
   3356                             reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3357                      } else {
   3358                         /* VMOV  */
   3359                         HChar reg_t = Q ? 'q' : 'd';
   3360                         assign(res, mkexpr(arg_m));
   3361                         DIP("vmov %c%d, %c%d\n", reg_t, dreg, reg_t, mreg);
   3362                      }
   3363                      break;
   3364                   case 3:{
   3365                      /* VORN  */
   3366                      HChar reg_t = Q ? 'q' : 'd';
   3367                      if (Q) {
   3368                         assign(res, binop(Iop_OrV128,mkexpr(arg_n),
   3369                                unop(Iop_NotV128, mkexpr(arg_m))));
   3370                      } else {
   3371                         assign(res, binop(Iop_Or64, mkexpr(arg_n),
   3372                                unop(Iop_Not64, mkexpr(arg_m))));
   3373                      }
   3374                      DIP("vorn %c%d, %c%d, %c%d\n",
   3375                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3376                      break;
   3377                   }
   3378                }
   3379             } else {
   3380                switch(C) {
   3381                   case 0:
   3382                      /* VEOR (XOR)  */
   3383                      if (Q) {
   3384                         assign(res, binop(Iop_XorV128, mkexpr(arg_n),
   3385                                                        mkexpr(arg_m)));
   3386                      } else {
   3387                         assign(res, binop(Iop_Xor64, mkexpr(arg_n),
   3388                                                      mkexpr(arg_m)));
   3389                      }
   3390                      DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   3391                            Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3392                      break;
   3393                   case 1:
   3394                      /* VBSL  */
   3395                      if (Q) {
   3396                         IRTemp reg_d = newTemp(Ity_V128);
   3397                         assign(reg_d, getQReg(dreg));
   3398                         assign(res,
   3399                                binop(Iop_OrV128,
   3400                                      binop(Iop_AndV128, mkexpr(arg_n),
   3401                                                         mkexpr(reg_d)),
   3402                                      binop(Iop_AndV128,
   3403                                            mkexpr(arg_m),
   3404                                            unop(Iop_NotV128,
   3405                                                  mkexpr(reg_d)) ) ) );
   3406                      } else {
   3407                         IRTemp reg_d = newTemp(Ity_I64);
   3408                         assign(reg_d, getDRegI64(dreg));
   3409                         assign(res,
   3410                                binop(Iop_Or64,
   3411                                      binop(Iop_And64, mkexpr(arg_n),
   3412                                                       mkexpr(reg_d)),
   3413                                      binop(Iop_And64,
   3414                                            mkexpr(arg_m),
   3415                                            unop(Iop_Not64, mkexpr(reg_d)))));
   3416                      }
   3417                      DIP("vbsl %c%u, %c%u, %c%u\n",
   3418                          Q ? 'q' : 'd', dreg,
   3419                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3420                      break;
   3421                   case 2:
   3422                      /* VBIT  */
   3423                      if (Q) {
   3424                         IRTemp reg_d = newTemp(Ity_V128);
   3425                         assign(reg_d, getQReg(dreg));
   3426                         assign(res,
   3427                                binop(Iop_OrV128,
   3428                                      binop(Iop_AndV128, mkexpr(arg_n),
   3429                                                         mkexpr(arg_m)),
   3430                                      binop(Iop_AndV128,
   3431                                            mkexpr(reg_d),
   3432                                            unop(Iop_NotV128, mkexpr(arg_m)))));
   3433                      } else {
   3434                         IRTemp reg_d = newTemp(Ity_I64);
   3435                         assign(reg_d, getDRegI64(dreg));
   3436                         assign(res,
   3437                                binop(Iop_Or64,
   3438                                      binop(Iop_And64, mkexpr(arg_n),
   3439                                                       mkexpr(arg_m)),
   3440                                      binop(Iop_And64,
   3441                                            mkexpr(reg_d),
   3442                                            unop(Iop_Not64, mkexpr(arg_m)))));
   3443                      }
   3444                      DIP("vbit %c%u, %c%u, %c%u\n",
   3445                          Q ? 'q' : 'd', dreg,
   3446                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3447                      break;
   3448                   case 3:
   3449                      /* VBIF  */
   3450                      if (Q) {
   3451                         IRTemp reg_d = newTemp(Ity_V128);
   3452                         assign(reg_d, getQReg(dreg));
   3453                         assign(res,
   3454                                binop(Iop_OrV128,
   3455                                      binop(Iop_AndV128, mkexpr(reg_d),
   3456                                                         mkexpr(arg_m)),
   3457                                      binop(Iop_AndV128,
   3458                                            mkexpr(arg_n),
   3459                                            unop(Iop_NotV128, mkexpr(arg_m)))));
   3460                      } else {
   3461                         IRTemp reg_d = newTemp(Ity_I64);
   3462                         assign(reg_d, getDRegI64(dreg));
   3463                         assign(res,
   3464                                binop(Iop_Or64,
   3465                                      binop(Iop_And64, mkexpr(reg_d),
   3466                                                       mkexpr(arg_m)),
   3467                                      binop(Iop_And64,
   3468                                            mkexpr(arg_n),
   3469                                            unop(Iop_Not64, mkexpr(arg_m)))));
   3470                      }
   3471                      DIP("vbif %c%u, %c%u, %c%u\n",
   3472                          Q ? 'q' : 'd', dreg,
   3473                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3474                      break;
   3475                }
   3476             }
   3477          }
   3478          break;
   3479       case 2:
   3480          if (B == 0) {
   3481             /* VHSUB */
   3482             /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1)   */
   3483             ULong imm = 0;
   3484             IRExpr *imm_val;
   3485             IROp subOp;
   3486             IROp notOp;
   3487             IROp andOp;
   3488             IROp shOp;
   3489             if (size == 3)
   3490                return False;
   3491             switch(size) {
   3492                case 0: imm = 0x101010101010101LL; break;
   3493                case 1: imm = 0x1000100010001LL; break;
   3494                case 2: imm = 0x100000001LL; break;
   3495                default: vassert(0);
   3496             }
   3497             if (Q) {
   3498                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   3499                andOp = Iop_AndV128;
   3500                notOp = Iop_NotV128;
   3501             } else {
   3502                imm_val = mkU64(imm);
   3503                andOp = Iop_And64;
   3504                notOp = Iop_Not64;
   3505             }
   3506             if (U) {
   3507                switch(size) {
   3508                   case 0:
   3509                      subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3510                      shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3511                      break;
   3512                   case 1:
   3513                      subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3514                      shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3515                      break;
   3516                   case 2:
   3517                      subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3518                      shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3519                      break;
   3520                   default:
   3521                      vassert(0);
   3522                }
   3523             } else {
   3524                switch(size) {
   3525                   case 0:
   3526                      subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3527                      shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
   3528                      break;
   3529                   case 1:
   3530                      subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3531                      shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
   3532                      break;
   3533                   case 2:
   3534                      subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3535                      shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
   3536                      break;
   3537                   default:
   3538                      vassert(0);
   3539                }
   3540             }
   3541             assign(res,
   3542                    binop(subOp,
   3543                          binop(subOp,
   3544                                binop(shOp, mkexpr(arg_n), mkU8(1)),
   3545                                binop(shOp, mkexpr(arg_m), mkU8(1))),
   3546                          binop(andOp,
   3547                                binop(andOp,
   3548                                      unop(notOp, mkexpr(arg_n)),
   3549                                      mkexpr(arg_m)),
   3550                                imm_val)));
   3551             DIP("vhsub.%c%u %c%u, %c%u, %c%u\n",
   3552                 U ? 'u' : 's', 8 << size,
   3553                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3554                 mreg);
   3555          } else {
   3556             /* VQSUB */
   3557             IROp op, op2;
   3558             IRTemp tmp;
   3559             if (Q) {
   3560                switch (size) {
   3561                   case 0:
   3562                      op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
   3563                      op2 = Iop_Sub8x16;
   3564                      break;
   3565                   case 1:
   3566                      op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
   3567                      op2 = Iop_Sub16x8;
   3568                      break;
   3569                   case 2:
   3570                      op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
   3571                      op2 = Iop_Sub32x4;
   3572                      break;
   3573                   case 3:
   3574                      op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
   3575                      op2 = Iop_Sub64x2;
   3576                      break;
   3577                   default:
   3578                      vassert(0);
   3579                }
   3580             } else {
   3581                switch (size) {
   3582                   case 0:
   3583                      op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
   3584                      op2 = Iop_Sub8x8;
   3585                      break;
   3586                   case 1:
   3587                      op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
   3588                      op2 = Iop_Sub16x4;
   3589                      break;
   3590                   case 2:
   3591                      op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
   3592                      op2 = Iop_Sub32x2;
   3593                      break;
   3594                   case 3:
   3595                      op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
   3596                      op2 = Iop_Sub64;
   3597                      break;
   3598                   default:
   3599                      vassert(0);
   3600                }
   3601             }
   3602             if (Q)
   3603                tmp = newTemp(Ity_V128);
   3604             else
   3605                tmp = newTemp(Ity_I64);
   3606             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   3607             assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
   3608             setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
   3609             DIP("vqsub.%c%u %c%u, %c%u, %c%u\n",
   3610                 U ? 'u' : 's', 8 << size,
   3611                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3612                 mreg);
   3613          }
   3614          break;
   3615       case 3: {
   3616             IROp op;
   3617             if (Q) {
   3618                switch (size) {
   3619                   case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
   3620                   case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
   3621                   case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
   3622                   case 3: return False;
   3623                   default: vassert(0);
   3624                }
   3625             } else {
   3626                switch (size) {
   3627                   case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
   3628                   case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
   3629                   case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
   3630                   case 3: return False;
   3631                   default: vassert(0);
   3632                }
   3633             }
   3634             if (B == 0) {
   3635                /* VCGT  */
   3636                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   3637                DIP("vcgt.%c%u %c%u, %c%u, %c%u\n",
   3638                    U ? 'u' : 's', 8 << size,
   3639                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3640                    mreg);
   3641             } else {
   3642                /* VCGE  */
   3643                /* VCGE res, argn, argm
   3644                     is equal to
   3645                   VCGT tmp, argm, argn
   3646                   VNOT res, tmp */
   3647                assign(res,
   3648                       unop(Q ? Iop_NotV128 : Iop_Not64,
   3649                            binop(op, mkexpr(arg_m), mkexpr(arg_n))));
   3650                DIP("vcge.%c%u %c%u, %c%u, %c%u\n",
   3651                    U ? 'u' : 's', 8 << size,
   3652                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3653                    mreg);
   3654             }
   3655          }
   3656          break;
   3657       case 4:
   3658          if (B == 0) {
   3659             /* VSHL */
   3660             IROp op, sub_op;
   3661             IRTemp tmp;
   3662             if (U) {
   3663                switch (size) {
   3664                   case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
   3665                   case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
   3666                   case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
   3667                   case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
   3668                   default: vassert(0);
   3669                }
   3670             } else {
   3671                tmp = newTemp(Q ? Ity_V128 : Ity_I64);
   3672                switch (size) {
   3673                   case 0:
   3674                      op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   3675                      sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3676                      break;
   3677                   case 1:
   3678                      op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   3679                      sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3680                      break;
   3681                   case 2:
   3682                      op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   3683                      sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3684                      break;
   3685                   case 3:
   3686                      op = Q ? Iop_Sar64x2 : Iop_Sar64;
   3687                      sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
   3688                      break;
   3689                   default:
   3690                      vassert(0);
   3691                }
   3692             }
   3693             if (U) {
   3694                if (!Q && (size == 3))
   3695                   assign(res, binop(op, mkexpr(arg_m),
   3696                                         unop(Iop_64to8, mkexpr(arg_n))));
   3697                else
   3698                   assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
   3699             } else {
   3700                if (Q)
   3701                   assign(tmp, binop(sub_op,
   3702                                     binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
   3703                                     mkexpr(arg_n)));
   3704                else
   3705                   assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
   3706                if (!Q && (size == 3))
   3707                   assign(res, binop(op, mkexpr(arg_m),
   3708                                         unop(Iop_64to8, mkexpr(tmp))));
   3709                else
   3710                   assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
   3711             }
   3712             DIP("vshl.%c%u %c%u, %c%u, %c%u\n",
   3713                 U ? 'u' : 's', 8 << size,
   3714                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3715                 nreg);
   3716          } else {
   3717             /* VQSHL */
   3718             IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
   3719             IRTemp tmp, shval, mask, old_shval;
   3720             UInt i;
   3721             ULong esize;
   3722             cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
   3723             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3724             if (U) {
   3725                switch (size) {
   3726                   case 0:
   3727                      op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
   3728                      op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
   3729                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3730                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3731                      break;
   3732                   case 1:
   3733                      op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
   3734                      op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
   3735                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3736                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3737                      break;
   3738                   case 2:
   3739                      op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
   3740                      op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
   3741                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3742                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3743                      break;
   3744                   case 3:
   3745                      op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
   3746                      op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
   3747                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3748                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3749                      break;
   3750                   default:
   3751                      vassert(0);
   3752                }
   3753             } else {
   3754                switch (size) {
   3755                   case 0:
   3756                      op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
   3757                      op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   3758                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3759                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3760                      break;
   3761                   case 1:
   3762                      op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
   3763                      op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   3764                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3765                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3766                      break;
   3767                   case 2:
   3768                      op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
   3769                      op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   3770                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3771                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3772                      break;
   3773                   case 3:
   3774                      op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
   3775                      op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
   3776                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3777                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3778                      break;
   3779                   default:
   3780                      vassert(0);
   3781                }
   3782             }
   3783             if (Q) {
   3784                tmp = newTemp(Ity_V128);
   3785                shval = newTemp(Ity_V128);
   3786                mask = newTemp(Ity_V128);
   3787             } else {
   3788                tmp = newTemp(Ity_I64);
   3789                shval = newTemp(Ity_I64);
   3790                mask = newTemp(Ity_I64);
   3791             }
   3792             assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
   3793             /* Only least significant byte from second argument is used.
   3794                Copy this byte to the whole vector element. */
   3795             assign(shval, binop(op_shrn,
   3796                                 binop(op_shln,
   3797                                        mkexpr(arg_n),
   3798                                        mkU8((8 << size) - 8)),
   3799                                 mkU8((8 << size) - 8)));
   3800             for(i = 0; i < size; i++) {
   3801                old_shval = shval;
   3802                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   3803                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   3804                                    mkexpr(old_shval),
   3805                                    binop(op_shln,
   3806                                          mkexpr(old_shval),
   3807                                          mkU8(8 << i))));
   3808             }
   3809             /* If shift is greater than or equal to the element size and
   3810                element is non-zero, then QC flag should be set. */
   3811             esize = (8 << size) - 1;
   3812             esize = (esize <<  8) | esize;
   3813             esize = (esize << 16) | esize;
   3814             esize = (esize << 32) | esize;
   3815             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   3816                              binop(cmp_gt, mkexpr(shval),
   3817                                            Q ? mkU128(esize) : mkU64(esize)),
   3818                              unop(cmp_neq, mkexpr(arg_m))),
   3819                        Q ? mkU128(0) : mkU64(0),
   3820                        Q, condT);
   3821             /* Otherwise QC flag should be set if shift value is positive and
   3822                result being right-shifted by the same value is not equal to the
   3823                left argument. */
   3824             assign(mask, binop(cmp_gt, mkexpr(shval),
   3825                                        Q ? mkU128(0) : mkU64(0)));
   3826             if (!Q && size == 3)
   3827                assign(tmp, binop(op_rev, mkexpr(res),
   3828                                          unop(Iop_64to8, mkexpr(arg_n))));
   3829             else
   3830                assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
   3831             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   3832                              mkexpr(tmp), mkexpr(mask)),
   3833                        binop(Q ? Iop_AndV128 : Iop_And64,
   3834                              mkexpr(arg_m), mkexpr(mask)),
   3835                        Q, condT);
   3836             DIP("vqshl.%c%u %c%u, %c%u, %c%u\n",
   3837                 U ? 'u' : 's', 8 << size,
   3838                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3839                 nreg);
   3840          }
   3841          break;
   3842       case 5:
   3843          if (B == 0) {
   3844             /* VRSHL */
   3845             IROp op, op_shrn, op_shln, cmp_gt, op_add;
   3846             IRTemp shval, old_shval, imm_val, round;
   3847             UInt i;
   3848             ULong imm;
   3849             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3850             imm = 1L;
   3851             switch (size) {
   3852                case 0: imm = (imm <<  8) | imm; /* fall through */
   3853                case 1: imm = (imm << 16) | imm; /* fall through */
   3854                case 2: imm = (imm << 32) | imm; /* fall through */
   3855                case 3: break;
   3856                default: vassert(0);
   3857             }
   3858             imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
   3859             round = newTemp(Q ? Ity_V128 : Ity_I64);
   3860             assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
   3861             if (U) {
   3862                switch (size) {
   3863                   case 0:
   3864                      op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
   3865                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   3866                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3867                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3868                      break;
   3869                   case 1:
   3870                      op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
   3871                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   3872                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3873                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3874                      break;
   3875                   case 2:
   3876                      op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
   3877                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   3878                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3879                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3880                      break;
   3881                   case 3:
   3882                      op = Q ? Iop_Shl64x2 : Iop_Shl64;
   3883                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   3884                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3885                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3886                      break;
   3887                   default:
   3888                      vassert(0);
   3889                }
   3890             } else {
   3891                switch (size) {
   3892                   case 0:
   3893                      op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
   3894                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   3895                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3896                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3897                      break;
   3898                   case 1:
   3899                      op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
   3900                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   3901                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3902                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3903                      break;
   3904                   case 2:
   3905                      op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
   3906                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   3907                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3908                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3909                      break;
   3910                   case 3:
   3911                      op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
   3912                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   3913                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3914                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3915                      break;
   3916                   default:
   3917                      vassert(0);
   3918                }
   3919             }
   3920             if (Q) {
   3921                shval = newTemp(Ity_V128);
   3922             } else {
   3923                shval = newTemp(Ity_I64);
   3924             }
   3925             /* Only least significant byte from second argument is used.
   3926                Copy this byte to the whole vector element. */
   3927             assign(shval, binop(op_shrn,
   3928                                 binop(op_shln,
   3929                                        mkexpr(arg_n),
   3930                                        mkU8((8 << size) - 8)),
   3931                                 mkU8((8 << size) - 8)));
   3932             for (i = 0; i < size; i++) {
   3933                old_shval = shval;
   3934                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   3935                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   3936                                    mkexpr(old_shval),
   3937                                    binop(op_shln,
   3938                                          mkexpr(old_shval),
   3939                                          mkU8(8 << i))));
   3940             }
   3941             /* Compute the result */
   3942             if (!Q && size == 3 && U) {
   3943                assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   3944                                    binop(op,
   3945                                          mkexpr(arg_m),
   3946                                          unop(Iop_64to8,
   3947                                               binop(op_add,
   3948                                                     mkexpr(arg_n),
   3949                                                     mkexpr(imm_val)))),
   3950                                    binop(Q ? Iop_AndV128 : Iop_And64,
   3951                                          mkexpr(imm_val),
   3952                                          binop(cmp_gt,
   3953                                                Q ? mkU128(0) : mkU64(0),
   3954                                                mkexpr(arg_n)))));
   3955                assign(res, binop(op_add,
   3956                                  binop(op,
   3957                                        mkexpr(arg_m),
   3958                                        unop(Iop_64to8, mkexpr(arg_n))),
   3959                                  mkexpr(round)));
   3960             } else {
   3961                assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   3962                                    binop(op,
   3963                                          mkexpr(arg_m),
   3964                                          binop(op_add,
   3965                                                mkexpr(arg_n),
   3966                                                mkexpr(imm_val))),
   3967                                    binop(Q ? Iop_AndV128 : Iop_And64,
   3968                                          mkexpr(imm_val),
   3969                                          binop(cmp_gt,
   3970                                                Q ? mkU128(0) : mkU64(0),
   3971                                                mkexpr(arg_n)))));
   3972                assign(res, binop(op_add,
   3973                                  binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   3974                                  mkexpr(round)));
   3975             }
   3976             DIP("vrshl.%c%u %c%u, %c%u, %c%u\n",
   3977                 U ? 'u' : 's', 8 << size,
   3978                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3979                 nreg);
   3980          } else {
   3981             /* VQRSHL */
   3982             IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
   3983             IRTemp tmp, shval, mask, old_shval, imm_val, round;
   3984             UInt i;
   3985             ULong esize, imm;
   3986             cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
   3987             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3988             imm = 1L;
   3989             switch (size) {
   3990                case 0: imm = (imm <<  8) | imm; /* fall through */
   3991                case 1: imm = (imm << 16) | imm; /* fall through */
   3992                case 2: imm = (imm << 32) | imm; /* fall through */
   3993                case 3: break;
   3994                default: vassert(0);
   3995             }
   3996             imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
   3997             round = newTemp(Q ? Ity_V128 : Ity_I64);
   3998             assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
   3999             if (U) {
   4000                switch (size) {
   4001                   case 0:
   4002                      op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
   4003                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   4004                      op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
   4005                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   4006                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   4007                      break;
   4008                   case 1:
   4009                      op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
   4010                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   4011                      op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
   4012                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   4013                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   4014                      break;
   4015                   case 2:
   4016                      op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
   4017                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   4018                      op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
   4019                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   4020                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   4021                      break;
   4022                   case 3:
   4023                      op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
   4024                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   4025                      op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
   4026                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   4027                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   4028                      break;
   4029                   default:
   4030                      vassert(0);
   4031                }
   4032             } else {
   4033                switch (size) {
   4034                   case 0:
   4035                      op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
   4036                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   4037                      op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   4038                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   4039                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   4040                      break;
   4041                   case 1:
   4042                      op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
   4043                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   4044                      op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   4045                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   4046                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   4047                      break;
   4048                   case 2:
   4049                      op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
   4050                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   4051                      op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   4052                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   4053                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   4054                      break;
   4055                   case 3:
   4056                      op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
   4057                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   4058                      op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
   4059                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   4060                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   4061                      break;
   4062                   default:
   4063                      vassert(0);
   4064                }
   4065             }
   4066             if (Q) {
   4067                tmp = newTemp(Ity_V128);
   4068                shval = newTemp(Ity_V128);
   4069                mask = newTemp(Ity_V128);
   4070             } else {
   4071                tmp = newTemp(Ity_I64);
   4072                shval = newTemp(Ity_I64);
   4073                mask = newTemp(Ity_I64);
   4074             }
   4075             /* Only least significant byte from second argument is used.
   4076                Copy this byte to the whole vector element. */
   4077             assign(shval, binop(op_shrn,
   4078                                 binop(op_shln,
   4079                                        mkexpr(arg_n),
   4080                                        mkU8((8 << size) - 8)),
   4081                                 mkU8((8 << size) - 8)));
   4082             for (i = 0; i < size; i++) {
   4083                old_shval = shval;
   4084                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   4085                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   4086                                    mkexpr(old_shval),
   4087                                    binop(op_shln,
   4088                                          mkexpr(old_shval),
   4089                                          mkU8(8 << i))));
   4090             }
   4091             /* Compute the result */
   4092             assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   4093                                 binop(op,
   4094                                       mkexpr(arg_m),
   4095                                       binop(op_add,
   4096                                             mkexpr(arg_n),
   4097                                             mkexpr(imm_val))),
   4098                                 binop(Q ? Iop_AndV128 : Iop_And64,
   4099                                       mkexpr(imm_val),
   4100                                       binop(cmp_gt,
   4101                                             Q ? mkU128(0) : mkU64(0),
   4102                                             mkexpr(arg_n)))));
   4103             assign(res, binop(op_add,
   4104                               binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   4105                               mkexpr(round)));
   4106             /* If shift is greater than or equal to the element size and
   4107                element is non-zero, then QC flag should be set. */
   4108             esize = (8 << size) - 1;
   4109             esize = (esize <<  8) | esize;
   4110             esize = (esize << 16) | esize;
   4111             esize = (esize << 32) | esize;
   4112             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4113                              binop(cmp_gt, mkexpr(shval),
   4114                                            Q ? mkU128(esize) : mkU64(esize)),
   4115                              unop(cmp_neq, mkexpr(arg_m))),
   4116                        Q ? mkU128(0) : mkU64(0),
   4117                        Q, condT);
   4118             /* Otherwise QC flag should be set if shift value is positive and
   4119                result being right-shifted by the same value is not equal to the
   4120                left argument. */
   4121             assign(mask, binop(cmp_gt, mkexpr(shval),
   4122                                Q ? mkU128(0) : mkU64(0)));
   4123             if (!Q && size == 3)
   4124                assign(tmp, binop(op_rev, mkexpr(res),
   4125                                          unop(Iop_64to8, mkexpr(arg_n))));
   4126             else
   4127                assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
   4128             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4129                              mkexpr(tmp), mkexpr(mask)),
   4130                        binop(Q ? Iop_AndV128 : Iop_And64,
   4131                              mkexpr(arg_m), mkexpr(mask)),
   4132                        Q, condT);
   4133             DIP("vqrshl.%c%u %c%u, %c%u, %c%u\n",
   4134                 U ? 'u' : 's', 8 << size,
   4135                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   4136                 nreg);
   4137          }
   4138          break;
   4139       case 6:
   4140          /* VMAX, VMIN  */
   4141          if (B == 0) {
   4142             /* VMAX */
   4143             IROp op;
   4144             if (U == 0) {
   4145                switch (size) {
   4146                   case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
   4147                   case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
   4148                   case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
   4149                   case 3: return False;
   4150                   default: vassert(0);
   4151                }
   4152             } else {
   4153                switch (size) {
   4154                   case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
   4155                   case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
   4156                   case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
   4157                   case 3: return False;
   4158                   default: vassert(0);
   4159                }
   4160             }
   4161             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4162             DIP("vmax.%c%u %c%u, %c%u, %c%u\n",
   4163                 U ? 'u' : 's', 8 << size,
   4164                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4165                 mreg);
   4166          } else {
   4167             /* VMIN */
   4168             IROp op;
   4169             if (U == 0) {
   4170                switch (size) {
   4171                   case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
   4172                   case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
   4173                   case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
   4174                   case 3: return False;
   4175                   default: vassert(0);
   4176                }
   4177             } else {
   4178                switch (size) {
   4179                   case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
   4180                   case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
   4181                   case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
   4182                   case 3: return False;
   4183                   default: vassert(0);
   4184                }
   4185             }
   4186             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4187             DIP("vmin.%c%u %c%u, %c%u, %c%u\n",
   4188                 U ? 'u' : 's', 8 << size,
   4189                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4190                 mreg);
   4191          }
   4192          break;
   4193       case 7:
   4194          if (B == 0) {
   4195             /* VABD */
   4196             IROp op_cmp, op_sub;
   4197             IRTemp cond;
   4198             if ((theInstr >> 23) & 1) {
   4199                vpanic("VABDL should not be in dis_neon_data_3same\n");
   4200             }
   4201             if (Q) {
   4202                switch (size) {
   4203                   case 0:
   4204                      op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
   4205                      op_sub = Iop_Sub8x16;
   4206                      break;
   4207                   case 1:
   4208                      op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
   4209                      op_sub = Iop_Sub16x8;
   4210                      break;
   4211                   case 2:
   4212                      op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
   4213                      op_sub = Iop_Sub32x4;
   4214                      break;
   4215                   case 3:
   4216                      return False;
   4217                   default:
   4218                      vassert(0);
   4219                }
   4220             } else {
   4221                switch (size) {
   4222                   case 0:
   4223                      op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4224                      op_sub = Iop_Sub8x8;
   4225                      break;
   4226                   case 1:
   4227                      op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   4228                      op_sub = Iop_Sub16x4;
   4229                      break;
   4230                   case 2:
   4231                      op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   4232                      op_sub = Iop_Sub32x2;
   4233                      break;
   4234                   case 3:
   4235                      return False;
   4236                   default:
   4237                      vassert(0);
   4238                }
   4239             }
   4240             if (Q) {
   4241                cond = newTemp(Ity_V128);
   4242             } else {
   4243                cond = newTemp(Ity_I64);
   4244             }
   4245             assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
   4246             assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
   4247                               binop(Q ? Iop_AndV128 : Iop_And64,
   4248                                     binop(op_sub, mkexpr(arg_n),
   4249                                                   mkexpr(arg_m)),
   4250                                     mkexpr(cond)),
   4251                               binop(Q ? Iop_AndV128 : Iop_And64,
   4252                                     binop(op_sub, mkexpr(arg_m),
   4253                                                   mkexpr(arg_n)),
   4254                                     unop(Q ? Iop_NotV128 : Iop_Not64,
   4255                                          mkexpr(cond)))));
   4256             DIP("vabd.%c%u %c%u, %c%u, %c%u\n",
   4257                 U ? 'u' : 's', 8 << size,
   4258                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4259                 mreg);
   4260          } else {
   4261             /* VABA */
   4262             IROp op_cmp, op_sub, op_add;
   4263             IRTemp cond, acc, tmp;
   4264             if ((theInstr >> 23) & 1) {
   4265                vpanic("VABAL should not be in dis_neon_data_3same");
   4266             }
   4267             if (Q) {
   4268                switch (size) {
   4269                   case 0:
   4270                      op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
   4271                      op_sub = Iop_Sub8x16;
   4272                      op_add = Iop_Add8x16;
   4273                      break;
   4274                   case 1:
   4275                      op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
   4276                      op_sub = Iop_Sub16x8;
   4277                      op_add = Iop_Add16x8;
   4278                      break;
   4279                   case 2:
   4280                      op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
   4281                      op_sub = Iop_Sub32x4;
   4282                      op_add = Iop_Add32x4;
   4283                      break;
   4284                   case 3:
   4285                      return False;
   4286                   default:
   4287                      vassert(0);
   4288                }
   4289             } else {
   4290                switch (size) {
   4291                   case 0:
   4292                      op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4293                      op_sub = Iop_Sub8x8;
   4294                      op_add = Iop_Add8x8;
   4295                      break;
   4296                   case 1:
   4297                      op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   4298                      op_sub = Iop_Sub16x4;
   4299                      op_add = Iop_Add16x4;
   4300                      break;
   4301                   case 2:
   4302                      op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   4303                      op_sub = Iop_Sub32x2;
   4304                      op_add = Iop_Add32x2;
   4305                      break;
   4306                   case 3:
   4307                      return False;
   4308                   default:
   4309                      vassert(0);
   4310                }
   4311             }
   4312             if (Q) {
   4313                cond = newTemp(Ity_V128);
   4314                acc = newTemp(Ity_V128);
   4315                tmp = newTemp(Ity_V128);
   4316                assign(acc, getQReg(dreg));
   4317             } else {
   4318                cond = newTemp(Ity_I64);
   4319                acc = newTemp(Ity_I64);
   4320                tmp = newTemp(Ity_I64);
   4321                assign(acc, getDRegI64(dreg));
   4322             }
   4323             assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
   4324             assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
   4325                               binop(Q ? Iop_AndV128 : Iop_And64,
   4326                                     binop(op_sub, mkexpr(arg_n),
   4327                                                   mkexpr(arg_m)),
   4328                                     mkexpr(cond)),
   4329                               binop(Q ? Iop_AndV128 : Iop_And64,
   4330                                     binop(op_sub, mkexpr(arg_m),
   4331                                                   mkexpr(arg_n)),
   4332                                     unop(Q ? Iop_NotV128 : Iop_Not64,
   4333                                          mkexpr(cond)))));
   4334             assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
   4335             DIP("vaba.%c%u %c%u, %c%u, %c%u\n",
   4336                 U ? 'u' : 's', 8 << size,
   4337                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4338                 mreg);
   4339          }
   4340          break;
   4341       case 8:
   4342          if (B == 0) {
   4343             IROp op;
   4344             if (U == 0) {
   4345                /* VADD  */
   4346                switch (size) {
   4347                   case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
   4348                   case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
   4349                   case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
   4350                   case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
   4351                   default: vassert(0);
   4352                }
   4353                DIP("vadd.i%u %c%u, %c%u, %c%u\n",
   4354                    8 << size, Q ? 'q' : 'd',
   4355                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4356             } else {
   4357                /* VSUB  */
   4358                switch (size) {
   4359                   case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
   4360                   case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
   4361                   case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
   4362                   case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
   4363                   default: vassert(0);
   4364                }
   4365                DIP("vsub.i%u %c%u, %c%u, %c%u\n",
   4366                    8 << size, Q ? 'q' : 'd',
   4367                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4368             }
   4369             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4370          } else {
   4371             IROp op;
   4372             switch (size) {
   4373                case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
   4374                case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
   4375                case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
   4376                case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
   4377                default: vassert(0);
   4378             }
   4379             if (U == 0) {
   4380                /* VTST  */
   4381                assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
   4382                                           mkexpr(arg_n),
   4383                                           mkexpr(arg_m))));
   4384                DIP("vtst.%u %c%u, %c%u, %c%u\n",
   4385                    8 << size, Q ? 'q' : 'd',
   4386                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4387             } else {
   4388                /* VCEQ  */
   4389                assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   4390                                 unop(op,
   4391                                      binop(Q ? Iop_XorV128 : Iop_Xor64,
   4392                                            mkexpr(arg_n),
   4393                                            mkexpr(arg_m)))));
   4394                DIP("vceq.i%u %c%u, %c%u, %c%u\n",
   4395                    8 << size, Q ? 'q' : 'd',
   4396                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4397             }
   4398          }
   4399          break;
   4400       case 9:
   4401          if (B == 0) {
   4402             /* VMLA, VMLS (integer) */
   4403             IROp op, op2;
   4404             UInt P = (theInstr >> 24) & 1;
   4405             if (P) {
   4406                switch (size) {
   4407                   case 0:
   4408                      op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
   4409                      op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   4410                      break;
   4411                   case 1:
   4412                      op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   4413                      op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   4414                      break;
   4415                   case 2:
   4416                      op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   4417                      op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   4418                      break;
   4419                   case 3:
   4420                      return False;
   4421                   default:
   4422                      vassert(0);
   4423                }
   4424             } else {
   4425                switch (size) {
   4426                   case 0:
   4427                      op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
   4428                      op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
   4429                      break;
   4430                   case 1:
   4431                      op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   4432                      op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
   4433                      break;
   4434                   case 2:
   4435                      op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   4436                      op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
   4437                      break;
   4438                   case 3:
   4439                      return False;
   4440                   default:
   4441                      vassert(0);
   4442                }
   4443             }
   4444             assign(res, binop(op2,
   4445                               Q ? getQReg(dreg) : getDRegI64(dreg),
   4446                               binop(op, mkexpr(arg_n), mkexpr(arg_m))));
   4447             DIP("vml%c.i%u %c%u, %c%u, %c%u\n",
   4448                 P ? 's' : 'a', 8 << size,
   4449                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4450                 mreg);
   4451          } else {
   4452             /* VMUL */
   4453             IROp op;
   4454             UInt P = (theInstr >> 24) & 1;
   4455             if (P) {
   4456                switch (size) {
   4457                   case 0:
   4458                      op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
   4459                      break;
   4460                   case 1: case 2: case 3: return False;
   4461                   default: vassert(0);
   4462                }
   4463             } else {
   4464                switch (size) {
   4465                   case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
   4466                   case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
   4467                   case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
   4468                   case 3: return False;
   4469                   default: vassert(0);
   4470                }
   4471             }
   4472             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4473             DIP("vmul.%c%u %c%u, %c%u, %c%u\n",
   4474                 P ? 'p' : 'i', 8 << size,
   4475                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4476                 mreg);
   4477          }
   4478          break;
   4479       case 10: {
   4480          /* VPMAX, VPMIN  */
   4481          UInt P = (theInstr >> 4) & 1;
   4482          IROp op;
   4483          if (Q)
   4484             return False;
   4485          if (P) {
   4486             switch (size) {
   4487                case 0: op = U ? Iop_PwMin8Ux8  : Iop_PwMin8Sx8; break;
   4488                case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
   4489                case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
   4490                case 3: return False;
   4491                default: vassert(0);
   4492             }
   4493          } else {
   4494             switch (size) {
   4495                case 0: op = U ? Iop_PwMax8Ux8  : Iop_PwMax8Sx8; break;
   4496                case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
   4497                case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
   4498                case 3: return False;
   4499                default: vassert(0);
   4500             }
   4501          }
   4502          assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4503          DIP("vp%s.%c%u %c%u, %c%u, %c%u\n",
   4504              P ? "min" : "max", U ? 'u' : 's',
   4505              8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
   4506              Q ? 'q' : 'd', mreg);
   4507          break;
   4508       }
   4509       case 11:
   4510          if (B == 0) {
   4511             if (U == 0) {
   4512                /* VQDMULH  */
   4513                IROp op ,op2;
   4514                ULong imm;
   4515                switch (size) {
   4516                   case 0: case 3:
   4517                      return False;
   4518                   case 1:
   4519                      op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
   4520                      op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   4521                      imm = 1LL << 15;
   4522                      imm = (imm << 16) | imm;
   4523                      imm = (imm << 32) | imm;
   4524                      break;
   4525                   case 2:
   4526                      op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
   4527                      op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   4528                      imm = 1LL << 31;
   4529                      imm = (imm << 32) | imm;
   4530                      break;
   4531                   default:
   4532                      vassert(0);
   4533                }
   4534                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4535                setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4536                                 binop(op2, mkexpr(arg_n),
   4537                                            Q ? mkU128(imm) : mkU64(imm)),
   4538                                 binop(op2, mkexpr(arg_m),
   4539                                            Q ? mkU128(imm) : mkU64(imm))),
   4540                           Q ? mkU128(0) : mkU64(0),
   4541                           Q, condT);
   4542                DIP("vqdmulh.s%u %c%u, %c%u, %c%u\n",
   4543                    8 << size, Q ? 'q' : 'd',
   4544                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4545             } else {
   4546                /* VQRDMULH */
   4547                IROp op ,op2;
   4548                ULong imm;
   4549                switch(size) {
   4550                   case 0: case 3:
   4551                      return False;
   4552                   case 1:
   4553                      imm = 1LL << 15;
   4554                      imm = (imm << 16) | imm;
   4555                      imm = (imm << 32) | imm;
   4556                      op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
   4557                      op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   4558                      break;
   4559                   case 2:
   4560                      imm = 1LL << 31;
   4561                      imm = (imm << 32) | imm;
   4562                      op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
   4563                      op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   4564                      break;
   4565                   default:
   4566                      vassert(0);
   4567                }
   4568                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4569                setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4570                                 binop(op2, mkexpr(arg_n),
   4571                                            Q ? mkU128(imm) : mkU64(imm)),
   4572                                 binop(op2, mkexpr(arg_m),
   4573                                            Q ? mkU128(imm) : mkU64(imm))),
   4574                           Q ? mkU128(0) : mkU64(0),
   4575                           Q, condT);
   4576                DIP("vqrdmulh.s%u %c%u, %c%u, %c%u\n",
   4577                    8 << size, Q ? 'q' : 'd',
   4578                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4579             }
   4580          } else {
   4581             if (U == 0) {
   4582                /* VPADD */
   4583                IROp op;
   4584                if (Q)
   4585                   return False;
   4586                switch (size) {
   4587                   case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8;  break;
   4588                   case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
   4589                   case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
   4590                   case 3: return False;
   4591                   default: vassert(0);
   4592                }
   4593                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4594                DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
   4595                    8 << size, Q ? 'q' : 'd',
   4596                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4597             }
   4598          }
   4599          break;
   4600       /* Starting from here these are FP SIMD cases */
   4601       case 13:
   4602          if (B == 0) {
   4603             IROp op;
   4604             if (U == 0) {
   4605                if ((C >> 1) == 0) {
   4606                   /* VADD  */
   4607                   op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2 ;
   4608                   DIP("vadd.f32 %c%u, %c%u, %c%u\n",
   4609                       Q ? 'q' : 'd', dreg,
   4610                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4611                } else {
   4612                   /* VSUB  */
   4613                   op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2 ;
   4614                   DIP("vsub.f32 %c%u, %c%u, %c%u\n",
   4615                       Q ? 'q' : 'd', dreg,
   4616                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4617                }
   4618             } else {
   4619                if ((C >> 1) == 0) {
   4620                   /* VPADD */
   4621                   if (Q)
   4622                      return False;
   4623                   op = Iop_PwAdd32Fx2;
   4624                   DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
   4625                } else {
   4626                   /* VABD  */
   4627                   if (Q) {
   4628                      assign(res, unop(Iop_Abs32Fx4,
   4629                                       triop(Iop_Sub32Fx4,
   4630                                             get_FAKE_roundingmode(),
   4631                                             mkexpr(arg_n),
   4632                                             mkexpr(arg_m))));
   4633                   } else {
   4634                      assign(res, unop(Iop_Abs32Fx2,
   4635                                       binop(Iop_Sub32Fx2,
   4636                                             mkexpr(arg_n),
   4637                                             mkexpr(arg_m))));
   4638                   }
   4639                   DIP("vabd.f32 %c%u, %c%u, %c%u\n",
   4640                       Q ? 'q' : 'd', dreg,
   4641                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4642                   break;
   4643                }
   4644             }
   4645             assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   4646          } else {
   4647             if (U == 0) {
   4648                /* VMLA, VMLS  */
   4649                IROp op, op2;
   4650                UInt P = (theInstr >> 21) & 1;
   4651                if (P) {
   4652                   switch (size & 1) {
   4653                      case 0:
   4654                         op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   4655                         op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
   4656                         break;
   4657                      case 1: return False;
   4658                      default: vassert(0);
   4659                   }
   4660                } else {
   4661                   switch (size & 1) {
   4662                      case 0:
   4663                         op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   4664                         op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
   4665                         break;
   4666                      case 1: return False;
   4667                      default: vassert(0);
   4668                   }
   4669                }
   4670                assign(res, binop_w_fake_RM(
   4671                               op2,
   4672                               Q ? getQReg(dreg) : getDRegI64(dreg),
   4673                               binop_w_fake_RM(op, mkexpr(arg_n),
   4674                                                   mkexpr(arg_m))));
   4675 
   4676                DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
   4677                    P ? 's' : 'a', Q ? 'q' : 'd',
   4678                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4679             } else {
   4680                /* VMUL  */
   4681                IROp op;
   4682                if ((C >> 1) != 0)
   4683                   return False;
   4684                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ;
   4685                assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   4686                DIP("vmul.f32 %c%u, %c%u, %c%u\n",
   4687                    Q ? 'q' : 'd', dreg,
   4688                    Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4689             }
   4690          }
   4691          break;
   4692       case 14:
   4693          if (B == 0) {
   4694             if (U == 0) {
   4695                if ((C >> 1) == 0) {
   4696                   /* VCEQ  */
   4697                   IROp op;
   4698                   if ((theInstr >> 20) & 1)
   4699                      return False;
   4700                   op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2;
   4701                   assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4702                   DIP("vceq.f32 %c%u, %c%u, %c%u\n",
   4703                       Q ? 'q' : 'd', dreg,
   4704                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4705                } else {
   4706                   return False;
   4707                }
   4708             } else {
   4709                if ((C >> 1) == 0) {
   4710                   /* VCGE  */
   4711                   IROp op;
   4712                   if ((theInstr >> 20) & 1)
   4713                      return False;
   4714                   op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
   4715                   assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4716                   DIP("vcge.f32 %c%u, %c%u, %c%u\n",
   4717                       Q ? 'q' : 'd', dreg,
   4718                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4719                } else {
   4720                   /* VCGT  */
   4721                   IROp op;
   4722                   if ((theInstr >> 20) & 1)
   4723                      return False;
   4724                   op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
   4725                   assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4726                   DIP("vcgt.f32 %c%u, %c%u, %c%u\n",
   4727                       Q ? 'q' : 'd', dreg,
   4728                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4729                }
   4730             }
   4731          } else {
   4732             if (U == 1) {
   4733                /* VACGE, VACGT */
   4734                UInt op_bit = (theInstr >> 21) & 1;
   4735                IROp op, op2;
   4736                op2 = Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2;
   4737                if (op_bit) {
   4738                   op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
   4739                   assign(res, binop(op,
   4740                                     unop(op2, mkexpr(arg_n)),
   4741                                     unop(op2, mkexpr(arg_m))));
   4742                } else {
   4743                   op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
   4744                   assign(res, binop(op,
   4745                                     unop(op2, mkexpr(arg_n)),
   4746                                     unop(op2, mkexpr(arg_m))));
   4747                }
   4748                DIP("vacg%c.f32 %c%u, %c%u, %c%u\n", op_bit ? 't' : 'e',
   4749                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
   4750                    Q ? 'q' : 'd', mreg);
   4751             }
   4752          }
   4753          break;
   4754       case 15:
   4755          if (B == 0) {
   4756             if (U == 0) {
   4757                /* VMAX, VMIN  */
   4758                IROp op;
   4759                if ((theInstr >> 20) & 1)
   4760                   return False;
   4761                if ((theInstr >> 21) & 1) {
   4762                   op = Q ? Iop_Min32Fx4 : Iop_Min32Fx2;
   4763                   DIP("vmin.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4764                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4765                } else {
   4766                   op = Q ? Iop_Max32Fx4 : Iop_Max32Fx2;
   4767                   DIP("vmax.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4768                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4769                }
   4770                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4771             } else {
   4772                /* VPMAX, VPMIN   */
   4773                IROp op;
   4774                if (Q)
   4775                   return False;
   4776                if ((theInstr >> 20) & 1)
   4777                   return False;
   4778                if ((theInstr >> 21) & 1) {
   4779                   op = Iop_PwMin32Fx2;
   4780                   DIP("vpmin.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
   4781                } else {
   4782                   op = Iop_PwMax32Fx2;
   4783                   DIP("vpmax.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
   4784                }
   4785                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4786             }
   4787          } else {
   4788             if (U == 0) {
   4789                if ((C >> 1) == 0) {
   4790                   /* VRECPS */
   4791                   if ((theInstr >> 20) & 1)
   4792                      return False;
   4793                   assign(res, binop(Q ? Iop_Recps32Fx4 : Iop_Recps32Fx2,
   4794                                     mkexpr(arg_n),
   4795                                     mkexpr(arg_m)));
   4796                   DIP("vrecps.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4797                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4798                } else {
   4799                   /* VRSQRTS  */
   4800                   if ((theInstr >> 20) & 1)
   4801                      return False;
   4802                   assign(res, binop(Q ? Iop_Rsqrts32Fx4 : Iop_Rsqrts32Fx2,
   4803                                     mkexpr(arg_n),
   4804                                     mkexpr(arg_m)));
   4805                   DIP("vrsqrts.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4806                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4807                }
   4808             }
   4809          }
   4810          break;
   4811    }
   4812 
   4813    if (Q) {
   4814       putQReg(dreg, mkexpr(res), condT);
   4815    } else {
   4816       putDRegI64(dreg, mkexpr(res), condT);
   4817    }
   4818 
   4819    return True;
   4820 }
   4821 
   4822 /* A7.4.2 Three registers of different length */
   4823 static
   4824 Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
   4825 {
   4826    UInt A = (theInstr >> 8) & 0xf;
   4827    UInt B = (theInstr >> 20) & 3;
   4828    UInt U = (theInstr >> 24) & 1;
   4829    UInt P = (theInstr >> 9) & 1;
   4830    UInt mreg = get_neon_m_regno(theInstr);
   4831    UInt nreg = get_neon_n_regno(theInstr);
   4832    UInt dreg = get_neon_d_regno(theInstr);
   4833    UInt size = B;
   4834    ULong imm;
   4835    IRTemp res, arg_m, arg_n, cond, tmp;
   4836    IROp cvt, cvt2, cmp, op, op2, sh, add;
   4837    switch (A) {
   4838       case 0: case 1: case 2: case 3:
   4839          /* VADDL, VADDW, VSUBL, VSUBW */
   4840          if (dreg & 1)
   4841             return False;
   4842          dreg >>= 1;
   4843          size = B;
   4844          switch (size) {
   4845             case 0:
   4846                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   4847                op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
   4848                break;
   4849             case 1:
   4850                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   4851                op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
   4852                break;
   4853             case 2:
   4854                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   4855                op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
   4856                break;
   4857             case 3:
   4858                return False;
   4859             default:
   4860                vassert(0);
   4861          }
   4862          arg_n = newTemp(Ity_V128);
   4863          arg_m = newTemp(Ity_V128);
   4864          if (A & 1) {
   4865             if (nreg & 1)
   4866                return False;
   4867             nreg >>= 1;
   4868             assign(arg_n, getQReg(nreg));
   4869          } else {
   4870             assign(arg_n, unop(cvt, getDRegI64(nreg)));
   4871          }
   4872          assign(arg_m, unop(cvt, getDRegI64(mreg)));
   4873          putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   4874                        condT);
   4875          DIP("v%s%c.%c%u q%u, %c%u, d%u\n", (A & 2) ? "sub" : "add",
   4876              (A & 1) ? 'w' : 'l', U ? 'u' : 's', 8 << size, dreg,
   4877              (A & 1) ? 'q' : 'd', nreg, mreg);
   4878          return True;
   4879       case 4:
   4880          /* VADDHN, VRADDHN */
   4881          if (mreg & 1)
   4882             return False;
   4883          mreg >>= 1;
   4884          if (nreg & 1)
   4885             return False;
   4886          nreg >>= 1;
   4887          size = B;
   4888          switch (size) {
   4889             case 0:
   4890                op = Iop_Add16x8;
   4891                cvt = Iop_NarrowUn16to8x8;
   4892                sh = Iop_ShrN16x8;
   4893                imm = 1U << 7;
   4894                imm = (imm << 16) | imm;
   4895                imm = (imm << 32) | imm;
   4896                break;
   4897             case 1:
   4898                op = Iop_Add32x4;
   4899                cvt = Iop_NarrowUn32to16x4;
   4900                sh = Iop_ShrN32x4;
   4901                imm = 1U << 15;
   4902                imm = (imm << 32) | imm;
   4903                break;
   4904             case 2:
   4905                op = Iop_Add64x2;
   4906                cvt = Iop_NarrowUn64to32x2;
   4907                sh = Iop_ShrN64x2;
   4908                imm = 1U << 31;
   4909                break;
   4910             case 3:
   4911                return False;
   4912             default:
   4913                vassert(0);
   4914          }
   4915          tmp = newTemp(Ity_V128);
   4916          res = newTemp(Ity_V128);
   4917          assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
   4918          if (U) {
   4919             /* VRADDHN */
   4920             assign(res, binop(op, mkexpr(tmp),
   4921                      binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
   4922          } else {
   4923             assign(res, mkexpr(tmp));
   4924          }
   4925          putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
   4926                     condT);
   4927          DIP("v%saddhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
   4928              nreg, mreg);
   4929          return True;
   4930       case 5:
   4931          /* VABAL */
   4932          if (!((theInstr >> 23) & 1)) {
   4933             vpanic("VABA should not be in dis_neon_data_3diff\n");
   4934          }
   4935          if (dreg & 1)
   4936             return False;
   4937          dreg >>= 1;
   4938          switch (size) {
   4939             case 0:
   4940                cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4941                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   4942                cvt2 = Iop_Widen8Sto16x8;
   4943                op = Iop_Sub16x8;
   4944                op2 = Iop_Add16x8;
   4945                break;
   4946             case 1:
   4947                cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   4948                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   4949                cvt2 = Iop_Widen16Sto32x4;
   4950                op = Iop_Sub32x4;
   4951                op2 = Iop_Add32x4;
   4952                break;
   4953             case 2:
   4954                cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   4955                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   4956                cvt2 = Iop_Widen32Sto64x2;
   4957                op = Iop_Sub64x2;
   4958                op2 = Iop_Add64x2;
   4959                break;
   4960             case 3:
   4961                return False;
   4962             default:
   4963                vassert(0);
   4964          }
   4965          arg_n = newTemp(Ity_V128);
   4966          arg_m = newTemp(Ity_V128);
   4967          cond = newTemp(Ity_V128);
   4968          res = newTemp(Ity_V128);
   4969          assign(arg_n, unop(cvt, getDRegI64(nreg)));
   4970          assign(arg_m, unop(cvt, getDRegI64(mreg)));
   4971          assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
   4972                                             getDRegI64(mreg))));
   4973          assign(res, binop(op2,
   4974                            binop(Iop_OrV128,
   4975                                  binop(Iop_AndV128,
   4976                                        binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   4977                                        mkexpr(cond)),
   4978                                  binop(Iop_AndV128,
   4979                                        binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   4980                                        unop(Iop_NotV128, mkexpr(cond)))),
   4981                            getQReg(dreg)));
   4982          putQReg(dreg, mkexpr(res), condT);
   4983          DIP("vabal.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
   4984              nreg, mreg);
   4985          return True;
   4986       case 6:
   4987          /* VSUBHN, VRSUBHN */
   4988          if (mreg & 1)
   4989             return False;
   4990          mreg >>= 1;
   4991          if (nreg & 1)
   4992             return False;
   4993          nreg >>= 1;
   4994          size = B;
   4995          switch (size) {
   4996             case 0:
   4997                op = Iop_Sub16x8;
   4998                op2 = Iop_Add16x8;
   4999                cvt = Iop_NarrowUn16to8x8;
   5000                sh = Iop_ShrN16x8;
   5001                imm = 1U << 7;
   5002                imm = (imm << 16) | imm;
   5003                imm = (imm << 32) | imm;
   5004                break;
   5005             case 1:
   5006                op = Iop_Sub32x4;
   5007                op2 = Iop_Add32x4;
   5008                cvt = Iop_NarrowUn32to16x4;
   5009                sh = Iop_ShrN32x4;
   5010                imm = 1U << 15;
   5011                imm = (imm << 32) | imm;
   5012                break;
   5013             case 2:
   5014                op = Iop_Sub64x2;
   5015                op2 = Iop_Add64x2;
   5016                cvt = Iop_NarrowUn64to32x2;
   5017                sh = Iop_ShrN64x2;
   5018                imm = 1U << 31;
   5019                break;
   5020             case 3:
   5021                return False;
   5022             default:
   5023                vassert(0);
   5024          }
   5025          tmp = newTemp(Ity_V128);
   5026          res = newTemp(Ity_V128);
   5027          assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
   5028          if (U) {
   5029             /* VRSUBHN */
   5030             assign(res, binop(op2, mkexpr(tmp),
   5031                      binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
   5032          } else {
   5033             assign(res, mkexpr(tmp));
   5034          }
   5035          putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
   5036                     condT);
   5037          DIP("v%ssubhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
   5038              nreg, mreg);
   5039          return True;
   5040       case 7:
   5041          /* VABDL */
   5042          if (!((theInstr >> 23) & 1)) {
   5043             vpanic("VABL should not be in dis_neon_data_3diff\n");
   5044          }
   5045          if (dreg & 1)
   5046             return False;
   5047          dreg >>= 1;
   5048          switch (size) {
   5049             case 0:
   5050                cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   5051                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   5052                cvt2 = Iop_Widen8Sto16x8;
   5053                op = Iop_Sub16x8;
   5054                break;
   5055             case 1:
   5056                cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   5057                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   5058                cvt2 = Iop_Widen16Sto32x4;
   5059                op = Iop_Sub32x4;
   5060                break;
   5061             case 2:
   5062                cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   5063                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   5064                cvt2 = Iop_Widen32Sto64x2;
   5065                op = Iop_Sub64x2;
   5066                break;
   5067             case 3:
   5068                return False;
   5069             default:
   5070                vassert(0);
   5071          }
   5072          arg_n = newTemp(Ity_V128);
   5073          arg_m = newTemp(Ity_V128);
   5074          cond = newTemp(Ity_V128);
   5075          res = newTemp(Ity_V128);
   5076          assign(arg_n, unop(cvt, getDRegI64(nreg)));
   5077          assign(arg_m, unop(cvt, getDRegI64(mreg)));
   5078          assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
   5079                                             getDRegI64(mreg))));
   5080          assign(res, binop(Iop_OrV128,
   5081                            binop(Iop_AndV128,
   5082                                  binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   5083                                  mkexpr(cond)),
   5084                            binop(Iop_AndV128,
   5085                                  binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   5086                                  unop(Iop_NotV128, mkexpr(cond)))));
   5087          putQReg(dreg, mkexpr(res), condT);
   5088          DIP("vabdl.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
   5089              nreg, mreg);
   5090          return True;
   5091       case 8:
   5092       case 10:
   5093          /* VMLAL, VMLSL (integer) */
   5094          if (dreg & 1)
   5095             return False;
   5096          dreg >>= 1;
   5097          size = B;
   5098          switch (size) {
   5099             case 0:
   5100                op = U ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
   5101                op2 = P ? Iop_Sub16x8 : Iop_Add16x8;
   5102                break;
   5103             case 1:
   5104                op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
   5105                op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
   5106                break;
   5107             case 2:
   5108                op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
   5109                op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
   5110                break;
   5111             case 3:
   5112                return False;
   5113             default:
   5114                vassert(0);
   5115          }
   5116          res = newTemp(Ity_V128);
   5117          assign(res, binop(op, getDRegI64(nreg),getDRegI64(mreg)));
   5118          putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
   5119          DIP("vml%cl.%c%u q%u, d%u, d%u\n", P ? 's' : 'a', U ? 'u' : 's',
   5120              8 << size, dreg, nreg, mreg);
   5121          return True;
   5122       case 9:
   5123       case 11:
   5124          /* VQDMLAL, VQDMLSL */
   5125          if (U)
   5126             return False;
   5127          if (dreg & 1)
   5128             return False;
   5129          dreg >>= 1;
   5130          size = B;
   5131          switch (size) {
   5132             case 0: case 3:
   5133                return False;
   5134             case 1:
   5135                op = Iop_QDMulLong16Sx4;
   5136                cmp = Iop_CmpEQ16x4;
   5137                add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
   5138                op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
   5139                imm = 1LL << 15;
   5140                imm = (imm << 16) | imm;
   5141                imm = (imm << 32) | imm;
   5142                break;
   5143             case 2:
   5144                op = Iop_QDMulLong32Sx2;
   5145                cmp = Iop_CmpEQ32x2;
   5146                add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
   5147                op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
   5148                imm = 1LL << 31;
   5149                imm = (imm << 32) | imm;
   5150                break;
   5151             default:
   5152                vassert(0);
   5153          }
   5154          res = newTemp(Ity_V128);
   5155          tmp = newTemp(Ity_V128);
   5156          assign(res, binop(op, getDRegI64(nreg), getDRegI64(mreg)));
   5157          assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
   5158          setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
   5159                     True, condT);
   5160          setFlag_QC(binop(Iop_And64,
   5161                           binop(cmp, getDRegI64(nreg), mkU64(imm)),
   5162                           binop(cmp, getDRegI64(mreg), mkU64(imm))),
   5163                     mkU64(0),
   5164                     False, condT);
   5165          putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
   5166          DIP("vqdml%cl.s%u q%u, d%u, d%u\n", P ? 's' : 'a', 8 << size, dreg,
   5167              nreg, mreg);
   5168          return True;
   5169       case 12:
   5170       case 14:
   5171          /* VMULL (integer or polynomial) */
   5172          if (dreg & 1)
   5173             return False;
   5174          dreg >>= 1;
   5175          size = B;
   5176          switch (size) {
   5177             case 0:
   5178                op = (U) ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
   5179                if (P)
   5180                   op = Iop_PolynomialMull8x8;
   5181                break;
   5182             case 1:
   5183                op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
   5184                break;
   5185             case 2:
   5186                op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
   5187                break;
   5188             default:
   5189                vassert(0);
   5190          }
   5191          putQReg(dreg, binop(op, getDRegI64(nreg),
   5192                                  getDRegI64(mreg)), condT);
   5193          DIP("vmull.%c%u q%u, d%u, d%u\n", P ? 'p' : (U ? 'u' : 's'),
   5194                8 << size, dreg, nreg, mreg);
   5195          return True;
   5196       case 13:
   5197          /* VQDMULL */
   5198          if (U)
   5199             return False;
   5200          if (dreg & 1)
   5201             return False;
   5202          dreg >>= 1;
   5203          size = B;
   5204          switch (size) {
   5205             case 0:
   5206             case 3:
   5207                return False;
   5208             case 1:
   5209                op = Iop_QDMulLong16Sx4;
   5210                op2 = Iop_CmpEQ16x4;
   5211                imm = 1LL << 15;
   5212                imm = (imm << 16) | imm;
   5213                imm = (imm << 32) | imm;
   5214                break;
   5215             case 2:
   5216                op = Iop_QDMulLong32Sx2;
   5217                op2 = Iop_CmpEQ32x2;
   5218                imm = 1LL << 31;
   5219                imm = (imm << 32) | imm;
   5220                break;
   5221             default:
   5222                vassert(0);
   5223          }
   5224          putQReg(dreg, binop(op, getDRegI64(nreg), getDRegI64(mreg)),
   5225                condT);
   5226          setFlag_QC(binop(Iop_And64,
   5227                           binop(op2, getDRegI64(nreg), mkU64(imm)),
   5228                           binop(op2, getDRegI64(mreg), mkU64(imm))),
   5229                     mkU64(0),
   5230                     False, condT);
   5231          DIP("vqdmull.s%u q%u, d%u, d%u\n", 8 << size, dreg, nreg, mreg);
   5232          return True;
   5233       default:
   5234          return False;
   5235    }
   5236    return False;
   5237 }
   5238 
   5239 /* A7.4.3 Two registers and a scalar */
   5240 static
   5241 Bool dis_neon_data_2reg_and_scalar ( UInt theInstr, IRTemp condT )
   5242 {
   5243 #  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
   5244    UInt U = INSN(24,24);
   5245    UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
   5246    UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
   5247    UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
   5248    UInt size = INSN(21,20);
   5249    UInt index;
   5250    UInt Q = INSN(24,24);
   5251 
   5252    if (INSN(27,25) != 1 || INSN(23,23) != 1
   5253        || INSN(6,6) != 1 || INSN(4,4) != 0)
   5254       return False;
   5255 
   5256    /* VMLA, VMLS (scalar)  */
   5257    if ((INSN(11,8) & BITS4(1,0,1,0)) == BITS4(0,0,0,0)) {
   5258       IRTemp res, arg_m, arg_n;
   5259       IROp dup, get, op, op2, add, sub;
   5260       if (Q) {
   5261          if ((dreg & 1) || (nreg & 1))
   5262             return False;
   5263          dreg >>= 1;
   5264          nreg >>= 1;
   5265          res = newTemp(Ity_V128);
   5266          arg_m = newTemp(Ity_V128);
   5267          arg_n = newTemp(Ity_V128);
   5268          assign(arg_n, getQReg(nreg));
   5269          switch(size) {
   5270             case 1:
   5271                dup = Iop_Dup16x8;
   5272                get = Iop_GetElem16x4;
   5273                index = mreg >> 3;
   5274                mreg &= 7;
   5275                break;
   5276             case 2:
   5277                dup = Iop_Dup32x4;
   5278                get = Iop_GetElem32x2;
   5279                index = mreg >> 4;
   5280                mreg &= 0xf;
   5281                break;
   5282             case 0:
   5283             case 3:
   5284                return False;
   5285             default:
   5286                vassert(0);
   5287          }
   5288          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5289       } else {
   5290          res = newTemp(Ity_I64);
   5291          arg_m = newTemp(Ity_I64);
   5292          arg_n = newTemp(Ity_I64);
   5293          assign(arg_n, getDRegI64(nreg));
   5294          switch(size) {
   5295             case 1:
   5296                dup = Iop_Dup16x4;
   5297                get = Iop_GetElem16x4;
   5298                index = mreg >> 3;
   5299                mreg &= 7;
   5300                break;
   5301             case 2:
   5302                dup = Iop_Dup32x2;
   5303                get = Iop_GetElem32x2;
   5304                index = mreg >> 4;
   5305                mreg &= 0xf;
   5306                break;
   5307             case 0:
   5308             case 3:
   5309                return False;
   5310             default:
   5311                vassert(0);
   5312          }
   5313          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5314       }
   5315       if (INSN(8,8)) {
   5316          switch (size) {
   5317             case 2:
   5318                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   5319                add = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
   5320                sub = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
   5321                break;
   5322             case 0:
   5323             case 1:
   5324             case 3:
   5325                return False;
   5326             default:
   5327                vassert(0);
   5328          }
   5329       } else {
   5330          switch (size) {
   5331             case 1:
   5332                op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   5333                add = Q ? Iop_Add16x8 : Iop_Add16x4;
   5334                sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   5335                break;
   5336             case 2:
   5337                op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   5338                add = Q ? Iop_Add32x4 : Iop_Add32x2;
   5339                sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   5340                break;
   5341             case 0:
   5342             case 3:
   5343                return False;
   5344             default:
   5345                vassert(0);
   5346          }
   5347       }
   5348       op2 = INSN(10,10) ? sub : add;
   5349       assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   5350       if (Q)
   5351          putQReg(dreg, binop_w_fake_RM(op2, getQReg(dreg), mkexpr(res)),
   5352                  condT);
   5353       else
   5354          putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)),
   5355                     condT);
   5356       DIP("vml%c.%c%u %c%u, %c%u, d%u[%u]\n", INSN(10,10) ? 's' : 'a',
   5357             INSN(8,8) ? 'f' : 'i', 8 << size,
   5358             Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, mreg, index);
   5359       return True;
   5360    }
   5361 
   5362    /* VMLAL, VMLSL (scalar)   */
   5363    if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,0)) {
   5364       IRTemp res, arg_m, arg_n;
   5365       IROp dup, get, op, op2, add, sub;
   5366       if (dreg & 1)
   5367          return False;
   5368       dreg >>= 1;
   5369       res = newTemp(Ity_V128);
   5370       arg_m = newTemp(Ity_I64);
   5371       arg_n = newTemp(Ity_I64);
   5372       assign(arg_n, getDRegI64(nreg));
   5373       switch(size) {
   5374          case 1:
   5375             dup = Iop_Dup16x4;
   5376             get = Iop_GetElem16x4;
   5377             index = mreg >> 3;
   5378             mreg &= 7;
   5379             break;
   5380          case 2:
   5381             dup = Iop_Dup32x2;
   5382             get = Iop_GetElem32x2;
   5383             index = mreg >> 4;
   5384             mreg &= 0xf;
   5385             break;
   5386          case 0:
   5387          case 3:
   5388             return False;
   5389          default:
   5390             vassert(0);
   5391       }
   5392       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5393       switch (size) {
   5394          case 1:
   5395             op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
   5396             add = Iop_Add32x4;
   5397             sub = Iop_Sub32x4;
   5398             break;
   5399          case 2:
   5400             op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
   5401             add = Iop_Add64x2;
   5402             sub = Iop_Sub64x2;
   5403             break;
   5404          case 0:
   5405          case 3:
   5406             return False;
   5407          default:
   5408             vassert(0);
   5409       }
   5410       op2 = INSN(10,10) ? sub : add;
   5411       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5412       putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
   5413       DIP("vml%cl.%c%u q%u, d%u, d%u[%u]\n",
   5414           INSN(10,10) ? 's' : 'a', U ? 'u' : 's',
   5415           8 << size, dreg, nreg, mreg, index);
   5416       return True;
   5417    }
   5418 
   5419    /* VQDMLAL, VQDMLSL (scalar)  */
   5420    if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,1) && !U) {
   5421       IRTemp res, arg_m, arg_n, tmp;
   5422       IROp dup, get, op, op2, add, cmp;
   5423       UInt P = INSN(10,10);
   5424       ULong imm;
   5425       if (dreg & 1)
   5426          return False;
   5427       dreg >>= 1;
   5428       res = newTemp(Ity_V128);
   5429       arg_m = newTemp(Ity_I64);
   5430       arg_n = newTemp(Ity_I64);
   5431       assign(arg_n, getDRegI64(nreg));
   5432       switch(size) {
   5433          case 1:
   5434             dup = Iop_Dup16x4;
   5435             get = Iop_GetElem16x4;
   5436             index = mreg >> 3;
   5437             mreg &= 7;
   5438             break;
   5439          case 2:
   5440             dup = Iop_Dup32x2;
   5441             get = Iop_GetElem32x2;
   5442             index = mreg >> 4;
   5443             mreg &= 0xf;
   5444             break;
   5445          case 0:
   5446          case 3:
   5447             return False;
   5448          default:
   5449             vassert(0);
   5450       }
   5451       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5452       switch (size) {
   5453          case 0:
   5454          case 3:
   5455             return False;
   5456          case 1:
   5457             op = Iop_QDMulLong16Sx4;
   5458             cmp = Iop_CmpEQ16x4;
   5459             add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
   5460             op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
   5461             imm = 1LL << 15;
   5462             imm = (imm << 16) | imm;
   5463             imm = (imm << 32) | imm;
   5464             break;
   5465          case 2:
   5466             op = Iop_QDMulLong32Sx2;
   5467             cmp = Iop_CmpEQ32x2;
   5468             add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
   5469             op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
   5470             imm = 1LL << 31;
   5471             imm = (imm << 32) | imm;
   5472             break;
   5473          default:
   5474             vassert(0);
   5475       }
   5476       res = newTemp(Ity_V128);
   5477       tmp = newTemp(Ity_V128);
   5478       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5479       assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
   5480       setFlag_QC(binop(Iop_And64,
   5481                        binop(cmp, mkexpr(arg_n), mkU64(imm)),
   5482                        binop(cmp, mkexpr(arg_m), mkU64(imm))),
   5483                  mkU64(0),
   5484                  False, condT);
   5485       setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
   5486                  True, condT);
   5487       putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
   5488       DIP("vqdml%cl.s%u q%u, d%u, d%u[%u]\n", P ? 's' : 'a', 8 << size,
   5489           dreg, nreg, mreg, index);
   5490       return True;
   5491    }
   5492 
   5493    /* VMUL (by scalar)  */
   5494    if ((INSN(11,8) & BITS4(1,1,1,0)) == BITS4(1,0,0,0)) {
   5495       IRTemp res, arg_m, arg_n;
   5496       IROp dup, get, op;
   5497       if (Q) {
   5498          if ((dreg & 1) || (nreg & 1))
   5499             return False;
   5500          dreg >>= 1;
   5501          nreg >>= 1;
   5502          res = newTemp(Ity_V128);
   5503          arg_m = newTemp(Ity_V128);
   5504          arg_n = newTemp(Ity_V128);
   5505          assign(arg_n, getQReg(nreg));
   5506          switch(size) {
   5507             case 1:
   5508                dup = Iop_Dup16x8;
   5509                get = Iop_GetElem16x4;
   5510                index = mreg >> 3;
   5511                mreg &= 7;
   5512                break;
   5513             case 2:
   5514                dup = Iop_Dup32x4;
   5515                get = Iop_GetElem32x2;
   5516                index = mreg >> 4;
   5517                mreg &= 0xf;
   5518                break;
   5519             case 0:
   5520             case 3:
   5521                return False;
   5522             default:
   5523                vassert(0);
   5524          }
   5525          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5526       } else {
   5527          res = newTemp(Ity_I64);
   5528          arg_m = newTemp(Ity_I64);
   5529          arg_n = newTemp(Ity_I64);
   5530          assign(arg_n, getDRegI64(nreg));
   5531          switch(size) {
   5532             case 1:
   5533                dup = Iop_Dup16x4;
   5534                get = Iop_GetElem16x4;
   5535                index = mreg >> 3;
   5536                mreg &= 7;
   5537                break;
   5538             case 2:
   5539                dup = Iop_Dup32x2;
   5540                get = Iop_GetElem32x2;
   5541                index = mreg >> 4;
   5542                mreg &= 0xf;
   5543                break;
   5544             case 0:
   5545             case 3:
   5546                return False;
   5547             default:
   5548                vassert(0);
   5549          }
   5550          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5551       }
   5552       if (INSN(8,8)) {
   5553          switch (size) {
   5554             case 2:
   5555                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   5556                break;
   5557             case 0:
   5558             case 1:
   5559             case 3:
   5560                return False;
   5561             default:
   5562                vassert(0);
   5563          }
   5564       } else {
   5565          switch (size) {
   5566             case 1:
   5567                op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   5568                break;
   5569             case 2:
   5570                op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   5571                break;
   5572             case 0:
   5573             case 3:
   5574                return False;
   5575             default:
   5576                vassert(0);
   5577          }
   5578       }
   5579       assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   5580       if (Q)
   5581          putQReg(dreg, mkexpr(res), condT);
   5582       else
   5583          putDRegI64(dreg, mkexpr(res), condT);
   5584       DIP("vmul.%c%u %c%u, %c%u, d%u[%u]\n", INSN(8,8) ? 'f' : 'i',
   5585           8 << size, Q ? 'q' : 'd', dreg,
   5586           Q ? 'q' : 'd', nreg, mreg, index);
   5587       return True;
   5588    }
   5589 
   5590    /* VMULL (scalar) */
   5591    if (INSN(11,8) == BITS4(1,0,1,0)) {
   5592       IRTemp res, arg_m, arg_n;
   5593       IROp dup, get, op;
   5594       if (dreg & 1)
   5595          return False;
   5596       dreg >>= 1;
   5597       res = newTemp(Ity_V128);
   5598       arg_m = newTemp(Ity_I64);
   5599       arg_n = newTemp(Ity_I64);
   5600       assign(arg_n, getDRegI64(nreg));
   5601       switch(size) {
   5602          case 1:
   5603             dup = Iop_Dup16x4;
   5604             get = Iop_GetElem16x4;
   5605             index = mreg >> 3;
   5606             mreg &= 7;
   5607             break;
   5608          case 2:
   5609             dup = Iop_Dup32x2;
   5610             get = Iop_GetElem32x2;
   5611             index = mreg >> 4;
   5612             mreg &= 0xf;
   5613             break;
   5614          case 0:
   5615          case 3:
   5616             return False;
   5617          default:
   5618             vassert(0);
   5619       }
   5620       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5621       switch (size) {
   5622          case 1: op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4; break;
   5623          case 2: op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2; break;
   5624          case 0: case 3: return False;
   5625          default: vassert(0);
   5626       }
   5627       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5628       putQReg(dreg, mkexpr(res), condT);
   5629       DIP("vmull.%c%u q%u, d%u, d%u[%u]\n", U ? 'u' : 's', 8 << size, dreg,
   5630           nreg, mreg, index);
   5631       return True;
   5632    }
   5633 
   5634    /* VQDMULL */
   5635    if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
   5636       IROp op ,op2, dup, get;
   5637       ULong imm;
   5638       IRTemp arg_m, arg_n;
   5639       if (dreg & 1)
   5640          return False;
   5641       dreg >>= 1;
   5642       arg_m = newTemp(Ity_I64);
   5643       arg_n = newTemp(Ity_I64);
   5644       assign(arg_n, getDRegI64(nreg));
   5645       switch(size) {
   5646          case 1:
   5647             dup = Iop_Dup16x4;
   5648             get = Iop_GetElem16x4;
   5649             index = mreg >> 3;
   5650             mreg &= 7;
   5651             break;
   5652          case 2:
   5653             dup = Iop_Dup32x2;
   5654             get = Iop_GetElem32x2;
   5655             index = mreg >> 4;
   5656             mreg &= 0xf;
   5657             break;
   5658          case 0:
   5659          case 3:
   5660             return False;
   5661          default:
   5662             vassert(0);
   5663       }
   5664       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5665       switch (size) {
   5666          case 0:
   5667          case 3:
   5668             return False;
   5669          case 1:
   5670             op = Iop_QDMulLong16Sx4;
   5671             op2 = Iop_CmpEQ16x4;
   5672             imm = 1LL << 15;
   5673             imm = (imm << 16) | imm;
   5674             imm = (imm << 32) | imm;
   5675             break;
   5676          case 2:
   5677             op = Iop_QDMulLong32Sx2;
   5678             op2 = Iop_CmpEQ32x2;
   5679             imm = 1LL << 31;
   5680             imm = (imm << 32) | imm;
   5681             break;
   5682          default:
   5683             vassert(0);
   5684       }
   5685       putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   5686             condT);
   5687       setFlag_QC(binop(Iop_And64,
   5688                        binop(op2, mkexpr(arg_n), mkU64(imm)),
   5689                        binop(op2, mkexpr(arg_m), mkU64(imm))),
   5690                  mkU64(0),
   5691                  False, condT);
   5692       DIP("vqdmull.s%u q%u, d%u, d%u[%u]\n", 8 << size, dreg, nreg, mreg,
   5693           index);
   5694       return True;
   5695    }
   5696 
   5697    /* VQDMULH */
   5698    if (INSN(11,8) == BITS4(1,1,0,0)) {
   5699       IROp op ,op2, dup, get;
   5700       ULong imm;
   5701       IRTemp res, arg_m, arg_n;
   5702       if (Q) {
   5703          if ((dreg & 1) || (nreg & 1))
   5704             return False;
   5705          dreg >>= 1;
   5706          nreg >>= 1;
   5707          res = newTemp(Ity_V128);
   5708          arg_m = newTemp(Ity_V128);
   5709          arg_n = newTemp(Ity_V128);
   5710          assign(arg_n, getQReg(nreg));
   5711          switch(size) {
   5712             case 1:
   5713                dup = Iop_Dup16x8;
   5714                get = Iop_GetElem16x4;
   5715                index = mreg >> 3;
   5716                mreg &= 7;
   5717                break;
   5718             case 2:
   5719                dup = Iop_Dup32x4;
   5720                get = Iop_GetElem32x2;
   5721                index = mreg >> 4;
   5722                mreg &= 0xf;
   5723                break;
   5724             case 0:
   5725             case 3:
   5726                return False;
   5727             default:
   5728                vassert(0);
   5729          }
   5730          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5731       } else {
   5732          res = newTemp(Ity_I64);
   5733          arg_m = newTemp(Ity_I64);
   5734          arg_n = newTemp(Ity_I64);
   5735          assign(arg_n, getDRegI64(nreg));
   5736          switch(size) {
   5737             case 1:
   5738                dup = Iop_Dup16x4;
   5739                get = Iop_GetElem16x4;
   5740                index = mreg >> 3;
   5741                mreg &= 7;
   5742                break;
   5743             case 2:
   5744                dup = Iop_Dup32x2;
   5745                get = Iop_GetElem32x2;
   5746                index = mreg >> 4;
   5747                mreg &= 0xf;
   5748                break;
   5749             case 0:
   5750             case 3:
   5751                return False;
   5752             default:
   5753                vassert(0);
   5754          }
   5755          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5756       }
   5757       switch (size) {
   5758          case 0:
   5759          case 3:
   5760             return False;
   5761          case 1:
   5762             op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
   5763             op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   5764             imm = 1LL << 15;
   5765             imm = (imm << 16) | imm;
   5766             imm = (imm << 32) | imm;
   5767             break;
   5768          case 2:
   5769             op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
   5770             op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   5771             imm = 1LL << 31;
   5772             imm = (imm << 32) | imm;
   5773             break;
   5774          default:
   5775             vassert(0);
   5776       }
   5777       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5778       setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   5779                        binop(op2, mkexpr(arg_n),
   5780                                   Q ? mkU128(imm) : mkU64(imm)),
   5781                        binop(op2, mkexpr(arg_m),
   5782                              Q ? mkU128(imm) : mkU64(imm))),
   5783                  Q ? mkU128(0) : mkU64(0),
   5784                  Q, condT);
   5785       if (Q)
   5786          putQReg(dreg, mkexpr(res), condT);
   5787       else
   5788          putDRegI64(dreg, mkexpr(res), condT);
   5789       DIP("vqdmulh.s%u %c%u, %c%u, d%u[%u]\n",
   5790           8 << size, Q ? 'q' : 'd', dreg,
   5791           Q ? 'q' : 'd', nreg, mreg, index);
   5792       return True;
   5793    }
   5794 
   5795    /* VQRDMULH (scalar) */
   5796    if (INSN(11,8) == BITS4(1,1,0,1)) {
   5797       IROp op ,op2, dup, get;
   5798       ULong imm;
   5799       IRTemp res, arg_m, arg_n;
   5800       if (Q) {
   5801          if ((dreg & 1) || (nreg & 1))
   5802             return False;
   5803          dreg >>= 1;
   5804          nreg >>= 1;
   5805          res = newTemp(Ity_V128);
   5806          arg_m = newTemp(Ity_V128);
   5807          arg_n = newTemp(Ity_V128);
   5808          assign(arg_n, getQReg(nreg));
   5809          switch(size) {
   5810             case 1:
   5811                dup = Iop_Dup16x8;
   5812                get = Iop_GetElem16x4;
   5813                index = mreg >> 3;
   5814                mreg &= 7;
   5815                break;
   5816             case 2:
   5817                dup = Iop_Dup32x4;
   5818                get = Iop_GetElem32x2;
   5819                index = mreg >> 4;
   5820                mreg &= 0xf;
   5821                break;
   5822             case 0:
   5823             case 3:
   5824                return False;
   5825             default:
   5826                vassert(0);
   5827          }
   5828          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5829       } else {
   5830          res = newTemp(Ity_I64);
   5831          arg_m = newTemp(Ity_I64);
   5832          arg_n = newTemp(Ity_I64);
   5833          assign(arg_n, getDRegI64(nreg));
   5834          switch(size) {
   5835             case 1:
   5836                dup = Iop_Dup16x4;
   5837                get = Iop_GetElem16x4;
   5838                index = mreg >> 3;
   5839                mreg &= 7;
   5840                break;
   5841             case 2:
   5842                dup = Iop_Dup32x2;
   5843                get = Iop_GetElem32x2;
   5844                index = mreg >> 4;
   5845                mreg &= 0xf;
   5846                break;
   5847             case 0:
   5848             case 3:
   5849                return False;
   5850             default:
   5851                vassert(0);
   5852          }
   5853          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5854       }
   5855       switch (size) {
   5856          case 0:
   5857          case 3:
   5858             return False;
   5859          case 1:
   5860             op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
   5861             op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   5862             imm = 1LL << 15;
   5863             imm = (imm << 16) | imm;
   5864             imm = (imm << 32) | imm;
   5865             break;
   5866          case 2:
   5867             op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
   5868             op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   5869             imm = 1LL << 31;
   5870             imm = (imm << 32) | imm;
   5871             break;
   5872          default:
   5873             vassert(0);
   5874       }
   5875       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5876       setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   5877                        binop(op2, mkexpr(arg_n),
   5878                                   Q ? mkU128(imm) : mkU64(imm)),
   5879                        binop(op2, mkexpr(arg_m),
   5880                                   Q ? mkU128(imm) : mkU64(imm))),
   5881                  Q ? mkU128(0) : mkU64(0),
   5882                  Q, condT);
   5883       if (Q)
   5884          putQReg(dreg, mkexpr(res), condT);
   5885       else
   5886          putDRegI64(dreg, mkexpr(res), condT);
   5887       DIP("vqrdmulh.s%u %c%u, %c%u, d%u[%u]\n",
   5888           8 << size, Q ? 'q' : 'd', dreg,
   5889           Q ? 'q' : 'd', nreg, mreg, index);
   5890       return True;
   5891    }
   5892 
   5893    return False;
   5894 #  undef INSN
   5895 }
   5896 
   5897 /* A7.4.4 Two registers and a shift amount */
   5898 static
   5899 Bool dis_neon_data_2reg_and_shift ( UInt theInstr, IRTemp condT )
   5900 {
   5901    UInt A = (theInstr >> 8) & 0xf;
   5902    UInt B = (theInstr >> 6) & 1;
   5903    UInt L = (theInstr >> 7) & 1;
   5904    UInt U = (theInstr >> 24) & 1;
   5905    UInt Q = B;
   5906    UInt imm6 = (theInstr >> 16) & 0x3f;
   5907    UInt shift_imm;
   5908    UInt size = 4;
   5909    UInt tmp;
   5910    UInt mreg = get_neon_m_regno(theInstr);
   5911    UInt dreg = get_neon_d_regno(theInstr);
   5912    ULong imm = 0;
   5913    IROp op, cvt, add = Iop_INVALID, cvt2, op_rev;
   5914    IRTemp reg_m, res, mask;
   5915 
   5916    if (L == 0 && ((theInstr >> 19) & 7) == 0)
   5917       /* It is one reg and immediate */
   5918       return False;
   5919 
   5920    tmp = (L << 6) | imm6;
   5921    if (tmp & 0x40) {
   5922       size = 3;
   5923       shift_imm = 64 - imm6;
   5924    } else if (tmp & 0x20) {
   5925       size = 2;
   5926       shift_imm = 64 - imm6;
   5927    } else if (tmp & 0x10) {
   5928       size = 1;
   5929       shift_imm = 32 - imm6;
   5930    } else if (tmp & 0x8) {
   5931       size = 0;
   5932       shift_imm = 16 - imm6;
   5933    } else {
   5934       return False;
   5935    }
   5936 
   5937    switch (A) {
   5938       case 3:
   5939       case 2:
   5940          /* VRSHR, VRSRA */
   5941          if (shift_imm > 0) {
   5942             IRExpr *imm_val;
   5943             imm = 1L;
   5944             switch (size) {
   5945                case 0:
   5946                   imm = (imm << 8) | imm;
   5947                   /* fall through */
   5948                case 1:
   5949                   imm = (imm << 16) | imm;
   5950                   /* fall through */
   5951                case 2:
   5952                   imm = (imm << 32) | imm;
   5953                   /* fall through */
   5954                case 3:
   5955                   break;
   5956                default:
   5957                   vassert(0);
   5958             }
   5959             if (Q) {
   5960                reg_m = newTemp(Ity_V128);
   5961                res = newTemp(Ity_V128);
   5962                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   5963                assign(reg_m, getQReg(mreg));
   5964                switch (size) {
   5965                   case 0:
   5966                      add = Iop_Add8x16;
   5967                      op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
   5968                      break;
   5969                   case 1:
   5970                      add = Iop_Add16x8;
   5971                      op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   5972                      break;
   5973                   case 2:
   5974                      add = Iop_Add32x4;
   5975                      op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   5976                      break;
   5977                   case 3:
   5978                      add = Iop_Add64x2;
   5979                      op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
   5980                      break;
   5981                   default:
   5982                      vassert(0);
   5983                }
   5984             } else {
   5985                reg_m = newTemp(Ity_I64);
   5986                res = newTemp(Ity_I64);
   5987                imm_val = mkU64(imm);
   5988                assign(reg_m, getDRegI64(mreg));
   5989                switch (size) {
   5990                   case 0:
   5991                      add = Iop_Add8x8;
   5992                      op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
   5993                      break;
   5994                   case 1:
   5995                      add = Iop_Add16x4;
   5996                      op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
   5997                      break;
   5998                   case 2:
   5999                      add = Iop_Add32x2;
   6000                      op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
   6001                      break;
   6002                   case 3:
   6003                      add = Iop_Add64;
   6004                      op = U ? Iop_Shr64 : Iop_Sar64;
   6005                      break;
   6006                   default:
   6007                      vassert(0);
   6008                }
   6009             }
   6010             assign(res,
   6011                    binop(add,
   6012                          binop(op,
   6013                                mkexpr(reg_m),
   6014                                mkU8(shift_imm)),
   6015                          binop(Q ? Iop_AndV128 : Iop_And64,
   6016                                binop(op,
   6017                                      mkexpr(reg_m),
   6018                                      mkU8(shift_imm - 1)),
   6019                                imm_val)));
   6020          } else {
   6021             if (Q) {
   6022                res = newTemp(Ity_V128);
   6023                assign(res, getQReg(mreg));
   6024             } else {
   6025                res = newTemp(Ity_I64);
   6026                assign(res, getDRegI64(mreg));
   6027             }
   6028          }
   6029          if (A == 3) {
   6030             if (Q) {
   6031                putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
   6032                              condT);
   6033             } else {
   6034                putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
   6035                                 condT);
   6036             }
   6037             DIP("vrsra.%c%u %c%u, %c%u, #%u\n",
   6038                 U ? 'u' : 's', 8 << size,
   6039                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6040          } else {
   6041             if (Q) {
   6042                putQReg(dreg, mkexpr(res), condT);
   6043             } else {
   6044                putDRegI64(dreg, mkexpr(res), condT);
   6045             }
   6046             DIP("vrshr.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
   6047                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6048          }
   6049          return True;
   6050       case 1:
   6051       case 0:
   6052          /* VSHR, VSRA */
   6053          if (Q) {
   6054             reg_m = newTemp(Ity_V128);
   6055             assign(reg_m, getQReg(mreg));
   6056             res = newTemp(Ity_V128);
   6057          } else {
   6058             reg_m = newTemp(Ity_I64);
   6059             assign(reg_m, getDRegI64(mreg));
   6060             res = newTemp(Ity_I64);
   6061          }
   6062          if (Q) {
   6063             switch (size) {
   6064                case 0:
   6065                   op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
   6066                   add = Iop_Add8x16;
   6067                   break;
   6068                case 1:
   6069                   op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   6070                   add = Iop_Add16x8;
   6071                   break;
   6072                case 2:
   6073                   op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   6074                   add = Iop_Add32x4;
   6075                   break;
   6076                case 3:
   6077                   op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
   6078                   add = Iop_Add64x2;
   6079                   break;
   6080                default:
   6081                   vassert(0);
   6082             }
   6083          } else {
   6084             switch (size) {
   6085                case 0:
   6086                   op =  U ? Iop_ShrN8x8 : Iop_SarN8x8;
   6087                   add = Iop_Add8x8;
   6088                   break;
   6089                case 1:
   6090                   op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
   6091                   add = Iop_Add16x4;
   6092                   break;
   6093                case 2:
   6094                   op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
   6095                   add = Iop_Add32x2;
   6096                   break;
   6097                case 3:
   6098                   op = U ? Iop_Shr64 : Iop_Sar64;
   6099                   add = Iop_Add64;
   6100                   break;
   6101                default:
   6102                   vassert(0);
   6103             }
   6104          }
   6105          assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
   6106          if (A == 1) {
   6107             if (Q) {
   6108                putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
   6109                              condT);
   6110             } else {
   6111                putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
   6112                                 condT);
   6113             }
   6114             DIP("vsra.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
   6115                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6116          } else {
   6117             if (Q) {
   6118                putQReg(dreg, mkexpr(res), condT);
   6119             } else {
   6120                putDRegI64(dreg, mkexpr(res), condT);
   6121             }
   6122             DIP("vshr.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
   6123                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6124          }
   6125          return True;
   6126       case 4:
   6127          /* VSRI */
   6128          if (!U)
   6129             return False;
   6130          if (Q) {
   6131             res = newTemp(Ity_V128);
   6132             mask = newTemp(Ity_V128);
   6133          } else {
   6134             res = newTemp(Ity_I64);
   6135             mask = newTemp(Ity_I64);
   6136          }
   6137          switch (size) {
   6138             case 0: op = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; break;
   6139             case 1: op = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; break;
   6140             case 2: op = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; break;
   6141             case 3: op = Q ? Iop_ShrN64x2 : Iop_Shr64; break;
   6142             default: vassert(0);
   6143          }
   6144          if (Q) {
   6145             assign(mask, binop(op, binop(Iop_64HLtoV128,
   6146                                          mkU64(0xFFFFFFFFFFFFFFFFLL),
   6147                                          mkU64(0xFFFFFFFFFFFFFFFFLL)),
   6148                                mkU8(shift_imm)));
   6149             assign(res, binop(Iop_OrV128,
   6150                               binop(Iop_AndV128,
   6151                                     getQReg(dreg),
   6152                                     unop(Iop_NotV128,
   6153                                          mkexpr(mask))),
   6154                               binop(op,
   6155                                     getQReg(mreg),
   6156                                     mkU8(shift_imm))));
   6157             putQReg(dreg, mkexpr(res), condT);
   6158          } else {
   6159             assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
   6160                                mkU8(shift_imm)));
   6161             assign(res, binop(Iop_Or64,
   6162                               binop(Iop_And64,
   6163                                     getDRegI64(dreg),
   6164                                     unop(Iop_Not64,
   6165                                          mkexpr(mask))),
   6166                               binop(op,
   6167                                     getDRegI64(mreg),
   6168                                     mkU8(shift_imm))));
   6169             putDRegI64(dreg, mkexpr(res), condT);
   6170          }
   6171          DIP("vsri.%u %c%u, %c%u, #%u\n",
   6172              8 << size, Q ? 'q' : 'd', dreg,
   6173              Q ? 'q' : 'd', mreg, shift_imm);
   6174          return True;
   6175       case 5:
   6176          if (U) {
   6177             /* VSLI */
   6178             shift_imm = 8 * (1 << size) - shift_imm;
   6179             if (Q) {
   6180                res = newTemp(Ity_V128);
   6181                mask = newTemp(Ity_V128);
   6182             } else {
   6183                res = newTemp(Ity_I64);
   6184                mask = newTemp(Ity_I64);
   6185             }
   6186             switch (size) {
   6187                case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
   6188                case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
   6189                case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
   6190                case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
   6191                default: vassert(0);
   6192             }
   6193             if (Q) {
   6194                assign(mask, binop(op, binop(Iop_64HLtoV128,
   6195                                             mkU64(0xFFFFFFFFFFFFFFFFLL),
   6196                                             mkU64(0xFFFFFFFFFFFFFFFFLL)),
   6197                                   mkU8(shift_imm)));
   6198                assign(res, binop(Iop_OrV128,
   6199                                  binop(Iop_AndV128,
   6200                                        getQReg(dreg),
   6201                                        unop(Iop_NotV128,
   6202                                             mkexpr(mask))),
   6203                                  binop(op,
   6204                                        getQReg(mreg),
   6205                                        mkU8(shift_imm))));
   6206                putQReg(dreg, mkexpr(res), condT);
   6207             } else {
   6208                assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
   6209                                   mkU8(shift_imm)));
   6210                assign(res, binop(Iop_Or64,
   6211                                  binop(Iop_And64,
   6212                                        getDRegI64(dreg),
   6213                                        unop(Iop_Not64,
   6214                                             mkexpr(mask))),
   6215                                  binop(op,
   6216                                        getDRegI64(mreg),
   6217                                        mkU8(shift_imm))));
   6218                putDRegI64(dreg, mkexpr(res), condT);
   6219             }
   6220             DIP("vsli.%u %c%u, %c%u, #%u\n",
   6221                 8 << size, Q ? 'q' : 'd', dreg,
   6222                 Q ? 'q' : 'd', mreg, shift_imm);
   6223             return True;
   6224          } else {
   6225             /* VSHL #imm */
   6226             shift_imm = 8 * (1 << size) - shift_imm;
   6227             if (Q) {
   6228                res = newTemp(Ity_V128);
   6229             } else {
   6230                res = newTemp(Ity_I64);
   6231             }
   6232             switch (size) {
   6233                case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
   6234                case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
   6235                case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
   6236                case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
   6237                default: vassert(0);
   6238             }
   6239             assign(res, binop(op, Q ? getQReg(mreg) : getDRegI64(mreg),
   6240                      mkU8(shift_imm)));
   6241             if (Q) {
   6242                putQReg(dreg, mkexpr(res), condT);
   6243             } else {
   6244                putDRegI64(dreg, mkexpr(res), condT);
   6245             }
   6246             DIP("vshl.i%u %c%u, %c%u, #%u\n",
   6247                 8 << size, Q ? 'q' : 'd', dreg,
   6248                 Q ? 'q' : 'd', mreg, shift_imm);
   6249             return True;
   6250          }
   6251          break;
   6252       case 6:
   6253       case 7:
   6254          /* VQSHL, VQSHLU */
   6255          shift_imm = 8 * (1 << size) - shift_imm;
   6256          if (U) {
   6257             if (A & 1) {
   6258                switch (size) {
   6259                   case 0:
   6260                      op = Q ? Iop_QShlN8x16 : Iop_QShlN8x8;
   6261                      op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   6262                      break;
   6263                   case 1:
   6264                      op = Q ? Iop_QShlN16x8 : Iop_QShlN16x4;
   6265                      op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   6266                      break;
   6267                   case 2:
   6268                      op = Q ? Iop_QShlN32x4 : Iop_QShlN32x2;
   6269                      op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   6270                      break;
   6271                   case 3:
   6272                      op = Q ? Iop_QShlN64x2 : Iop_QShlN64x1;
   6273                      op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
   6274                      break;
   6275                   default:
   6276                      vassert(0);
   6277                }
   6278                DIP("vqshl.u%u %c%u, %c%u, #%u\n",
   6279                    8 << size,
   6280                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6281             } else {
   6282                switch (size) {
   6283                   case 0:
   6284                      op = Q ? Iop_QShlN8Sx16 : Iop_QShlN8Sx8;
   6285                      op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   6286                      break;
   6287                   case 1:
   6288                      op = Q ? Iop_QShlN16Sx8 : Iop_QShlN16Sx4;
   6289                      op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   6290                      break;
   6291                   case 2:
   6292                      op = Q ? Iop_QShlN32Sx4 : Iop_QShlN32Sx2;
   6293                      op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   6294                      break;
   6295                   case 3:
   6296                      op = Q ? Iop_QShlN64Sx2 : Iop_QShlN64Sx1;
   6297                      op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
   6298                      break;
   6299                   default:
   6300                      vassert(0);
   6301                }
   6302                DIP("vqshlu.s%u %c%u, %c%u, #%u\n",
   6303                    8 << size,
   6304                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6305             }
   6306          } else {
   6307             if (!(A & 1))
   6308                return False;
   6309             switch (size) {
   6310                case 0:
   6311                   op = Q ? Iop_QSalN8x16 : Iop_QSalN8x8;
   6312                   op_rev = Q ? Iop_SarN8x16 : Iop_SarN8x8;
   6313                   break;
   6314                case 1:
   6315                   op = Q ? Iop_QSalN16x8 : Iop_QSalN16x4;
   6316                   op_rev = Q ? Iop_SarN16x8 : Iop_SarN16x4;
   6317                   break;
   6318                case 2:
   6319                   op = Q ? Iop_QSalN32x4 : Iop_QSalN32x2;
   6320                   op_rev = Q ? Iop_SarN32x4 : Iop_SarN32x2;
   6321                   break;
   6322                case 3:
   6323                   op = Q ? Iop_QSalN64x2 : Iop_QSalN64x1;
   6324                   op_rev = Q ? Iop_SarN64x2 : Iop_Sar64;
   6325                   break;
   6326                default:
   6327                   vassert(0);
   6328             }
   6329             DIP("vqshl.s%u %c%u, %c%u, #%u\n",
   6330                 8 << size,
   6331                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6332          }
   6333          if (Q) {
   6334             tmp = newTemp(Ity_V128);
   6335             res = newTemp(Ity_V128);
   6336             reg_m = newTemp(Ity_V128);
   6337             assign(reg_m, getQReg(mreg));
   6338          } else {
   6339             tmp = newTemp(Ity_I64);
   6340             res = newTemp(Ity_I64);
   6341             reg_m = newTemp(Ity_I64);
   6342             assign(reg_m, getDRegI64(mreg));
   6343          }
   6344          assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
   6345          assign(tmp, binop(op_rev, mkexpr(res), mkU8(shift_imm)));
   6346          setFlag_QC(mkexpr(tmp), mkexpr(reg_m), Q, condT);
   6347          if (Q)
   6348             putQReg(dreg, mkexpr(res), condT);
   6349          else
   6350             putDRegI64(dreg, mkexpr(res), condT);
   6351          return True;
   6352       case 8:
   6353          if (!U) {
   6354             if (L == 1)
   6355                return False;
   6356             size++;
   6357             dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   6358             mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   6359             if (mreg & 1)
   6360                return False;
   6361             mreg >>= 1;
   6362             if (!B) {
   6363                /* VSHRN*/
   6364                IROp narOp;
   6365                reg_m = newTemp(Ity_V128);
   6366                assign(reg_m, getQReg(mreg));
   6367                res = newTemp(Ity_I64);
   6368                switch (size) {
   6369                   case 1:
   6370                      op = Iop_ShrN16x8;
   6371                      narOp = Iop_NarrowUn16to8x8;
   6372                      break;
   6373                   case 2:
   6374                      op = Iop_ShrN32x4;
   6375                      narOp = Iop_NarrowUn32to16x4;
   6376                      break;
   6377                   case 3:
   6378                      op = Iop_ShrN64x2;
   6379                      narOp = Iop_NarrowUn64to32x2;
   6380                      break;
   6381                   default:
   6382                      vassert(0);
   6383                }
   6384                assign(res, unop(narOp,
   6385                                 binop(op,
   6386                                       mkexpr(reg_m),
   6387                                       mkU8(shift_imm))));
   6388                putDRegI64(dreg, mkexpr(res), condT);
   6389                DIP("vshrn.i%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
   6390                    shift_imm);
   6391                return True;
   6392             } else {
   6393                /* VRSHRN   */
   6394                IROp addOp, shOp, narOp;
   6395                IRExpr *imm_val;
   6396                reg_m = newTemp(Ity_V128);
   6397                assign(reg_m, getQReg(mreg));
   6398                res = newTemp(Ity_I64);
   6399                imm = 1L;
   6400                switch (size) {
   6401                   case 0: imm = (imm <<  8) | imm; /* fall through */
   6402                   case 1: imm = (imm << 16) | imm; /* fall through */
   6403                   case 2: imm = (imm << 32) | imm; /* fall through */
   6404                   case 3: break;
   6405                   default: vassert(0);
   6406                }
   6407                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   6408                switch (size) {
   6409                   case 1:
   6410                      addOp = Iop_Add16x8;
   6411                      shOp = Iop_ShrN16x8;
   6412                      narOp = Iop_NarrowUn16to8x8;
   6413                      break;
   6414                   case 2:
   6415                      addOp = Iop_Add32x4;
   6416                      shOp = Iop_ShrN32x4;
   6417                      narOp = Iop_NarrowUn32to16x4;
   6418                      break;
   6419                   case 3:
   6420                      addOp = Iop_Add64x2;
   6421                      shOp = Iop_ShrN64x2;
   6422                      narOp = Iop_NarrowUn64to32x2;
   6423                      break;
   6424                   default:
   6425                      vassert(0);
   6426                }
   6427                assign(res, unop(narOp,
   6428                                 binop(addOp,
   6429                                       binop(shOp,
   6430                                             mkexpr(reg_m),
   6431                                             mkU8(shift_imm)),
   6432                                       binop(Iop_AndV128,
   6433                                             binop(shOp,
   6434                                                   mkexpr(reg_m),
   6435                                                   mkU8(shift_imm - 1)),
   6436                                             imm_val))));
   6437                putDRegI64(dreg, mkexpr(res), condT);
   6438                if (shift_imm == 0) {
   6439                   DIP("vmov%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
   6440                       shift_imm);
   6441                } else {
   6442                   DIP("vrshrn.i%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
   6443                       shift_imm);
   6444                }
   6445                return True;
   6446             }
   6447          } else {
   6448             /* fall through */
   6449          }
   6450       case 9:
   6451          dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   6452          mreg = ((theInstr >>  1) & 0x10) | (theInstr & 0xF);
   6453          if (mreg & 1)
   6454             return False;
   6455          mreg >>= 1;
   6456          size++;
   6457          if ((theInstr >> 8) & 1) {
   6458             switch (size) {
   6459                case 1:
   6460                   op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   6461                   cvt = U ? Iop_QNarrowUn16Uto8Ux8 : Iop_QNarrowUn16Sto8Sx8;
   6462                   cvt2 = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   6463                   break;
   6464                case 2:
   6465                   op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   6466                   cvt = U ? Iop_QNarrowUn32Uto16Ux4 : Iop_QNarrowUn32Sto16Sx4;
   6467                   cvt2 = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   6468                   break;
   6469                case 3:
   6470                   op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
   6471                   cvt = U ? Iop_QNarrowUn64Uto32Ux2 : Iop_QNarrowUn64Sto32Sx2;
   6472                   cvt2 = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   6473                   break;
   6474                default:
   6475                   vassert(0);
   6476             }
   6477             DIP("vq%sshrn.%c%u d%u, q%u, #%u\n", B ? "r" : "",
   6478                 U ? 'u' : 's', 8 << size, dreg, mreg, shift_imm);
   6479          } else {
   6480             vassert(U);
   6481             switch (size) {
   6482                case 1:
   6483                   op = Iop_SarN16x8;
   6484                   cvt = Iop_QNarrowUn16Sto8Ux8;
   6485                   cvt2 = Iop_Widen8Uto16x8;
   6486                   break;
   6487                case 2:
   6488                   op = Iop_SarN32x4;
   6489                   cvt = Iop_QNarrowUn32Sto16Ux4;
   6490                   cvt2 = Iop_Widen16Uto32x4;
   6491                   break;
   6492                case 3:
   6493                   op = Iop_SarN64x2;
   6494                   cvt = Iop_QNarrowUn64Sto32Ux2;
   6495                   cvt2 = Iop_Widen32Uto64x2;
   6496                   break;
   6497                default:
   6498                   vassert(0);
   6499             }
   6500             DIP("vq%sshrun.s%u d%u, q%u, #%u\n", B ? "r" : "",
   6501                 8 << size, dreg, mreg, shift_imm);
   6502          }
   6503          if (B) {
   6504             if (shift_imm > 0) {
   6505                imm = 1;
   6506                switch (size) {
   6507                   case 1: imm = (imm << 16) | imm; /* fall through */
   6508                   case 2: imm = (imm << 32) | imm; /* fall through */
   6509                   case 3: break;
   6510                   case 0: default: vassert(0);
   6511                }
   6512                switch (size) {
   6513                   case 1: add = Iop_Add16x8; break;
   6514                   case 2: add = Iop_Add32x4; break;
   6515                   case 3: add = Iop_Add64x2; break;
   6516                   case 0: default: vassert(0);
   6517                }
   6518             }
   6519          }
   6520          reg_m = newTemp(Ity_V128);
   6521          res = newTemp(Ity_V128);
   6522          assign(reg_m, getQReg(mreg));
   6523          if (B) {
   6524             /* VQRSHRN, VQRSHRUN */
   6525             assign(res, binop(add,
   6526                               binop(op, mkexpr(reg_m), mkU8(shift_imm)),
   6527                               binop(Iop_AndV128,
   6528                                     binop(op,
   6529                                           mkexpr(reg_m),
   6530                                           mkU8(shift_imm - 1)),
   6531                                     mkU128(imm))));
   6532          } else {
   6533             /* VQSHRN, VQSHRUN */
   6534             assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
   6535          }
   6536          setFlag_QC(unop(cvt2, unop(cvt, mkexpr(res))), mkexpr(res),
   6537                     True, condT);
   6538          putDRegI64(dreg, unop(cvt, mkexpr(res)), condT);
   6539          return True;
   6540       case 10:
   6541          /* VSHLL
   6542             VMOVL ::= VSHLL #0 */
   6543          if (B)
   6544             return False;
   6545          if (dreg & 1)
   6546             return False;
   6547          dreg >>= 1;
   6548          shift_imm = (8 << size) - shift_imm;
   6549          res = newTemp(Ity_V128);
   6550          switch (size) {
   6551             case 0:
   6552                op = Iop_ShlN16x8;
   6553                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   6554                break;
   6555             case 1:
   6556                op = Iop_ShlN32x4;
   6557                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   6558                break;
   6559             case 2:
   6560                op = Iop_ShlN64x2;
   6561                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   6562                break;
   6563             case 3:
   6564                return False;
   6565             default:
   6566                vassert(0);
   6567          }
   6568          assign(res, binop(op, unop(cvt, getDRegI64(mreg)), mkU8(shift_imm)));
   6569          putQReg(dreg, mkexpr(res), condT);
   6570          if (shift_imm == 0) {
   6571             DIP("vmovl.%c%u q%u, d%u\n", U ? 'u' : 's', 8 << size,
   6572                 dreg, mreg);
   6573          } else {
   6574             DIP("vshll.%c%u q%u, d%u, #%u\n", U ? 'u' : 's', 8 << size,
   6575                 dreg, mreg, shift_imm);
   6576          }
   6577          return True;
   6578       case 14:
   6579       case 15:
   6580          /* VCVT floating-point <-> fixed-point */
   6581          if ((theInstr >> 8) & 1) {
   6582             if (U) {
   6583                op = Q ? Iop_F32ToFixed32Ux4_RZ : Iop_F32ToFixed32Ux2_RZ;
   6584             } else {
   6585                op = Q ? Iop_F32ToFixed32Sx4_RZ : Iop_F32ToFixed32Sx2_RZ;
   6586             }
   6587             DIP("vcvt.%c32.f32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
   6588                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
   6589                 64 - ((theInstr >> 16) & 0x3f));
   6590          } else {
   6591             if (U) {
   6592                op = Q ? Iop_Fixed32UToF32x4_RN : Iop_Fixed32UToF32x2_RN;
   6593             } else {
   6594                op = Q ? Iop_Fixed32SToF32x4_RN : Iop_Fixed32SToF32x2_RN;
   6595             }
   6596             DIP("vcvt.f32.%c32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
   6597                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
   6598                 64 - ((theInstr >> 16) & 0x3f));
   6599          }
   6600          if (((theInstr >> 21) & 1) == 0)
   6601             return False;
   6602          if (Q) {
   6603             putQReg(dreg, binop(op, getQReg(mreg),
   6604                      mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
   6605          } else {
   6606             putDRegI64(dreg, binop(op, getDRegI64(mreg),
   6607                        mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
   6608          }
   6609          return True;
   6610       default:
   6611          return False;
   6612 
   6613    }
   6614    return False;
   6615 }
   6616 
   6617 /* A7.4.5 Two registers, miscellaneous */
   6618 static
   6619 Bool dis_neon_data_2reg_misc ( UInt theInstr, IRTemp condT )
   6620 {
   6621    UInt A = (theInstr >> 16) & 3;
   6622    UInt B = (theInstr >> 6) & 0x1f;
   6623    UInt Q = (theInstr >> 6) & 1;
   6624    UInt U = (theInstr >> 24) & 1;
   6625    UInt size = (theInstr >> 18) & 3;
   6626    UInt dreg = get_neon_d_regno(theInstr);
   6627    UInt mreg = get_neon_m_regno(theInstr);
   6628    UInt F = (theInstr >> 10) & 1;
   6629    IRTemp arg_d = IRTemp_INVALID;
   6630    IRTemp arg_m = IRTemp_INVALID;
   6631    IRTemp res = IRTemp_INVALID;
   6632    switch (A) {
   6633       case 0:
   6634          if (Q) {
   6635             arg_m = newTemp(Ity_V128);
   6636             res = newTemp(Ity_V128);
   6637             assign(arg_m, getQReg(mreg));
   6638          } else {
   6639             arg_m = newTemp(Ity_I64);
   6640             res = newTemp(Ity_I64);
   6641             assign(arg_m, getDRegI64(mreg));
   6642          }
   6643          switch (B >> 1) {
   6644             case 0: {
   6645                /* VREV64 */
   6646                IROp op;
   6647                switch (size) {
   6648                   case 0:
   6649                      op = Q ? Iop_Reverse64_8x16 : Iop_Reverse64_8x8;
   6650                      break;
   6651                   case 1:
   6652                      op = Q ? Iop_Reverse64_16x8 : Iop_Reverse64_16x4;
   6653                      break;
   6654                   case 2:
   6655                      op = Q ? Iop_Reverse64_32x4 : Iop_Reverse64_32x2;
   6656                      break;
   6657                   case 3:
   6658                      return False;
   6659                   default:
   6660                      vassert(0);
   6661                }
   6662                assign(res, unop(op, mkexpr(arg_m)));
   6663                DIP("vrev64.%u %c%u, %c%u\n", 8 << size,
   6664                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6665                break;
   6666             }
   6667             case 1: {
   6668                /* VREV32 */
   6669                IROp op;
   6670                switch (size) {
   6671                   case 0:
   6672                      op = Q ? Iop_Reverse32_8x16 : Iop_Reverse32_8x8;
   6673                      break;
   6674                   case 1:
   6675                      op = Q ? Iop_Reverse32_16x8 : Iop_Reverse32_16x4;
   6676                      break;
   6677                   case 2:
   6678                   case 3:
   6679                      return False;
   6680                   default:
   6681                      vassert(0);
   6682                }
   6683                assign(res, unop(op, mkexpr(arg_m)));
   6684                DIP("vrev32.%u %c%u, %c%u\n", 8 << size,
   6685                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6686                break;
   6687             }
   6688             case 2: {
   6689                /* VREV16 */
   6690                IROp op;
   6691                switch (size) {
   6692                   case 0:
   6693                      op = Q ? Iop_Reverse16_8x16 : Iop_Reverse16_8x8;
   6694                      break;
   6695                   case 1:
   6696                   case 2:
   6697                   case 3:
   6698                      return False;
   6699                   default:
   6700                      vassert(0);
   6701                }
   6702                assign(res, unop(op, mkexpr(arg_m)));
   6703                DIP("vrev16.%u %c%u, %c%u\n", 8 << size,
   6704                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6705                break;
   6706             }
   6707             case 3:
   6708                return False;
   6709             case 4:
   6710             case 5: {
   6711                /* VPADDL */
   6712                IROp op;
   6713                U = (theInstr >> 7) & 1;
   6714                if (Q) {
   6715                   switch (size) {
   6716                      case 0: op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16; break;
   6717                      case 1: op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8; break;
   6718                      case 2: op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4; break;
   6719                      case 3: return False;
   6720                      default: vassert(0);
   6721                   }
   6722                } else {
   6723                   switch (size) {
   6724                      case 0: op = U ? Iop_PwAddL8Ux8  : Iop_PwAddL8Sx8;  break;
   6725                      case 1: op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4; break;
   6726                      case 2: op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2; break;
   6727                      case 3: return False;
   6728                      default: vassert(0);
   6729                   }
   6730                }
   6731                assign(res, unop(op, mkexpr(arg_m)));
   6732                DIP("vpaddl.%c%u %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
   6733                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6734                break;
   6735             }
   6736             case 6:
   6737             case 7:
   6738                return False;
   6739             case 8: {
   6740                /* VCLS */
   6741                IROp op;
   6742                switch (size) {
   6743                   case 0: op = Q ? Iop_Cls8Sx16 : Iop_Cls8Sx8; break;
   6744                   case 1: op = Q ? Iop_Cls16Sx8 : Iop_Cls16Sx4; break;
   6745                   case 2: op = Q ? Iop_Cls32Sx4 : Iop_Cls32Sx2; break;
   6746                   case 3: return False;
   6747                   default: vassert(0);
   6748                }
   6749                assign(res, unop(op, mkexpr(arg_m)));
   6750                DIP("vcls.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6751                    Q ? 'q' : 'd', mreg);
   6752                break;
   6753             }
   6754             case 9: {
   6755                /* VCLZ */
   6756                IROp op;
   6757                switch (size) {
   6758                   case 0: op = Q ? Iop_Clz8Sx16 : Iop_Clz8Sx8; break;
   6759                   case 1: op = Q ? Iop_Clz16Sx8 : Iop_Clz16Sx4; break;
   6760                   case 2: op = Q ? Iop_Clz32Sx4 : Iop_Clz32Sx2; break;
   6761                   case 3: return False;
   6762                   default: vassert(0);
   6763                }
   6764                assign(res, unop(op, mkexpr(arg_m)));
   6765                DIP("vclz.i%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6766                    Q ? 'q' : 'd', mreg);
   6767                break;
   6768             }
   6769             case 10:
   6770                /* VCNT */
   6771                assign(res, unop(Q ? Iop_Cnt8x16 : Iop_Cnt8x8, mkexpr(arg_m)));
   6772                DIP("vcnt.8 %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
   6773                    mreg);
   6774                break;
   6775             case 11:
   6776                /* VMVN */
   6777                if (Q)
   6778                   assign(res, unop(Iop_NotV128, mkexpr(arg_m)));
   6779                else
   6780                   assign(res, unop(Iop_Not64, mkexpr(arg_m)));
   6781                DIP("vmvn %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
   6782                    mreg);
   6783                break;
   6784             case 12:
   6785             case 13: {
   6786                /* VPADAL */
   6787                IROp op, add_op;
   6788                U = (theInstr >> 7) & 1;
   6789                if (Q) {
   6790                   switch (size) {
   6791                      case 0:
   6792                         op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16;
   6793                         add_op = Iop_Add16x8;
   6794                         break;
   6795                      case 1:
   6796                         op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8;
   6797                         add_op = Iop_Add32x4;
   6798                         break;
   6799                      case 2:
   6800                         op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4;
   6801                         add_op = Iop_Add64x2;
   6802                         break;
   6803                      case 3:
   6804                         return False;
   6805                      default:
   6806                         vassert(0);
   6807                   }
   6808                } else {
   6809                   switch (size) {
   6810                      case 0:
   6811                         op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8;
   6812                         add_op = Iop_Add16x4;
   6813                         break;
   6814                      case 1:
   6815                         op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4;
   6816                         add_op = Iop_Add32x2;
   6817                         break;
   6818                      case 2:
   6819                         op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2;
   6820                         add_op = Iop_Add64;
   6821                         break;
   6822                      case 3:
   6823                         return False;
   6824                      default:
   6825                         vassert(0);
   6826                   }
   6827                }
   6828                if (Q) {
   6829                   arg_d = newTemp(Ity_V128);
   6830                   assign(arg_d, getQReg(dreg));
   6831                } else {
   6832                   arg_d = newTemp(Ity_I64);
   6833                   assign(arg_d, getDRegI64(dreg));
   6834                }
   6835                assign(res, binop(add_op, unop(op, mkexpr(arg_m)),
   6836                                          mkexpr(arg_d)));
   6837                DIP("vpadal.%c%u %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
   6838                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6839                break;
   6840             }
   6841             case 14: {
   6842                /* VQABS */
   6843                IROp op_sub, op_qsub, op_cmp;
   6844                IRTemp mask, tmp;
   6845                IRExpr *zero1, *zero2;
   6846                IRExpr *neg, *neg2;
   6847                if (Q) {
   6848                   zero1 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6849                   zero2 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6850                   mask = newTemp(Ity_V128);
   6851                   tmp = newTemp(Ity_V128);
   6852                } else {
   6853                   zero1 = mkU64(0);
   6854                   zero2 = mkU64(0);
   6855                   mask = newTemp(Ity_I64);
   6856                   tmp = newTemp(Ity_I64);
   6857                }
   6858                switch (size) {
   6859                   case 0:
   6860                      op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   6861                      op_qsub = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
   6862                      op_cmp = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   6863                      break;
   6864                   case 1:
   6865                      op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   6866                      op_qsub = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
   6867                      op_cmp = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4;
   6868                      break;
   6869                   case 2:
   6870                      op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   6871                      op_qsub = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
   6872                      op_cmp = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2;
   6873                      break;
   6874                   case 3:
   6875                      return False;
   6876                   default:
   6877                      vassert(0);
   6878                }
   6879                assign(mask, binop(op_cmp, mkexpr(arg_m), zero1));
   6880                neg = binop(op_qsub, zero2, mkexpr(arg_m));
   6881                neg2 = binop(op_sub, zero2, mkexpr(arg_m));
   6882                assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
   6883                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6884                                        mkexpr(mask),
   6885                                        mkexpr(arg_m)),
   6886                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6887                                        unop(Q ? Iop_NotV128 : Iop_Not64,
   6888                                             mkexpr(mask)),
   6889                                        neg)));
   6890                assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
   6891                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6892                                        mkexpr(mask),
   6893                                        mkexpr(arg_m)),
   6894                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6895                                        unop(Q ? Iop_NotV128 : Iop_Not64,
   6896                                             mkexpr(mask)),
   6897                                        neg2)));
   6898                setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
   6899                DIP("vqabs.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6900                    Q ? 'q' : 'd', mreg);
   6901                break;
   6902             }
   6903             case 15: {
   6904                /* VQNEG */
   6905                IROp op, op2;
   6906                IRExpr *zero;
   6907                if (Q) {
   6908                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6909                } else {
   6910                   zero = mkU64(0);
   6911                }
   6912                switch (size) {
   6913                   case 0:
   6914                      op = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
   6915                      op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   6916                      break;
   6917                   case 1:
   6918                      op = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
   6919                      op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   6920                      break;
   6921                   case 2:
   6922                      op = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
   6923                      op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   6924                      break;
   6925                   case 3:
   6926                      return False;
   6927                   default:
   6928                      vassert(0);
   6929                }
   6930                assign(res, binop(op, zero, mkexpr(arg_m)));
   6931                setFlag_QC(mkexpr(res), binop(op2, zero, mkexpr(arg_m)),
   6932                           Q, condT);
   6933                DIP("vqneg.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6934                    Q ? 'q' : 'd', mreg);
   6935                break;
   6936             }
   6937             default:
   6938                vassert(0);
   6939          }
   6940          if (Q) {
   6941             putQReg(dreg, mkexpr(res), condT);
   6942          } else {
   6943             putDRegI64(dreg, mkexpr(res), condT);
   6944          }
   6945          return True;
   6946       case 1:
   6947          if (Q) {
   6948             arg_m = newTemp(Ity_V128);
   6949             res = newTemp(Ity_V128);
   6950             assign(arg_m, getQReg(mreg));
   6951          } else {
   6952             arg_m = newTemp(Ity_I64);
   6953             res = newTemp(Ity_I64);
   6954             assign(arg_m, getDRegI64(mreg));
   6955          }
   6956          switch ((B >> 1) & 0x7) {
   6957             case 0: {
   6958                /* VCGT #0 */
   6959                IRExpr *zero;
   6960                IROp op;
   6961                if (Q) {
   6962                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6963                } else {
   6964                   zero = mkU64(0);
   6965                }
   6966                if (F) {
   6967                   switch (size) {
   6968                      case 0: case 1: case 3: return False;
   6969                      case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
   6970                      default: vassert(0);
   6971                   }
   6972                } else {
   6973                   switch (size) {
   6974                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   6975                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   6976                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   6977                      case 3: return False;
   6978                      default: vassert(0);
   6979                   }
   6980                }
   6981                assign(res, binop(op, mkexpr(arg_m), zero));
   6982                DIP("vcgt.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   6983                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6984                break;
   6985             }
   6986             case 1: {
   6987                /* VCGE #0 */
   6988                IROp op;
   6989                IRExpr *zero;
   6990                if (Q) {
   6991                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6992                } else {
   6993                   zero = mkU64(0);
   6994                }
   6995                if (F) {
   6996                   switch (size) {
   6997                      case 0: case 1: case 3: return False;
   6998                      case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
   6999                      default: vassert(0);
   7000                   }
   7001                   assign(res, binop(op, mkexpr(arg_m), zero));
   7002                } else {
   7003                   switch (size) {
   7004                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   7005                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   7006                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   7007                      case 3: return False;
   7008                      default: vassert(0);
   7009                   }
   7010                   assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   7011                                    binop(op, zero, mkexpr(arg_m))));
   7012                }
   7013                DIP("vcge.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   7014                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7015                break;
   7016             }
   7017             case 2: {
   7018                /* VCEQ #0 */
   7019                IROp op;
   7020                IRExpr *zero;
   7021                if (F) {
   7022                   if (Q) {
   7023                      zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7024                   } else {
   7025                      zero = mkU64(0);
   7026                   }
   7027                   switch (size) {
   7028                      case 0: case 1: case 3: return False;
   7029                      case 2: op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2; break;
   7030                      default: vassert(0);
   7031                   }
   7032                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7033                } else {
   7034                   switch (size) {
   7035                      case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
   7036                      case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
   7037                      case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
   7038                      case 3: return False;
   7039                      default: vassert(0);
   7040                   }
   7041                   assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   7042                                    unop(op, mkexpr(arg_m))));
   7043                }
   7044                DIP("vceq.%c%u %c%u, %c%u, #0\n", F ? 'f' : 'i', 8 << size,
   7045                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7046                break;
   7047             }
   7048             case 3: {
   7049                /* VCLE #0 */
   7050                IRExpr *zero;
   7051                IROp op;
   7052                if (Q) {
   7053                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7054                } else {
   7055                   zero = mkU64(0);
   7056                }
   7057                if (F) {
   7058                   switch (size) {
   7059                      case 0: case 1: case 3: return False;
   7060                      case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
   7061                      default: vassert(0);
   7062                   }
   7063                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7064                } else {
   7065                   switch (size) {
   7066                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   7067                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   7068                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   7069                      case 3: return False;
   7070                      default: vassert(0);
   7071                   }
   7072                   assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   7073                                    binop(op, mkexpr(arg_m), zero)));
   7074                }
   7075                DIP("vcle.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   7076                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7077                break;
   7078             }
   7079             case 4: {
   7080                /* VCLT #0 */
   7081                IROp op;
   7082                IRExpr *zero;
   7083                if (Q) {
   7084                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7085                } else {
   7086                   zero = mkU64(0);
   7087                }
   7088                if (F) {
   7089                   switch (size) {
   7090                      case 0: case 1: case 3: return False;
   7091                      case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
   7092                      default: vassert(0);
   7093                   }
   7094                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7095                } else {
   7096                   switch (size) {
   7097                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   7098                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   7099                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   7100                      case 3: return False;
   7101                      default: vassert(0);
   7102                   }
   7103                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7104                }
   7105                DIP("vclt.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   7106                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7107                break;
   7108             }
   7109             case 5:
   7110                return False;
   7111             case 6: {
   7112                /* VABS */
   7113                if (!F) {
   7114                   IROp op;
   7115                   switch(size) {
   7116                      case 0: op = Q ? Iop_Abs8x16 : Iop_Abs8x8; break;
   7117                      case 1: op = Q ? Iop_Abs16x8 : Iop_Abs16x4; break;
   7118                      case 2: op = Q ? Iop_Abs32x4 : Iop_Abs32x2; break;
   7119                      case 3: return False;
   7120                      default: vassert(0);
   7121                   }
   7122                   assign(res, unop(op, mkexpr(arg_m)));
   7123                } else {
   7124                   assign(res, unop(Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2,
   7125                                    mkexpr(arg_m)));
   7126                }
   7127                DIP("vabs.%c%u %c%u, %c%u\n",
   7128                    F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
   7129                    Q ? 'q' : 'd', mreg);
   7130                break;
   7131             }
   7132             case 7: {
   7133                /* VNEG */
   7134                IROp op;
   7135                IRExpr *zero;
   7136                if (F) {
   7137                   switch (size) {
   7138                      case 0: case 1: case 3: return False;
   7139                      case 2: op = Q ? Iop_Neg32Fx4 : Iop_Neg32Fx2; break;
   7140                      default: vassert(0);
   7141                   }
   7142                   assign(res, unop(op, mkexpr(arg_m)));
   7143                } else {
   7144                   if (Q) {
   7145                      zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7146                   } else {
   7147                      zero = mkU64(0);
   7148                   }
   7149                   switch (size) {
   7150                      case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
   7151                      case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
   7152                      case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
   7153                      case 3: return False;
   7154                      default: vassert(0);
   7155                   }
   7156                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7157                }
   7158                DIP("vneg.%c%u %c%u, %c%u\n",
   7159                    F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
   7160                    Q ? 'q' : 'd', mreg);
   7161                break;
   7162             }
   7163             default:
   7164                vassert(0);
   7165          }
   7166          if (Q) {
   7167             putQReg(dreg, mkexpr(res), condT);
   7168          } else {
   7169             putDRegI64(dreg, mkexpr(res), condT);
   7170          }
   7171          return True;
   7172       case 2:
   7173          if ((B >> 1) == 0) {
   7174             /* VSWP */
   7175             if (Q) {
   7176                arg_m = newTemp(Ity_V128);
   7177                assign(arg_m, getQReg(mreg));
   7178                putQReg(mreg, getQReg(dreg), condT);
   7179                putQReg(dreg, mkexpr(arg_m), condT);
   7180             } else {
   7181                arg_m = newTemp(Ity_I64);
   7182                assign(arg_m, getDRegI64(mreg));
   7183                putDRegI64(mreg, getDRegI64(dreg), condT);
   7184                putDRegI64(dreg, mkexpr(arg_m), condT);
   7185             }
   7186             DIP("vswp %c%u, %c%u\n",
   7187                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7188             return True;
   7189          } else if ((B >> 1) == 1) {
   7190             /* VTRN */
   7191             IROp op_odd = Iop_INVALID, op_even = Iop_INVALID;
   7192             IRTemp old_m, old_d, new_d, new_m;
   7193             if (Q) {
   7194                old_m = newTemp(Ity_V128);
   7195                old_d = newTemp(Ity_V128);
   7196                new_m = newTemp(Ity_V128);
   7197                new_d = newTemp(Ity_V128);
   7198                assign(old_m, getQReg(mreg));
   7199                assign(old_d, getQReg(dreg));
   7200             } else {
   7201                old_m = newTemp(Ity_I64);
   7202                old_d = newTemp(Ity_I64);
   7203                new_m = newTemp(Ity_I64);
   7204                new_d = newTemp(Ity_I64);
   7205                assign(old_m, getDRegI64(mreg));
   7206                assign(old_d, getDRegI64(dreg));
   7207             }
   7208             if (Q) {
   7209                switch (size) {
   7210                   case 0:
   7211                      op_odd  = Iop_InterleaveOddLanes8x16;
   7212                      op_even = Iop_InterleaveEvenLanes8x16;
   7213                      break;
   7214                   case 1:
   7215                      op_odd  = Iop_InterleaveOddLanes16x8;
   7216                      op_even = Iop_InterleaveEvenLanes16x8;
   7217                      break;
   7218                   case 2:
   7219                      op_odd  = Iop_InterleaveOddLanes32x4;
   7220                      op_even = Iop_InterleaveEvenLanes32x4;
   7221                      break;
   7222                   case 3:
   7223                      return False;
   7224                   default:
   7225                      vassert(0);
   7226                }
   7227             } else {
   7228                switch (size) {
   7229                   case 0:
   7230                      op_odd  = Iop_InterleaveOddLanes8x8;
   7231                      op_even = Iop_InterleaveEvenLanes8x8;
   7232                      break;
   7233                   case 1:
   7234                      op_odd  = Iop_InterleaveOddLanes16x4;
   7235                      op_even = Iop_InterleaveEvenLanes16x4;
   7236                      break;
   7237                   case 2:
   7238                      op_odd  = Iop_InterleaveHI32x2;
   7239                      op_even = Iop_InterleaveLO32x2;
   7240                      break;
   7241                   case 3:
   7242                      return False;
   7243                   default:
   7244                      vassert(0);
   7245                }
   7246             }
   7247             assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
   7248             assign(new_m, binop(op_odd, mkexpr(old_m), mkexpr(old_d)));
   7249             if (Q) {
   7250                putQReg(dreg, mkexpr(new_d), condT);
   7251                putQReg(mreg, mkexpr(new_m), condT);
   7252             } else {
   7253                putDRegI64(dreg, mkexpr(new_d), condT);
   7254                putDRegI64(mreg, mkexpr(new_m), condT);
   7255             }
   7256             DIP("vtrn.%u %c%u, %c%u\n",
   7257                 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7258             return True;
   7259          } else if ((B >> 1) == 2) {
   7260             /* VUZP */
   7261             IROp op_even, op_odd;
   7262             IRTemp old_m, old_d, new_m, new_d;
   7263             if (!Q && size == 2)
   7264                return False;
   7265             if (Q) {
   7266                old_m = newTemp(Ity_V128);
   7267                old_d = newTemp(Ity_V128);
   7268                new_m = newTemp(Ity_V128);
   7269                new_d = newTemp(Ity_V128);
   7270                assign(old_m, getQReg(mreg));
   7271                assign(old_d, getQReg(dreg));
   7272             } else {
   7273                old_m = newTemp(Ity_I64);
   7274                old_d = newTemp(Ity_I64);
   7275                new_m = newTemp(Ity_I64);
   7276                new_d = newTemp(Ity_I64);
   7277                assign(old_m, getDRegI64(mreg));
   7278                assign(old_d, getDRegI64(dreg));
   7279             }
   7280             switch (size) {
   7281                case 0:
   7282                   op_odd  = Q ? Iop_CatOddLanes8x16 : Iop_CatOddLanes8x8;
   7283                   op_even = Q ? Iop_CatEvenLanes8x16 : Iop_CatEvenLanes8x8;
   7284                   break;
   7285                case 1:
   7286                   op_odd  = Q ? Iop_CatOddLanes16x8 : Iop_CatOddLanes16x4;
   7287                   op_even = Q ? Iop_CatEvenLanes16x8 : Iop_CatEvenLanes16x4;
   7288                   break;
   7289                case 2:
   7290                   op_odd  = Iop_CatOddLanes32x4;
   7291                   op_even = Iop_CatEvenLanes32x4;
   7292                   break;
   7293                case 3:
   7294                   return False;
   7295                default:
   7296                   vassert(0);
   7297             }
   7298             assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
   7299             assign(new_m, binop(op_odd,  mkexpr(old_m), mkexpr(old_d)));
   7300             if (Q) {
   7301                putQReg(dreg, mkexpr(new_d), condT);
   7302                putQReg(mreg, mkexpr(new_m), condT);
   7303             } else {
   7304                putDRegI64(dreg, mkexpr(new_d), condT);
   7305                putDRegI64(mreg, mkexpr(new_m), condT);
   7306             }
   7307             DIP("vuzp.%u %c%u, %c%u\n",
   7308                 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7309             return True;
   7310          } else if ((B >> 1) == 3) {
   7311             /* VZIP */
   7312             IROp op_lo, op_hi;
   7313             IRTemp old_m, old_d, new_m, new_d;
   7314             if (!Q && size == 2)
   7315                return False;
   7316             if (Q) {
   7317                old_m = newTemp(Ity_V128);
   7318                old_d = newTemp(Ity_V128);
   7319                new_m = newTemp(Ity_V128);
   7320                new_d = newTemp(Ity_V128);
   7321                assign(old_m, getQReg(mreg));
   7322                assign(old_d, getQReg(dreg));
   7323             } else {
   7324                old_m = newTemp(Ity_I64);
   7325                old_d = newTemp(Ity_I64);
   7326                new_m = newTemp(Ity_I64);
   7327                new_d = newTemp(Ity_I64);
   7328                assign(old_m, getDRegI64(mreg));
   7329                assign(old_d, getDRegI64(dreg));
   7330             }
   7331             switch (size) {
   7332                case 0:
   7333                   op_hi = Q ? Iop_InterleaveHI8x16 : Iop_InterleaveHI8x8;
   7334                   op_lo = Q ? Iop_InterleaveLO8x16 : Iop_InterleaveLO8x8;
   7335                   break;
   7336                case 1:
   7337                   op_hi = Q ? Iop_InterleaveHI16x8 : Iop_InterleaveHI16x4;
   7338                   op_lo = Q ? Iop_InterleaveLO16x8 : Iop_InterleaveLO16x4;
   7339                   break;
   7340                case 2:
   7341                   op_hi = Iop_InterleaveHI32x4;
   7342                   op_lo = Iop_InterleaveLO32x4;
   7343                   break;
   7344                case 3:
   7345                   return False;
   7346                default:
   7347                   vassert(0);
   7348             }
   7349             assign(new_d, binop(op_lo, mkexpr(old_m), mkexpr(old_d)));
   7350             assign(new_m, binop(op_hi, mkexpr(old_m), mkexpr(old_d)));
   7351             if (Q) {
   7352                putQReg(dreg, mkexpr(new_d), condT);
   7353                putQReg(mreg, mkexpr(new_m), condT);
   7354             } else {
   7355                putDRegI64(dreg, mkexpr(new_d), condT);
   7356                putDRegI64(mreg, mkexpr(new_m), condT);
   7357             }
   7358             DIP("vzip.%u %c%u, %c%u\n",
   7359                 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7360             return True;
   7361          } else if (B == 8) {
   7362             /* VMOVN */
   7363             IROp op;
   7364             mreg >>= 1;
   7365             switch (size) {
   7366                case 0: op = Iop_NarrowUn16to8x8;  break;
   7367                case 1: op = Iop_NarrowUn32to16x4; break;
   7368                case 2: op = Iop_NarrowUn64to32x2; break;
   7369                case 3: return False;
   7370                default: vassert(0);
   7371             }
   7372             putDRegI64(dreg, unop(op, getQReg(mreg)), condT);
   7373             DIP("vmovn.i%u d%u, q%u\n", 16 << size, dreg, mreg);
   7374             return True;
   7375          } else if (B == 9 || (B >> 1) == 5) {
   7376             /* VQMOVN, VQMOVUN */
   7377             IROp op, op2;
   7378             IRTemp tmp;
   7379             dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   7380             mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   7381             if (mreg & 1)
   7382                return False;
   7383             mreg >>= 1;
   7384             switch (size) {
   7385                case 0: op2 = Iop_NarrowUn16to8x8;  break;
   7386                case 1: op2 = Iop_NarrowUn32to16x4; break;
   7387                case 2: op2 = Iop_NarrowUn64to32x2; break;
   7388                case 3: return False;
   7389                default: vassert(0);
   7390             }
   7391             switch (B & 3) {
   7392                case 0:
   7393                   vassert(0);
   7394                case 1:
   7395                   switch (size) {
   7396                      case 0: op = Iop_QNarrowUn16Sto8Ux8;  break;
   7397                      case 1: op = Iop_QNarrowUn32Sto16Ux4; break;
   7398                      case 2: op = Iop_QNarrowUn64Sto32Ux2; break;
   7399                      case 3: return False;
   7400                      default: vassert(0);
   7401                   }
   7402                   DIP("vqmovun.s%u d%u, q%u\n", 16 << size, dreg, mreg);
   7403                   break;
   7404                case 2:
   7405                   switch (size) {
   7406                      case 0: op = Iop_QNarrowUn16Sto8Sx8;  break;
   7407                      case 1: op = Iop_QNarrowUn32Sto16Sx4; break;
   7408                      case 2: op = Iop_QNarrowUn64Sto32Sx2; break;
   7409                      case 3: return False;
   7410                      default: vassert(0);
   7411                   }
   7412                   DIP("vqmovn.s%u d%u, q%u\n", 16 << size, dreg, mreg);
   7413                   break;
   7414                case 3:
   7415                   switch (size) {
   7416                      case 0: op = Iop_QNarrowUn16Uto8Ux8;  break;
   7417                      case 1: op = Iop_QNarrowUn32Uto16Ux4; break;
   7418                      case 2: op = Iop_QNarrowUn64Uto32Ux2; break;
   7419                      case 3: return False;
   7420                      default: vassert(0);
   7421                   }
   7422                   DIP("vqmovn.u%u d%u, q%u\n", 16 << size, dreg, mreg);
   7423                   break;
   7424                default:
   7425                   vassert(0);
   7426             }
   7427             res = newTemp(Ity_I64);
   7428             tmp = newTemp(Ity_I64);
   7429             assign(res, unop(op, getQReg(mreg)));
   7430             assign(tmp, unop(op2, getQReg(mreg)));
   7431             setFlag_QC(mkexpr(res), mkexpr(tmp), False, condT);
   7432             putDRegI64(dreg, mkexpr(res), condT);
   7433             return True;
   7434          } else if (B == 12) {
   7435             /* VSHLL (maximum shift) */
   7436             IROp op, cvt;
   7437             UInt shift_imm;
   7438             if (Q)
   7439                return False;
   7440             if (dreg & 1)
   7441                return False;
   7442             dreg >>= 1;
   7443             shift_imm = 8 << size;
   7444             res = newTemp(Ity_V128);
   7445             switch (size) {
   7446                case 0: op = Iop_ShlN16x8; cvt = Iop_Widen8Uto16x8;  break;
   7447                case 1: op = Iop_ShlN32x4; cvt = Iop_Widen16Uto32x4; break;
   7448                case 2: op = Iop_ShlN64x2; cvt = Iop_Widen32Uto64x2; break;
   7449                case 3: return False;
   7450                default: vassert(0);
   7451             }
   7452             assign(res, binop(op, unop(cvt, getDRegI64(mreg)),
   7453                                   mkU8(shift_imm)));
   7454             putQReg(dreg, mkexpr(res), condT);
   7455             DIP("vshll.i%u q%u, d%u, #%u\n", 8 << size, dreg, mreg, 8 << size);
   7456             return True;
   7457          } else if ((B >> 3) == 3 && (B & 3) == 0) {
   7458             /* VCVT (half<->single) */
   7459             /* Half-precision extensions are needed to run this */
   7460             vassert(0); // ATC
   7461             if (((theInstr >> 18) & 3) != 1)
   7462                return False;
   7463             if ((theInstr >> 8) & 1) {
   7464                if (dreg & 1)
   7465                   return False;
   7466                dreg >>= 1;
   7467                putQReg(dreg, unop(Iop_F16toF32x4, getDRegI64(mreg)),
   7468                      condT);
   7469                DIP("vcvt.f32.f16 q%u, d%u\n", dreg, mreg);
   7470             } else {
   7471                if (mreg & 1)
   7472                   return False;
   7473                mreg >>= 1;
   7474                putDRegI64(dreg, unop(Iop_F32toF16x4, getQReg(mreg)),
   7475                                 condT);
   7476                DIP("vcvt.f16.f32 d%u, q%u\n", dreg, mreg);
   7477             }
   7478             return True;
   7479          } else {
   7480             return False;
   7481          }
   7482          vassert(0);
   7483          return True;
   7484       case 3:
   7485          if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,0)) {
   7486             /* VRECPE */
   7487             IROp op;
   7488             F = (theInstr >> 8) & 1;
   7489             if (size != 2)
   7490                return False;
   7491             if (Q) {
   7492                op = F ? Iop_Recip32Fx4 : Iop_Recip32x4;
   7493                putQReg(dreg, unop(op, getQReg(mreg)), condT);
   7494                DIP("vrecpe.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
   7495             } else {
   7496                op = F ? Iop_Recip32Fx2 : Iop_Recip32x2;
   7497                putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
   7498                DIP("vrecpe.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
   7499             }
   7500             return True;
   7501          } else if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,1)) {
   7502             /* VRSQRTE */
   7503             IROp op;
   7504             F = (B >> 2) & 1;
   7505             if (size != 2)
   7506                return False;
   7507             if (F) {
   7508                /* fp */
   7509                op = Q ? Iop_Rsqrte32Fx4 : Iop_Rsqrte32Fx2;
   7510             } else {
   7511                /* unsigned int */
   7512                op = Q ? Iop_Rsqrte32x4 : Iop_Rsqrte32x2;
   7513             }
   7514             if (Q) {
   7515                putQReg(dreg, unop(op, getQReg(mreg)), condT);
   7516                DIP("vrsqrte.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
   7517             } else {
   7518                putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
   7519                DIP("vrsqrte.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
   7520             }
   7521             return True;
   7522          } else if ((B >> 3) == 3) {
   7523             /* VCVT (fp<->integer) */
   7524             IROp op;
   7525             if (size != 2)
   7526                return False;
   7527             switch ((B >> 1) & 3) {
   7528                case 0:
   7529                   op = Q ? Iop_I32StoFx4 : Iop_I32StoFx2;
   7530                   DIP("vcvt.f32.s32 %c%u, %c%u\n",
   7531                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7532                   break;
   7533                case 1:
   7534                   op = Q ? Iop_I32UtoFx4 : Iop_I32UtoFx2;
   7535                   DIP("vcvt.f32.u32 %c%u, %c%u\n",
   7536                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7537                   break;
   7538                case 2:
   7539                   op = Q ? Iop_FtoI32Sx4_RZ : Iop_FtoI32Sx2_RZ;
   7540                   DIP("vcvt.s32.f32 %c%u, %c%u\n",
   7541                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7542                   break;
   7543                case 3:
   7544                   op = Q ? Iop_FtoI32Ux4_RZ : Iop_FtoI32Ux2_RZ;
   7545                   DIP("vcvt.u32.f32 %c%u, %c%u\n",
   7546                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7547                   break;
   7548                default:
   7549                   vassert(0);
   7550             }
   7551             if (Q) {
   7552                putQReg(dreg, unop(op, getQReg(mreg)), condT);
   7553             } else {
   7554                putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
   7555             }
   7556             return True;
   7557          } else {
   7558             return False;
   7559          }
   7560          vassert(0);
   7561          return True;
   7562       default:
   7563          vassert(0);
   7564    }
   7565    return False;
   7566 }
   7567 
   7568 /* A7.4.6 One register and a modified immediate value */
   7569 static
   7570 void ppNeonImm(UInt imm, UInt cmode, UInt op)
   7571 {
   7572    int i;
   7573    switch (cmode) {
   7574       case 0: case 1: case 8: case 9:
   7575          vex_printf("0x%x", imm);
   7576          break;
   7577       case 2: case 3: case 10: case 11:
   7578          vex_printf("0x%x00", imm);
   7579          break;
   7580       case 4: case 5:
   7581          vex_printf("0x%x0000", imm);
   7582          break;
   7583       case 6: case 7:
   7584          vex_printf("0x%x000000", imm);
   7585          break;
   7586       case 12:
   7587          vex_printf("0x%xff", imm);
   7588          break;
   7589       case 13:
   7590          vex_printf("0x%xffff", imm);
   7591          break;
   7592       case 14:
   7593          if (op) {
   7594             vex_printf("0x");
   7595             for (i = 7; i >= 0; i--)
   7596                vex_printf("%s", (imm & (1 << i)) ? "ff" : "00");
   7597          } else {
   7598             vex_printf("0x%x", imm);
   7599          }
   7600          break;
   7601       case 15:
   7602          vex_printf("0x%x", imm);
   7603          break;
   7604    }
   7605 }
   7606 
   7607 static
   7608 const char *ppNeonImmType(UInt cmode, UInt op)
   7609 {
   7610    switch (cmode) {
   7611       case 0 ... 7:
   7612       case 12: case 13:
   7613          return "i32";
   7614       case 8 ... 11:
   7615          return "i16";
   7616       case 14:
   7617          if (op)
   7618             return "i64";
   7619          else
   7620             return "i8";
   7621       case 15:
   7622          if (op)
   7623             vassert(0);
   7624          else
   7625             return "f32";
   7626       default:
   7627          vassert(0);
   7628    }
   7629 }
   7630 
   7631 static
   7632 void DIPimm(UInt imm, UInt cmode, UInt op,
   7633             const char *instr, UInt Q, UInt dreg)
   7634 {
   7635    if (vex_traceflags & VEX_TRACE_FE) {
   7636       vex_printf("%s.%s %c%u, #", instr,
   7637                  ppNeonImmType(cmode, op), Q ? 'q' : 'd', dreg);
   7638       ppNeonImm(imm, cmode, op);
   7639       vex_printf("\n");
   7640    }
   7641 }
   7642 
   7643 static
   7644 Bool dis_neon_data_1reg_and_imm ( UInt theInstr, IRTemp condT )
   7645 {
   7646    UInt dreg = get_neon_d_regno(theInstr);
   7647    ULong imm_raw = ((theInstr >> 17) & 0x80) | ((theInstr >> 12) & 0x70) |
   7648                   (theInstr & 0xf);
   7649    ULong imm_raw_pp = imm_raw;
   7650    UInt cmode = (theInstr >> 8) & 0xf;
   7651    UInt op_bit = (theInstr >> 5) & 1;
   7652    ULong imm = 0;
   7653    UInt Q = (theInstr >> 6) & 1;
   7654    int i, j;
   7655    UInt tmp;
   7656    IRExpr *imm_val;
   7657    IRExpr *expr;
   7658    IRTemp tmp_var;
   7659    switch(cmode) {
   7660       case 7: case 6:
   7661          imm_raw = imm_raw << 8;
   7662          /* fallthrough */
   7663       case 5: case 4:
   7664          imm_raw = imm_raw << 8;
   7665          /* fallthrough */
   7666       case 3: case 2:
   7667          imm_raw = imm_raw << 8;
   7668          /* fallthrough */
   7669       case 0: case 1:
   7670          imm = (imm_raw << 32) | imm_raw;
   7671          break;
   7672       case 11: case 10:
   7673          imm_raw = imm_raw << 8;
   7674          /* fallthrough */
   7675       case 9: case 8:
   7676          imm_raw = (imm_raw << 16) | imm_raw;
   7677          imm = (imm_raw << 32) | imm_raw;
   7678          break;
   7679       case 13:
   7680          imm_raw = (imm_raw << 8) | 0xff;
   7681          /* fallthrough */
   7682       case 12:
   7683          imm_raw = (imm_raw << 8) | 0xff;
   7684          imm = (imm_raw << 32) | imm_raw;
   7685          break;
   7686       case 14:
   7687          if (! op_bit) {
   7688             for(i = 0; i < 8; i++) {
   7689                imm = (imm << 8) | imm_raw;
   7690             }
   7691          } else {
   7692             for(i = 7; i >= 0; i--) {
   7693                tmp = 0;
   7694                for(j = 0; j < 8; j++) {
   7695                   tmp = (tmp << 1) | ((imm_raw >> i) & 1);
   7696                }
   7697                imm = (imm << 8) | tmp;
   7698             }
   7699          }
   7700          break;
   7701       case 15:
   7702          imm = (imm_raw & 0x80) << 5;
   7703          imm |= ((~imm_raw & 0x40) << 5);
   7704          for(i = 1; i <= 4; i++)
   7705             imm |= (imm_raw & 0x40) << i;
   7706          imm |= (imm_raw & 0x7f);
   7707          imm = imm << 19;
   7708          imm = (imm << 32) | imm;
   7709          break;
   7710       default:
   7711          return False;
   7712    }
   7713    if (Q) {
   7714       imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   7715    } else {
   7716       imm_val = mkU64(imm);
   7717    }
   7718    if (((op_bit == 0) &&
   7719       (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 12) == 12))) ||
   7720       ((op_bit == 1) && (cmode == 14))) {
   7721       /* VMOV (immediate) */
   7722       if (Q) {
   7723          putQReg(dreg, imm_val, condT);
   7724       } else {
   7725          putDRegI64(dreg, imm_val, condT);
   7726       }
   7727       DIPimm(imm_raw_pp, cmode, op_bit, "vmov", Q, dreg);
   7728       return True;
   7729    }
   7730    if ((op_bit == 1) &&
   7731       (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 14) == 12))) {
   7732       /* VMVN (immediate) */
   7733       if (Q) {
   7734          putQReg(dreg, unop(Iop_NotV128, imm_val), condT);
   7735       } else {
   7736          putDRegI64(dreg, unop(Iop_Not64, imm_val), condT);
   7737       }
   7738       DIPimm(imm_raw_pp, cmode, op_bit, "vmvn", Q, dreg);
   7739       return True;
   7740    }
   7741    if (Q) {
   7742       tmp_var = newTemp(Ity_V128);
   7743       assign(tmp_var, getQReg(dreg));
   7744    } else {
   7745       tmp_var = newTemp(Ity_I64);
   7746       assign(tmp_var, getDRegI64(dreg));
   7747    }
   7748    if ((op_bit == 0) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
   7749       /* VORR (immediate) */
   7750       if (Q)
   7751          expr = binop(Iop_OrV128, mkexpr(tmp_var), imm_val);
   7752       else
   7753          expr = binop(Iop_Or64, mkexpr(tmp_var), imm_val);
   7754       DIPimm(imm_raw_pp, cmode, op_bit, "vorr", Q, dreg);
   7755    } else if ((op_bit == 1) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
   7756       /* VBIC (immediate) */
   7757       if (Q)
   7758          expr = binop(Iop_AndV128, mkexpr(tmp_var),
   7759                                    unop(Iop_NotV128, imm_val));
   7760       else
   7761          expr = binop(Iop_And64, mkexpr(tmp_var), unop(Iop_Not64, imm_val));
   7762       DIPimm(imm_raw_pp, cmode, op_bit, "vbic", Q, dreg);
   7763    } else {
   7764       return False;
   7765    }
   7766    if (Q)
   7767       putQReg(dreg, expr, condT);
   7768    else
   7769       putDRegI64(dreg, expr, condT);
   7770    return True;
   7771 }
   7772 
   7773 /* A7.4 Advanced SIMD data-processing instructions */
   7774 static
   7775 Bool dis_neon_data_processing ( UInt theInstr, IRTemp condT )
   7776 {
   7777    UInt A = (theInstr >> 19) & 0x1F;
   7778    UInt B = (theInstr >>  8) & 0xF;
   7779    UInt C = (theInstr >>  4) & 0xF;
   7780    UInt U = (theInstr >> 24) & 0x1;
   7781 
   7782    if (! (A & 0x10)) {
   7783       return dis_neon_data_3same(theInstr, condT);
   7784    }
   7785    if (((A & 0x17) == 0x10) && ((C & 0x9) == 0x1)) {
   7786       return dis_neon_data_1reg_and_imm(theInstr, condT);
   7787    }
   7788    if ((C & 1) == 1) {
   7789       return dis_neon_data_2reg_and_shift(theInstr, condT);
   7790    }
   7791    if (((C & 5) == 0) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
   7792       return dis_neon_data_3diff(theInstr, condT);
   7793    }
   7794    if (((C & 5) == 4) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
   7795       return dis_neon_data_2reg_and_scalar(theInstr, condT);
   7796    }
   7797    if ((A & 0x16) == 0x16) {
   7798       if ((U == 0) && ((C & 1) == 0)) {
   7799          return dis_neon_vext(theInstr, condT);
   7800       }
   7801       if ((U != 1) || ((C & 1) == 1))
   7802          return False;
   7803       if ((B & 8) == 0) {
   7804          return dis_neon_data_2reg_misc(theInstr, condT);
   7805       }
   7806       if ((B & 12) == 8) {
   7807          return dis_neon_vtb(theInstr, condT);
   7808       }
   7809       if ((B == 12) && ((C & 9) == 0)) {
   7810          return dis_neon_vdup(theInstr, condT);
   7811       }
   7812    }
   7813    return False;
   7814 }
   7815 
   7816 
   7817 /*------------------------------------------------------------*/
   7818 /*--- NEON loads and stores                                ---*/
   7819 /*------------------------------------------------------------*/
   7820 
   7821 /* For NEON memory operations, we use the standard scheme to handle
   7822    conditionalisation: generate a jump around the instruction if the
   7823    condition is false.  That's only necessary in Thumb mode, however,
   7824    since in ARM mode NEON instructions are unconditional. */
   7825 
   7826 /* A helper function for what follows.  It assumes we already went
   7827    uncond as per comments at the top of this section. */
   7828 static
   7829 void mk_neon_elem_load_to_one_lane( UInt rD, UInt inc, UInt index,
   7830                                     UInt N, UInt size, IRTemp addr )
   7831 {
   7832    UInt i;
   7833    switch (size) {
   7834       case 0:
   7835          putDRegI64(rD, triop(Iop_SetElem8x8, getDRegI64(rD), mkU8(index),
   7836                     loadLE(Ity_I8, mkexpr(addr))), IRTemp_INVALID);
   7837          break;
   7838       case 1:
   7839          putDRegI64(rD, triop(Iop_SetElem16x4, getDRegI64(rD), mkU8(index),
   7840                     loadLE(Ity_I16, mkexpr(addr))), IRTemp_INVALID);
   7841          break;
   7842       case 2:
   7843          putDRegI64(rD, triop(Iop_SetElem32x2, getDRegI64(rD), mkU8(index),
   7844                     loadLE(Ity_I32, mkexpr(addr))), IRTemp_INVALID);
   7845          break;
   7846       default:
   7847          vassert(0);
   7848    }
   7849    for (i = 1; i <= N; i++) {
   7850       switch (size) {
   7851          case 0:
   7852             putDRegI64(rD + i * inc,
   7853                        triop(Iop_SetElem8x8,
   7854                              getDRegI64(rD + i * inc),
   7855                              mkU8(index),
   7856                              loadLE(Ity_I8, binop(Iop_Add32,
   7857                                                   mkexpr(addr),
   7858                                                   mkU32(i * 1)))),
   7859                        IRTemp_INVALID);
   7860             break;
   7861          case 1:
   7862             putDRegI64(rD + i * inc,
   7863                        triop(Iop_SetElem16x4,
   7864                              getDRegI64(rD + i * inc),
   7865                              mkU8(index),
   7866                              loadLE(Ity_I16, binop(Iop_Add32,
   7867                                                    mkexpr(addr),
   7868                                                    mkU32(i * 2)))),
   7869                        IRTemp_INVALID);
   7870             break;
   7871          case 2:
   7872             putDRegI64(rD + i * inc,
   7873                        triop(Iop_SetElem32x2,
   7874                              getDRegI64(rD + i * inc),
   7875                              mkU8(index),
   7876                              loadLE(Ity_I32, binop(Iop_Add32,
   7877                                                    mkexpr(addr),
   7878                                                    mkU32(i * 4)))),
   7879                        IRTemp_INVALID);
   7880             break;
   7881          default:
   7882             vassert(0);
   7883       }
   7884    }
   7885 }
   7886 
   7887 /* A(nother) helper function for what follows.  It assumes we already
   7888    went uncond as per comments at the top of this section. */
   7889 static
   7890 void mk_neon_elem_store_from_one_lane( UInt rD, UInt inc, UInt index,
   7891                                        UInt N, UInt size, IRTemp addr )
   7892 {
   7893    UInt i;
   7894    switch (size) {
   7895       case 0:
   7896          storeLE(mkexpr(addr),
   7897                  binop(Iop_GetElem8x8, getDRegI64(rD), mkU8(index)));
   7898          break;
   7899       case 1:
   7900          storeLE(mkexpr(addr),
   7901                  binop(Iop_GetElem16x4, getDRegI64(rD), mkU8(index)));
   7902          break;
   7903       case 2:
   7904          storeLE(mkexpr(addr),
   7905                  binop(Iop_GetElem32x2, getDRegI64(rD), mkU8(index)));
   7906          break;
   7907       default:
   7908          vassert(0);
   7909    }
   7910    for (i = 1; i <= N; i++) {
   7911       switch (size) {
   7912          case 0:
   7913             storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 1)),
   7914                     binop(Iop_GetElem8x8, getDRegI64(rD + i * inc),
   7915                                           mkU8(index)));
   7916             break;
   7917          case 1:
   7918             storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 2)),
   7919                     binop(Iop_GetElem16x4, getDRegI64(rD + i * inc),
   7920                                            mkU8(index)));
   7921             break;
   7922          case 2:
   7923             storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 4)),
   7924                     binop(Iop_GetElem32x2, getDRegI64(rD + i * inc),
   7925                                            mkU8(index)));
   7926             break;
   7927          default:
   7928             vassert(0);
   7929       }
   7930    }
   7931 }
   7932 
   7933 /* Generate 2x64 -> 2x64 deinterleave code, for VLD2.  Caller must
   7934    make *u0 and *u1 be valid IRTemps before the call. */
   7935 static void math_DEINTERLEAVE_2 (/*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
   7936                                  IRTemp i0, IRTemp i1, Int laneszB)
   7937 {
   7938    /* The following assumes that the guest is little endian, and hence
   7939       that the memory-side (interleaved) data is stored
   7940       little-endianly. */
   7941    vassert(u0 && u1);
   7942    /* This is pretty easy, since we have primitives directly to
   7943       hand. */
   7944    if (laneszB == 4) {
   7945       // memLE(128 bits) == A0 B0 A1 B1
   7946       // i0 == B0 A0, i1 == B1 A1
   7947       // u0 == A1 A0, u1 == B1 B0
   7948       assign(*u0, binop(Iop_InterleaveLO32x2, mkexpr(i1), mkexpr(i0)));
   7949       assign(*u1, binop(Iop_InterleaveHI32x2, mkexpr(i1), mkexpr(i0)));
   7950    } else if (laneszB == 2) {
   7951       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
   7952       // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
   7953       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
   7954       assign(*u0, binop(Iop_CatEvenLanes16x4, mkexpr(i1), mkexpr(i0)));
   7955       assign(*u1, binop(Iop_CatOddLanes16x4,  mkexpr(i1), mkexpr(i0)));
   7956    } else if (laneszB == 1) {
   7957       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
   7958       // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
   7959       // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
   7960       assign(*u0, binop(Iop_CatEvenLanes8x8, mkexpr(i1), mkexpr(i0)));
   7961       assign(*u1, binop(Iop_CatOddLanes8x8,  mkexpr(i1), mkexpr(i0)));
   7962    } else {
   7963       // Can never happen, since VLD2 only has valid lane widths of 32,
   7964       // 16 or 8 bits.
   7965       vpanic("math_DEINTERLEAVE_2");
   7966    }
   7967 }
   7968 
   7969 /* Generate 2x64 -> 2x64 interleave code, for VST2.  Caller must make
   7970    *u0 and *u1 be valid IRTemps before the call. */
   7971 static void math_INTERLEAVE_2 (/*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
   7972                                IRTemp u0, IRTemp u1, Int laneszB)
   7973 {
   7974    /* The following assumes that the guest is little endian, and hence
   7975       that the memory-side (interleaved) data is stored
   7976       little-endianly. */
   7977    vassert(i0 && *i1);
   7978    /* This is pretty easy, since we have primitives directly to
   7979       hand. */
   7980    if (laneszB == 4) {
   7981       // memLE(128 bits) == A0 B0 A1 B1
   7982       // i0 == B0 A0, i1 == B1 A1
   7983       // u0 == A1 A0, u1 == B1 B0
   7984       assign(*i0, binop(Iop_InterleaveLO32x2, mkexpr(u1), mkexpr(u0)));
   7985       assign(*i1, binop(Iop_InterleaveHI32x2, mkexpr(u1), mkexpr(u0)));
   7986    } else if (laneszB == 2) {
   7987       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
   7988       // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
   7989       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
   7990       assign(*i0, binop(Iop_InterleaveLO16x4, mkexpr(u1), mkexpr(u0)));
   7991       assign(*i1, binop(Iop_InterleaveHI16x4, mkexpr(u1), mkexpr(u0)));
   7992    } else if (laneszB == 1) {
   7993       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
   7994       // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
   7995       // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
   7996       assign(*i0, binop(Iop_InterleaveLO8x8, mkexpr(u1), mkexpr(u0)));
   7997       assign(*i1, binop(Iop_InterleaveHI8x8, mkexpr(u1), mkexpr(u0)));
   7998    } else {
   7999       // Can never happen, since VST2 only has valid lane widths of 32,
   8000       // 16 or 8 bits.
   8001       vpanic("math_INTERLEAVE_2");
   8002    }
   8003 }
   8004 
   8005 // Helper function for generating arbitrary slicing 'n' dicing of
   8006 // 3 8x8 vectors, as needed for VLD3.8 and VST3.8.
   8007 static IRExpr* math_PERM_8x8x3(const UChar* desc,
   8008                                IRTemp s0, IRTemp s1, IRTemp s2)
   8009 {
   8010    // desc is an array of 8 pairs, encoded as 16 bytes,
   8011    // that describe how to assemble the result lanes, starting with
   8012    // lane 7.  Each pair is: first component (0..2) says which of
   8013    // s0/s1/s2 to use.  Second component (0..7) is the lane number
   8014    // in the source to use.
   8015    UInt si;
   8016    for (si = 0; si < 7; si++) {
   8017       vassert(desc[2 * si + 0] <= 2);
   8018       vassert(desc[2 * si + 1] <= 7);
   8019    }
   8020    IRTemp h3 = newTemp(Ity_I64);
   8021    IRTemp h2 = newTemp(Ity_I64);
   8022    IRTemp h1 = newTemp(Ity_I64);
   8023    IRTemp h0 = newTemp(Ity_I64);
   8024    IRTemp srcs[3] = {s0, s1, s2};
   8025 #  define SRC_VEC(_lane)   mkexpr(srcs[desc[2 * (7-(_lane)) + 0]])
   8026 #  define SRC_SHIFT(_lane) mkU8(56-8*(desc[2 * (7-(_lane)) + 1]))
   8027    assign(h3, binop(Iop_InterleaveHI8x8,
   8028                     binop(Iop_Shl64, SRC_VEC(7), SRC_SHIFT(7)),
   8029                     binop(Iop_Shl64, SRC_VEC(6), SRC_SHIFT(6))));
   8030    assign(h2, binop(Iop_InterleaveHI8x8,
   8031                     binop(Iop_Shl64, SRC_VEC(5), SRC_SHIFT(5)),
   8032                     binop(Iop_Shl64, SRC_VEC(4), SRC_SHIFT(4))));
   8033    assign(h1, binop(Iop_InterleaveHI8x8,
   8034                     binop(Iop_Shl64, SRC_VEC(3), SRC_SHIFT(3)),
   8035                     binop(Iop_Shl64, SRC_VEC(2), SRC_SHIFT(2))));
   8036    assign(h0, binop(Iop_InterleaveHI8x8,
   8037                     binop(Iop_Shl64, SRC_VEC(1), SRC_SHIFT(1)),
   8038                     binop(Iop_Shl64, SRC_VEC(0), SRC_SHIFT(0))));
   8039 #  undef SRC_VEC
   8040 #  undef SRC_SHIFT
   8041    // Now h3..h0 are 64 bit vectors with useful information only
   8042    // in the top 16 bits.  We now concatentate those four 16-bit
   8043    // groups so as to produce the final result.
   8044    IRTemp w1 = newTemp(Ity_I64);
   8045    IRTemp w0 = newTemp(Ity_I64);
   8046    assign(w1, binop(Iop_InterleaveHI16x4, mkexpr(h3), mkexpr(h2)));
   8047    assign(w0, binop(Iop_InterleaveHI16x4, mkexpr(h1), mkexpr(h0)));
   8048    return binop(Iop_InterleaveHI32x2, mkexpr(w1), mkexpr(w0));
   8049 }
   8050 
   8051 /* Generate 3x64 -> 3x64 deinterleave code, for VLD3.  Caller must
   8052    make *u0, *u1 and *u2 be valid IRTemps before the call. */
   8053 static void math_DEINTERLEAVE_3 (
   8054                /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1, /*OUT*/IRTemp* u2,
   8055                IRTemp i0, IRTemp i1, IRTemp i2, Int laneszB
   8056             )
   8057 {
   8058 #  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
   8059 #  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
   8060 #  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8061    /* The following assumes that the guest is little endian, and hence
   8062       that the memory-side (interleaved) data is stored
   8063       little-endianly. */
   8064    vassert(u0 && u1 && u2);
   8065    if (laneszB == 4) {
   8066       // memLE(192 bits) == A0 B0 C0 A1 B1 C1
   8067       // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
   8068       // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
   8069       assign(*u0, IHI32x2(SHL64(i1,  0), SHL64(i0, 32)));
   8070       assign(*u1, IHI32x2(SHL64(i2, 32), SHL64(i0,  0)));
   8071       assign(*u2, IHI32x2(SHL64(i2,  0), SHL64(i1, 32)));
   8072    } else if (laneszB == 2) {
   8073       // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
   8074       // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
   8075       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
   8076 #     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
   8077                 IHI32x2(                                      \
   8078                    IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
   8079                            SHL64((_tmp2),48-16*(_la2))),      \
   8080                    IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
   8081                            SHL64((_tmp0),48-16*(_la0))))
   8082       assign(*u0, XXX(i2,1, i1,2, i0,3, i0,0));
   8083       assign(*u1, XXX(i2,2, i1,3, i1,0, i0,1));
   8084       assign(*u2, XXX(i2,3, i2,0, i1,1, i0,2));
   8085 #     undef XXX
   8086    } else if (laneszB == 1) {
   8087       // These describe how the result vectors [7..0] are
   8088       // assembled from the source vectors.  Each pair is
   8089       // (source vector number, lane number).
   8090       static const UChar de0[16] = {2,5, 2,2, 1,7, 1,4, 1,1, 0,6, 0,3, 0,0};
   8091       static const UChar de1[16] = {2,6, 2,3, 2,0, 1,5, 1,2, 0,7, 0,4, 0,1};
   8092       static const UChar de2[16] = {2,7, 2,4, 2,1, 1,6, 1,3, 1,0, 0,5, 0,2};
   8093       assign(*u0, math_PERM_8x8x3(de0, i0, i1, i2));
   8094       assign(*u1, math_PERM_8x8x3(de1, i0, i1, i2));
   8095       assign(*u2, math_PERM_8x8x3(de2, i0, i1, i2));
   8096    } else {
   8097       // Can never happen, since VLD3 only has valid lane widths of 32,
   8098       // 16 or 8 bits.
   8099       vpanic("math_DEINTERLEAVE_3");
   8100    }
   8101 #  undef SHL64
   8102 #  undef IHI16x4
   8103 #  undef IHI32x2
   8104 }
   8105 
   8106 /* Generate 3x64 -> 3x64 interleave code, for VST3.  Caller must
   8107    make *i0, *i1 and *i2 be valid IRTemps before the call. */
   8108 static void math_INTERLEAVE_3 (
   8109                /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1, /*OUT*/IRTemp* i2,
   8110                IRTemp u0, IRTemp u1, IRTemp u2, Int laneszB
   8111             )
   8112 {
   8113 #  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
   8114 #  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
   8115 #  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8116    /* The following assumes that the guest is little endian, and hence
   8117       that the memory-side (interleaved) data is stored
   8118       little-endianly. */
   8119    vassert(i0 && i1 && i2);
   8120    if (laneszB == 4) {
   8121       // memLE(192 bits) == A0 B0 C0 A1 B1 C1
   8122       // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
   8123       // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
   8124       assign(*i0, IHI32x2(SHL64(u1, 32), SHL64(u0, 32)));
   8125       assign(*i1, IHI32x2(SHL64(u0,  0), SHL64(u2, 32)));
   8126       assign(*i2, IHI32x2(SHL64(u2,  0), SHL64(u1,  0)));
   8127    } else if (laneszB == 2) {
   8128       // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
   8129       // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
   8130       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
   8131 #     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
   8132                 IHI32x2(                                      \
   8133                    IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
   8134                            SHL64((_tmp2),48-16*(_la2))),      \
   8135                    IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
   8136                            SHL64((_tmp0),48-16*(_la0))))
   8137       assign(*i0, XXX(u0,1, u2,0, u1,0, u0,0));
   8138       assign(*i1, XXX(u1,2, u0,2, u2,1, u1,1));
   8139       assign(*i2, XXX(u2,3, u1,3, u0,3, u2,2));
   8140 #     undef XXX
   8141    } else if (laneszB == 1) {
   8142       // These describe how the result vectors [7..0] are
   8143       // assembled from the source vectors.  Each pair is
   8144       // (source vector number, lane number).
   8145       static const UChar in0[16] = {1,2, 0,2, 2,1, 1,1, 0,1, 2,0, 1,0, 0,0};
   8146       static const UChar in1[16] = {0,5, 2,4, 1,4, 0,4, 2,3, 1,3, 0,3, 2,2};
   8147       static const UChar in2[16] = {2,7, 1,7, 0,7, 2,6, 1,6, 0,6, 2,5, 1,5};
   8148       assign(*i0, math_PERM_8x8x3(in0, u0, u1, u2));
   8149       assign(*i1, math_PERM_8x8x3(in1, u0, u1, u2));
   8150       assign(*i2, math_PERM_8x8x3(in2, u0, u1, u2));
   8151    } else {
   8152       // Can never happen, since VST3 only has valid lane widths of 32,
   8153       // 16 or 8 bits.
   8154       vpanic("math_INTERLEAVE_3");
   8155    }
   8156 #  undef SHL64
   8157 #  undef IHI16x4
   8158 #  undef IHI32x2
   8159 }
   8160 
   8161 /* Generate 4x64 -> 4x64 deinterleave code, for VLD4.  Caller must
   8162    make *u0, *u1, *u2 and *u3 be valid IRTemps before the call. */
   8163 static void math_DEINTERLEAVE_4 (
   8164                /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
   8165                /*OUT*/IRTemp* u2, /*OUT*/IRTemp* u3,
   8166                IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3, Int laneszB
   8167             )
   8168 {
   8169 #  define IHI32x2(_t1, _t2) \
   8170              binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
   8171 #  define ILO32x2(_t1, _t2) \
   8172              binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
   8173 #  define IHI16x4(_t1, _t2) \
   8174              binop(Iop_InterleaveHI16x4, mkexpr(_t1), mkexpr(_t2))
   8175 #  define ILO16x4(_t1, _t2) \
   8176              binop(Iop_InterleaveLO16x4, mkexpr(_t1), mkexpr(_t2))
   8177 #  define IHI8x8(_t1, _e2) \
   8178              binop(Iop_InterleaveHI8x8, mkexpr(_t1), _e2)
   8179 #  define SHL64(_tmp, _amt) \
   8180              binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8181    /* The following assumes that the guest is little endian, and hence
   8182       that the memory-side (interleaved) data is stored
   8183       little-endianly. */
   8184    vassert(u0 && u1 && u2 && u3);
   8185    if (laneszB == 4) {
   8186       assign(*u0, ILO32x2(i2, i0));
   8187       assign(*u1, IHI32x2(i2, i0));
   8188       assign(*u2, ILO32x2(i3, i1));
   8189       assign(*u3, IHI32x2(i3, i1));
   8190    } else if (laneszB == 2) {
   8191       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8192       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8193       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8194       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8195       assign(b1b0a1a0, ILO16x4(i1, i0));
   8196       assign(b3b2a3a2, ILO16x4(i3, i2));
   8197       assign(d1d0c1c0, IHI16x4(i1, i0));
   8198       assign(d3d2c3c2, IHI16x4(i3, i2));
   8199       // And now do what we did for the 32-bit case.
   8200       assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
   8201       assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
   8202       assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
   8203       assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
   8204    } else if (laneszB == 1) {
   8205       // Deinterleave into 16-bit chunks, then do as the 16-bit case.
   8206       IRTemp i0x = newTemp(Ity_I64);
   8207       IRTemp i1x = newTemp(Ity_I64);
   8208       IRTemp i2x = newTemp(Ity_I64);
   8209       IRTemp i3x = newTemp(Ity_I64);
   8210       assign(i0x, IHI8x8(i0, SHL64(i0, 32)));
   8211       assign(i1x, IHI8x8(i1, SHL64(i1, 32)));
   8212       assign(i2x, IHI8x8(i2, SHL64(i2, 32)));
   8213       assign(i3x, IHI8x8(i3, SHL64(i3, 32)));
   8214       // From here on is like the 16 bit case.
   8215       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8216       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8217       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8218       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8219       assign(b1b0a1a0, ILO16x4(i1x, i0x));
   8220       assign(b3b2a3a2, ILO16x4(i3x, i2x));
   8221       assign(d1d0c1c0, IHI16x4(i1x, i0x));
   8222       assign(d3d2c3c2, IHI16x4(i3x, i2x));
   8223       // And now do what we did for the 32-bit case.
   8224       assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
   8225       assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
   8226       assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
   8227       assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
   8228    } else {
   8229       // Can never happen, since VLD4 only has valid lane widths of 32,
   8230       // 16 or 8 bits.
   8231       vpanic("math_DEINTERLEAVE_4");
   8232    }
   8233 #  undef SHL64
   8234 #  undef IHI8x8
   8235 #  undef ILO16x4
   8236 #  undef IHI16x4
   8237 #  undef ILO32x2
   8238 #  undef IHI32x2
   8239 }
   8240 
   8241 /* Generate 4x64 -> 4x64 interleave code, for VST4.  Caller must
   8242    make *i0, *i1, *i2 and *i3 be valid IRTemps before the call. */
   8243 static void math_INTERLEAVE_4 (
   8244                /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
   8245                /*OUT*/IRTemp* i2, /*OUT*/IRTemp* i3,
   8246                IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3, Int laneszB
   8247             )
   8248 {
   8249 #  define IHI32x2(_t1, _t2) \
   8250              binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
   8251 #  define ILO32x2(_t1, _t2) \
   8252              binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
   8253 #  define CEV16x4(_t1, _t2) \
   8254              binop(Iop_CatEvenLanes16x4, mkexpr(_t1), mkexpr(_t2))
   8255 #  define COD16x4(_t1, _t2) \
   8256              binop(Iop_CatOddLanes16x4, mkexpr(_t1), mkexpr(_t2))
   8257 #  define COD8x8(_t1, _e2) \
   8258              binop(Iop_CatOddLanes8x8, mkexpr(_t1), _e2)
   8259 #  define SHL64(_tmp, _amt) \
   8260              binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8261    /* The following assumes that the guest is little endian, and hence
   8262       that the memory-side (interleaved) data is stored
   8263       little-endianly. */
   8264    vassert(u0 && u1 && u2 && u3);
   8265    if (laneszB == 4) {
   8266       assign(*i0, ILO32x2(u1, u0));
   8267       assign(*i1, ILO32x2(u3, u2));
   8268       assign(*i2, IHI32x2(u1, u0));
   8269       assign(*i3, IHI32x2(u3, u2));
   8270    } else if (laneszB == 2) {
   8271       // First, interleave at the 32-bit lane size.
   8272       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8273       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8274       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8275       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8276       assign(b1b0a1a0, ILO32x2(u1, u0));
   8277       assign(b3b2a3a2, IHI32x2(u1, u0));
   8278       assign(d1d0c1c0, ILO32x2(u3, u2));
   8279       assign(d3d2c3c2, IHI32x2(u3, u2));
   8280       // And interleave (cat) at the 16 bit size.
   8281       assign(*i0, CEV16x4(d1d0c1c0, b1b0a1a0));
   8282       assign(*i1, COD16x4(d1d0c1c0, b1b0a1a0));
   8283       assign(*i2, CEV16x4(d3d2c3c2, b3b2a3a2));
   8284       assign(*i3, COD16x4(d3d2c3c2, b3b2a3a2));
   8285    } else if (laneszB == 1) {
   8286       // First, interleave at the 32-bit lane size.
   8287       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8288       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8289       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8290       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8291       assign(b1b0a1a0, ILO32x2(u1, u0));
   8292       assign(b3b2a3a2, IHI32x2(u1, u0));
   8293       assign(d1d0c1c0, ILO32x2(u3, u2));
   8294       assign(d3d2c3c2, IHI32x2(u3, u2));
   8295       // And interleave (cat) at the 16 bit size.
   8296       IRTemp i0x = newTemp(Ity_I64);
   8297       IRTemp i1x = newTemp(Ity_I64);
   8298       IRTemp i2x = newTemp(Ity_I64);
   8299       IRTemp i3x = newTemp(Ity_I64);
   8300       assign(i0x, CEV16x4(d1d0c1c0, b1b0a1a0));
   8301       assign(i1x, COD16x4(d1d0c1c0, b1b0a1a0));
   8302       assign(i2x, CEV16x4(d3d2c3c2, b3b2a3a2));
   8303       assign(i3x, COD16x4(d3d2c3c2, b3b2a3a2));
   8304       // And rearrange within each word, to get the right 8 bit lanes.
   8305       assign(*i0, COD8x8(i0x, SHL64(i0x, 8)));
   8306       assign(*i1, COD8x8(i1x, SHL64(i1x, 8)));
   8307       assign(*i2, COD8x8(i2x, SHL64(i2x, 8)));
   8308       assign(*i3, COD8x8(i3x, SHL64(i3x, 8)));
   8309    } else {
   8310       // Can never happen, since VLD4 only has valid lane widths of 32,
   8311       // 16 or 8 bits.
   8312       vpanic("math_DEINTERLEAVE_4");
   8313    }
   8314 #  undef SHL64
   8315 #  undef COD8x8
   8316 #  undef COD16x4
   8317 #  undef CEV16x4
   8318 #  undef ILO32x2
   8319 #  undef IHI32x2
   8320 }
   8321 
   8322 /* A7.7 Advanced SIMD element or structure load/store instructions */
   8323 static
   8324 Bool dis_neon_load_or_store ( UInt theInstr,
   8325                               Bool isT, IRTemp condT )
   8326 {
   8327 #  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
   8328    UInt bA = INSN(23,23);
   8329    UInt fB = INSN(11,8);
   8330    UInt bL = INSN(21,21);
   8331    UInt rD = (INSN(22,22) << 4) | INSN(15,12);
   8332    UInt rN = INSN(19,16);
   8333    UInt rM = INSN(3,0);
   8334    UInt N, size, i, j;
   8335    UInt inc;
   8336    UInt regs = 1;
   8337 
   8338    if (isT) {
   8339       vassert(condT != IRTemp_INVALID);
   8340    } else {
   8341       vassert(condT == IRTemp_INVALID);
   8342    }
   8343    /* So now, if condT is not IRTemp_INVALID, we know we're
   8344       dealing with Thumb code. */
   8345 
   8346    if (INSN(20,20) != 0)
   8347       return False;
   8348 
   8349    IRTemp initialRn = newTemp(Ity_I32);
   8350    assign(initialRn, isT ? getIRegT(rN) : getIRegA(rN));
   8351 
   8352    IRTemp initialRm = newTemp(Ity_I32);
   8353    assign(initialRm, isT ? getIRegT(rM) : getIRegA(rM));
   8354 
   8355    /* There are 3 cases:
   8356       (1) VSTn / VLDn (n-element structure from/to one lane)
   8357       (2) VLDn (single element to all lanes)
   8358       (3) VSTn / VLDn (multiple n-element structures)
   8359    */
   8360    if (bA) {
   8361       N = fB & 3;
   8362       if ((fB >> 2) < 3) {
   8363          /* ------------ Case (1) ------------
   8364             VSTn / VLDn (n-element structure from/to one lane) */
   8365 
   8366          size = fB >> 2;
   8367 
   8368          switch (size) {
   8369             case 0: i = INSN(7,5); inc = 1; break;
   8370             case 1: i = INSN(7,6); inc = INSN(5,5) ? 2 : 1; break;
   8371             case 2: i = INSN(7,7); inc = INSN(6,6) ? 2 : 1; break;
   8372             case 3: return False;
   8373             default: vassert(0);
   8374          }
   8375 
   8376          IRTemp addr = newTemp(Ity_I32);
   8377          assign(addr, mkexpr(initialRn));
   8378 
   8379          // go uncond
   8380          if (condT != IRTemp_INVALID)
   8381             mk_skip_over_T32_if_cond_is_false(condT);
   8382          // now uncond
   8383 
   8384          if (bL)
   8385             mk_neon_elem_load_to_one_lane(rD, inc, i, N, size, addr);
   8386          else
   8387             mk_neon_elem_store_from_one_lane(rD, inc, i, N, size, addr);
   8388          DIP("v%s%u.%u {", bL ? "ld" : "st", N + 1, 8 << size);
   8389          for (j = 0; j <= N; j++) {
   8390             if (j)
   8391                DIP(", ");
   8392             DIP("d%u[%u]", rD + j * inc, i);
   8393          }
   8394          DIP("}, [r%u]", rN);
   8395          if (rM != 13 && rM != 15) {
   8396             DIP(", r%u\n", rM);
   8397          } else {
   8398             DIP("%s\n", (rM != 15) ? "!" : "");
   8399          }
   8400       } else {
   8401          /* ------------ Case (2) ------------
   8402             VLDn (single element to all lanes) */
   8403          UInt r;
   8404          if (bL == 0)
   8405             return False;
   8406 
   8407          inc = INSN(5,5) + 1;
   8408          size = INSN(7,6);
   8409 
   8410          /* size == 3 and size == 2 cases differ in alignment constraints */
   8411          if (size == 3 && N == 3 && INSN(4,4) == 1)
   8412             size = 2;
   8413 
   8414          if (size == 0 && N == 0 && INSN(4,4) == 1)
   8415             return False;
   8416          if (N == 2 && INSN(4,4) == 1)
   8417             return False;
   8418          if (size == 3)
   8419             return False;
   8420 
   8421          // go uncond
   8422          if (condT != IRTemp_INVALID)
   8423             mk_skip_over_T32_if_cond_is_false(condT);
   8424          // now uncond
   8425 
   8426          IRTemp addr = newTemp(Ity_I32);
   8427          assign(addr, mkexpr(initialRn));
   8428 
   8429          if (N == 0 && INSN(5,5))
   8430             regs = 2;
   8431 
   8432          for (r = 0; r < regs; r++) {
   8433             switch (size) {
   8434                case 0:
   8435                   putDRegI64(rD + r, unop(Iop_Dup8x8,
   8436                                           loadLE(Ity_I8, mkexpr(addr))),
   8437                              IRTemp_INVALID);
   8438                   break;
   8439                case 1:
   8440                   putDRegI64(rD + r, unop(Iop_Dup16x4,
   8441                                           loadLE(Ity_I16, mkexpr(addr))),
   8442                              IRTemp_INVALID);
   8443                   break;
   8444                case 2:
   8445                   putDRegI64(rD + r, unop(Iop_Dup32x2,
   8446                                           loadLE(Ity_I32, mkexpr(addr))),
   8447                              IRTemp_INVALID);
   8448                   break;
   8449                default:
   8450                   vassert(0);
   8451             }
   8452             for (i = 1; i <= N; i++) {
   8453                switch (size) {
   8454                   case 0:
   8455                      putDRegI64(rD + r + i * inc,
   8456                                 unop(Iop_Dup8x8,
   8457                                      loadLE(Ity_I8, binop(Iop_Add32,
   8458                                                           mkexpr(addr),
   8459                                                           mkU32(i * 1)))),
   8460                                 IRTemp_INVALID);
   8461                      break;
   8462                   case 1:
   8463                      putDRegI64(rD + r + i * inc,
   8464                                 unop(Iop_Dup16x4,
   8465                                      loadLE(Ity_I16, binop(Iop_Add32,
   8466                                                            mkexpr(addr),
   8467                                                            mkU32(i * 2)))),
   8468                                 IRTemp_INVALID);
   8469                      break;
   8470                   case 2:
   8471                      putDRegI64(rD + r + i * inc,
   8472                                 unop(Iop_Dup32x2,
   8473                                      loadLE(Ity_I32, binop(Iop_Add32,
   8474                                                            mkexpr(addr),
   8475                                                            mkU32(i * 4)))),
   8476                                 IRTemp_INVALID);
   8477                      break;
   8478                   default:
   8479                      vassert(0);
   8480                }
   8481             }
   8482          }
   8483          DIP("vld%u.%u {", N + 1, 8 << size);
   8484          for (r = 0; r < regs; r++) {
   8485             for (i = 0; i <= N; i++) {
   8486                if (i || r)
   8487                   DIP(", ");
   8488                DIP("d%u[]", rD + r + i * inc);
   8489             }
   8490          }
   8491          DIP("}, [r%u]", rN);
   8492          if (rM != 13 && rM != 15) {
   8493             DIP(", r%u\n", rM);
   8494          } else {
   8495             DIP("%s\n", (rM != 15) ? "!" : "");
   8496          }
   8497       }
   8498       /* Writeback.  We're uncond here, so no condT-ing. */
   8499       if (rM != 15) {
   8500          if (rM == 13) {
   8501             IRExpr* e = binop(Iop_Add32,
   8502                               mkexpr(initialRn),
   8503                               mkU32((1 << size) * (N + 1)));
   8504             if (isT)
   8505                putIRegT(rN, e, IRTemp_INVALID);
   8506             else
   8507                putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
   8508          } else {
   8509             IRExpr* e = binop(Iop_Add32,
   8510                               mkexpr(initialRn),
   8511                               mkexpr(initialRm));
   8512             if (isT)
   8513                putIRegT(rN, e, IRTemp_INVALID);
   8514             else
   8515                putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
   8516          }
   8517       }
   8518       return True;
   8519    } else {
   8520       /* ------------ Case (3) ------------
   8521          VSTn / VLDn (multiple n-element structures) */
   8522       inc = (fB & 1) + 1;
   8523 
   8524       if (fB == BITS4(0,0,1,0)       // Dd, Dd+1, Dd+2, Dd+3  inc = 1  regs = 4
   8525           || fB == BITS4(0,1,1,0)    // Dd, Dd+1, Dd+2        inc = 1  regs = 3
   8526           || fB == BITS4(0,1,1,1)    // Dd                    inc = 2  regs = 1
   8527           || fB == BITS4(1,0,1,0)) { // Dd, Dd+1              inc = 1  regs = 2
   8528          N = 0; // VLD1/VST1.  'inc' does not appear to have any
   8529                 // meaning for the VLD1/VST1 cases.  'regs' is the number of
   8530                 // registers involved.
   8531          if (rD + regs > 32) return False;
   8532       }
   8533       else
   8534       if (fB == BITS4(0,0,1,1)       // Dd, Dd+1, Dd+2, Dd+3  inc=2  regs = 2
   8535           || fB == BITS4(1,0,0,0)    // Dd, Dd+1              inc=1  regs = 1
   8536           || fB == BITS4(1,0,0,1)) { // Dd, Dd+2              inc=2  regs = 1
   8537          N = 1; // VLD2/VST2.  'regs' is the number of register-pairs involved
   8538          if (regs == 1 && inc == 1 && rD + 1 >= 32) return False;
   8539          if (regs == 1 && inc == 2 && rD + 2 >= 32) return False;
   8540          if (regs == 2 && inc == 2 && rD + 3 >= 32) return False;
   8541       } else if (fB == BITS4(0,1,0,0) || fB == BITS4(0,1,0,1)) {
   8542          N = 2; // VLD3/VST3
   8543          if (inc == 1 && rD + 2 >= 32) return False;
   8544          if (inc == 2 && rD + 4 >= 32) return False;
   8545       } else if (fB == BITS4(0,0,0,0) || fB == BITS4(0,0,0,1)) {
   8546          N = 3; // VLD4/VST4
   8547          if (inc == 1 && rD + 3 >= 32) return False;
   8548          if (inc == 2 && rD + 6 >= 32) return False;
   8549       } else {
   8550          return False;
   8551       }
   8552 
   8553       if (N == 1 && fB == BITS4(0,0,1,1)) {
   8554          regs = 2;
   8555       } else if (N == 0) {
   8556          if (fB == BITS4(1,0,1,0)) {
   8557             regs = 2;
   8558          } else if (fB == BITS4(0,1,1,0)) {
   8559             regs = 3;
   8560          } else if (fB == BITS4(0,0,1,0)) {
   8561             regs = 4;
   8562          }
   8563       }
   8564 
   8565       size = INSN(7,6);
   8566       if (N == 0 && size == 3)
   8567          size = 2;
   8568       if (size == 3)
   8569          return False;
   8570 
   8571       // go uncond
   8572       if (condT != IRTemp_INVALID)
   8573          mk_skip_over_T32_if_cond_is_false(condT);
   8574       // now uncond
   8575 
   8576       IRTemp addr = newTemp(Ity_I32);
   8577       assign(addr, mkexpr(initialRn));
   8578 
   8579       if (N == 0 /* No interleaving -- VLD1/VST1 */) {
   8580          UInt r;
   8581          vassert(regs == 1 || regs == 2 || regs == 3 || regs == 4);
   8582          /* inc has no relevance here */
   8583          for (r = 0; r < regs; r++) {
   8584             if (bL)
   8585                putDRegI64(rD+r, loadLE(Ity_I64, mkexpr(addr)), IRTemp_INVALID);
   8586             else
   8587                storeLE(mkexpr(addr), getDRegI64(rD+r));
   8588             IRTemp tmp = newTemp(Ity_I32);
   8589             assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(8)));
   8590             addr = tmp;
   8591          }
   8592       }
   8593       else
   8594       if (N == 1 /* 2-interleaving -- VLD2/VST2 */) {
   8595          vassert( (regs == 1 && (inc == 1 || inc == 2))
   8596                    || (regs == 2 && inc == 2) );
   8597          // Make 'nregs' be the number of registers and 'regstep'
   8598          // equal the actual register-step.  The ARM encoding, using 'regs'
   8599          // and 'inc', is bizarre.  After this, we have:
   8600          // Dd, Dd+1              regs = 1, inc = 1,   nregs = 2, regstep = 1
   8601          // Dd, Dd+2              regs = 1, inc = 2,   nregs = 2, regstep = 2
   8602          // Dd, Dd+1, Dd+2, Dd+3  regs = 2, inc = 2,   nregs = 4, regstep = 1
   8603          UInt nregs   = 2;
   8604          UInt regstep = 1;
   8605          if (regs == 1 && inc == 1) {
   8606             /* nothing */
   8607          } else if (regs == 1 && inc == 2) {
   8608             regstep = 2;
   8609          } else if (regs == 2 && inc == 2) {
   8610             nregs = 4;
   8611          } else {
   8612             vassert(0);
   8613          }
   8614          // 'a' is address,
   8615          // 'di' is interleaved data, 'du' is uninterleaved data
   8616          if (nregs == 2) {
   8617             IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8618             IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8619             IRTemp  di0 = newTemp(Ity_I64);
   8620             IRTemp  di1 = newTemp(Ity_I64);
   8621             IRTemp  du0 = newTemp(Ity_I64);
   8622             IRTemp  du1 = newTemp(Ity_I64);
   8623             if (bL) {
   8624                assign(di0, loadLE(Ity_I64, a0));
   8625                assign(di1, loadLE(Ity_I64, a1));
   8626                math_DEINTERLEAVE_2(&du0, &du1, di0, di1, 1 << size);
   8627                putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
   8628                putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
   8629             } else {
   8630                assign(du0, getDRegI64(rD + 0 * regstep));
   8631                assign(du1, getDRegI64(rD + 1 * regstep));
   8632                math_INTERLEAVE_2(&di0, &di1, du0, du1, 1 << size);
   8633                storeLE(a0, mkexpr(di0));
   8634                storeLE(a1, mkexpr(di1));
   8635             }
   8636             IRTemp tmp = newTemp(Ity_I32);
   8637             assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(16)));
   8638             addr = tmp;
   8639          } else {
   8640             vassert(nregs == 4);
   8641             vassert(regstep == 1);
   8642             IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8643             IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8644             IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
   8645             IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
   8646             IRTemp  di0 = newTemp(Ity_I64);
   8647             IRTemp  di1 = newTemp(Ity_I64);
   8648             IRTemp  di2 = newTemp(Ity_I64);
   8649             IRTemp  di3 = newTemp(Ity_I64);
   8650             IRTemp  du0 = newTemp(Ity_I64);
   8651             IRTemp  du1 = newTemp(Ity_I64);
   8652             IRTemp  du2 = newTemp(Ity_I64);
   8653             IRTemp  du3 = newTemp(Ity_I64);
   8654             if (bL) {
   8655                assign(di0, loadLE(Ity_I64, a0));
   8656                assign(di1, loadLE(Ity_I64, a1));
   8657                assign(di2, loadLE(Ity_I64, a2));
   8658                assign(di3, loadLE(Ity_I64, a3));
   8659                // Note spooky interleaving: du0, du2, di0, di1 etc
   8660                math_DEINTERLEAVE_2(&du0, &du2, di0, di1, 1 << size);
   8661                math_DEINTERLEAVE_2(&du1, &du3, di2, di3, 1 << size);
   8662                putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
   8663                putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
   8664                putDRegI64(rD + 2 * regstep, mkexpr(du2), IRTemp_INVALID);
   8665                putDRegI64(rD + 3 * regstep, mkexpr(du3), IRTemp_INVALID);
   8666             } else {
   8667                assign(du0, getDRegI64(rD + 0 * regstep));
   8668                assign(du1, getDRegI64(rD + 1 * regstep));
   8669                assign(du2, getDRegI64(rD + 2 * regstep));
   8670                assign(du3, getDRegI64(rD + 3 * regstep));
   8671                // Note spooky interleaving: du0, du2, di0, di1 etc
   8672                math_INTERLEAVE_2(&di0, &di1, du0, du2, 1 << size);
   8673                math_INTERLEAVE_2(&di2, &di3, du1, du3, 1 << size);
   8674                storeLE(a0, mkexpr(di0));
   8675                storeLE(a1, mkexpr(di1));
   8676                storeLE(a2, mkexpr(di2));
   8677                storeLE(a3, mkexpr(di3));
   8678             }
   8679 
   8680             IRTemp tmp = newTemp(Ity_I32);
   8681             assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
   8682             addr = tmp;
   8683          }
   8684       }
   8685       else
   8686       if (N == 2 /* 3-interleaving -- VLD3/VST3 */) {
   8687          // Dd, Dd+1, Dd+2   regs = 1, inc = 1
   8688          // Dd, Dd+2, Dd+4   regs = 1, inc = 2
   8689          vassert(regs == 1 && (inc == 1 || inc == 2));
   8690          IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8691          IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8692          IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
   8693          IRTemp  di0 = newTemp(Ity_I64);
   8694          IRTemp  di1 = newTemp(Ity_I64);
   8695          IRTemp  di2 = newTemp(Ity_I64);
   8696          IRTemp  du0 = newTemp(Ity_I64);
   8697          IRTemp  du1 = newTemp(Ity_I64);
   8698          IRTemp  du2 = newTemp(Ity_I64);
   8699          if (bL) {
   8700             assign(di0, loadLE(Ity_I64, a0));
   8701             assign(di1, loadLE(Ity_I64, a1));
   8702             assign(di2, loadLE(Ity_I64, a2));
   8703             math_DEINTERLEAVE_3(&du0, &du1, &du2, di0, di1, di2, 1 << size);
   8704             putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
   8705             putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
   8706             putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
   8707          } else {
   8708             assign(du0, getDRegI64(rD + 0 * inc));
   8709             assign(du1, getDRegI64(rD + 1 * inc));
   8710             assign(du2, getDRegI64(rD + 2 * inc));
   8711             math_INTERLEAVE_3(&di0, &di1, &di2, du0, du1, du2, 1 << size);
   8712             storeLE(a0, mkexpr(di0));
   8713             storeLE(a1, mkexpr(di1));
   8714             storeLE(a2, mkexpr(di2));
   8715          }
   8716          IRTemp tmp = newTemp(Ity_I32);
   8717          assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(24)));
   8718          addr = tmp;
   8719       }
   8720       else
   8721       if (N == 3 /* 4-interleaving -- VLD4/VST4 */) {
   8722          // Dd, Dd+1, Dd+2, Dd+3   regs = 1, inc = 1
   8723          // Dd, Dd+2, Dd+4, Dd+6   regs = 1, inc = 2
   8724          vassert(regs == 1 && (inc == 1 || inc == 2));
   8725          IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8726          IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8727          IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
   8728          IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
   8729          IRTemp  di0 = newTemp(Ity_I64);
   8730          IRTemp  di1 = newTemp(Ity_I64);
   8731          IRTemp  di2 = newTemp(Ity_I64);
   8732          IRTemp  di3 = newTemp(Ity_I64);
   8733          IRTemp  du0 = newTemp(Ity_I64);
   8734          IRTemp  du1 = newTemp(Ity_I64);
   8735          IRTemp  du2 = newTemp(Ity_I64);
   8736          IRTemp  du3 = newTemp(Ity_I64);
   8737          if (bL) {
   8738             assign(di0, loadLE(Ity_I64, a0));
   8739             assign(di1, loadLE(Ity_I64, a1));
   8740             assign(di2, loadLE(Ity_I64, a2));
   8741             assign(di3, loadLE(Ity_I64, a3));
   8742             math_DEINTERLEAVE_4(&du0, &du1, &du2, &du3,
   8743                                 di0, di1, di2, di3, 1 << size);
   8744             putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
   8745             putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
   8746             putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
   8747             putDRegI64(rD + 3 * inc, mkexpr(du3), IRTemp_INVALID);
   8748          } else {
   8749             assign(du0, getDRegI64(rD + 0 * inc));
   8750             assign(du1, getDRegI64(rD + 1 * inc));
   8751             assign(du2, getDRegI64(rD + 2 * inc));
   8752             assign(du3, getDRegI64(rD + 3 * inc));
   8753             math_INTERLEAVE_4(&di0, &di1, &di2, &di3,
   8754                               du0, du1, du2, du3, 1 << size);
   8755             storeLE(a0, mkexpr(di0));
   8756             storeLE(a1, mkexpr(di1));
   8757             storeLE(a2, mkexpr(di2));
   8758             storeLE(a3, mkexpr(di3));
   8759          }
   8760          IRTemp tmp = newTemp(Ity_I32);
   8761          assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
   8762          addr = tmp;
   8763       }
   8764       else {
   8765          vassert(0);
   8766       }
   8767 
   8768       /* Writeback */
   8769       if (rM != 15) {
   8770          IRExpr* e;
   8771          if (rM == 13) {
   8772             e = binop(Iop_Add32, mkexpr(initialRn),
   8773                                  mkU32(8 * (N + 1) * regs));
   8774          } else {
   8775             e = binop(Iop_Add32, mkexpr(initialRn),
   8776                                  mkexpr(initialRm));
   8777          }
   8778          if (isT)
   8779             putIRegT(rN, e, IRTemp_INVALID);
   8780          else
   8781             putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
   8782       }
   8783 
   8784       DIP("v%s%u.%u {", bL ? "ld" : "st", N + 1, 8 << INSN(7,6));
   8785       if ((inc == 1 && regs * (N + 1) > 1)
   8786           || (inc == 2 && regs > 1 && N > 0)) {
   8787          DIP("d%u-d%u", rD, rD + regs * (N + 1) - 1);
   8788       } else {
   8789          UInt r;
   8790          for (r = 0; r < regs; r++) {
   8791             for (i = 0; i <= N; i++) {
   8792                if (i || r)
   8793                   DIP(", ");
   8794                DIP("d%u", rD + r + i * inc);
   8795             }
   8796          }
   8797       }
   8798       DIP("}, [r%u]", rN);
   8799       if (rM != 13 && rM != 15) {
   8800          DIP(", r%u\n", rM);
   8801       } else {
   8802          DIP("%s\n", (rM != 15) ? "!" : "");
   8803       }
   8804       return True;
   8805    }
   8806 #  undef INSN
   8807 }
   8808 
   8809 
   8810 /*------------------------------------------------------------*/
   8811 /*--- NEON, top level control                              ---*/
   8812 /*------------------------------------------------------------*/
   8813 
   8814 /* Both ARM and Thumb */
   8815 
   8816 /* Translate a NEON instruction.    If successful, returns
   8817    True and *dres may or may not be updated.  If failure, returns
   8818    False and doesn't change *dres nor create any IR.
   8819 
   8820    The Thumb and ARM encodings are similar for the 24 bottom bits, but
   8821    the top 8 bits are slightly different.  In both cases, the caller
   8822    must pass the entire 32 bits.  Callers may pass any instruction;
   8823    this ignores non-NEON ones.
   8824 
   8825    Caller must supply an IRTemp 'condT' holding the gating condition,
   8826    or IRTemp_INVALID indicating the insn is always executed.  In ARM
   8827    code, this must always be IRTemp_INVALID because NEON insns are
   8828    unconditional for ARM.
   8829 
   8830    Finally, the caller must indicate whether this occurs in ARM or in
   8831    Thumb code.
   8832 */
   8833 static Bool decode_NEON_instruction (
   8834                /*MOD*/DisResult* dres,
   8835                UInt              insn32,
   8836                IRTemp            condT,
   8837                Bool              isT
   8838             )
   8839 {
   8840 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn32, (_bMax), (_bMin))
   8841 
   8842    /* There are two kinds of instruction to deal with: load/store and
   8843       data processing.  In each case, in ARM mode we merely identify
   8844       the kind, and pass it on to the relevant sub-handler.  In Thumb
   8845       mode we identify the kind, swizzle the bits around to make it
   8846       have the same encoding as in ARM, and hand it on to the
   8847       sub-handler.
   8848    */
   8849 
   8850    /* In ARM mode, NEON instructions can't be conditional. */
   8851    if (!isT)
   8852       vassert(condT == IRTemp_INVALID);
   8853 
   8854    /* Data processing:
   8855       Thumb: 111U 1111 AAAA Axxx xxxx BBBB CCCC xxxx
   8856       ARM:   1111 001U AAAA Axxx xxxx BBBB CCCC xxxx
   8857    */
   8858    if (!isT && INSN(31,25) == BITS7(1,1,1,1,0,0,1)) {
   8859       // ARM, DP
   8860       return dis_neon_data_processing(INSN(31,0), condT);
   8861    }
   8862    if (isT && INSN(31,29) == BITS3(1,1,1)
   8863        && INSN(27,24) == BITS4(1,1,1,1)) {
   8864       // Thumb, DP
   8865       UInt reformatted = INSN(23,0);
   8866       reformatted |= (INSN(28,28) << 24); // U bit
   8867       reformatted |= (BITS7(1,1,1,1,0,0,1) << 25);
   8868       return dis_neon_data_processing(reformatted, condT);
   8869    }
   8870 
   8871    /* Load/store:
   8872       Thumb: 1111 1001 AxL0 xxxx xxxx BBBB xxxx xxxx
   8873       ARM:   1111 0100 AxL0 xxxx xxxx BBBB xxxx xxxx
   8874    */
   8875    if (!isT && INSN(31,24) == BITS8(1,1,1,1,0,1,0,0)) {
   8876       // ARM, memory
   8877       return dis_neon_load_or_store(INSN(31,0), isT, condT);
   8878    }
   8879    if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
   8880       UInt reformatted = INSN(23,0);
   8881       reformatted |= (BITS8(1,1,1,1,0,1,0,0) << 24);
   8882       return dis_neon_load_or_store(reformatted, isT, condT);
   8883    }
   8884 
   8885    /* Doesn't match. */
   8886    return False;
   8887 
   8888 #  undef INSN
   8889 }
   8890 
   8891 
   8892 /*------------------------------------------------------------*/
   8893 /*--- V6 MEDIA instructions                                ---*/
   8894 /*------------------------------------------------------------*/
   8895 
   8896 /* Both ARM and Thumb */
   8897 
   8898 /* Translate a V6 media instruction.    If successful, returns
   8899    True and *dres may or may not be updated.  If failure, returns
   8900    False and doesn't change *dres nor create any IR.
   8901 
   8902    The Thumb and ARM encodings are completely different.  In Thumb
   8903    mode, the caller must pass the entire 32 bits.  In ARM mode it must
   8904    pass the lower 28 bits.  Apart from that, callers may pass any
   8905    instruction; this function ignores anything it doesn't recognise.
   8906 
   8907    Caller must supply an IRTemp 'condT' holding the gating condition,
   8908    or IRTemp_INVALID indicating the insn is always executed.
   8909 
   8910    Caller must also supply an ARMCondcode 'conq'.  This is only used
   8911    for debug printing, no other purpose.  For ARM, this is simply the
   8912    top 4 bits of the original instruction.  For Thumb, the condition
   8913    is not (really) known until run time, and so ARMCondAL should be
   8914    passed, only so that printing of these instructions does not show
   8915    any condition.
   8916 
   8917    Finally, the caller must indicate whether this occurs in ARM or in
   8918    Thumb code.
   8919 */
   8920 static Bool decode_V6MEDIA_instruction (
   8921                /*MOD*/DisResult* dres,
   8922                UInt              insnv6m,
   8923                IRTemp            condT,
   8924                ARMCondcode       conq,
   8925                Bool              isT
   8926             )
   8927 {
   8928 #  define INSNA(_bMax,_bMin)   SLICE_UInt(insnv6m, (_bMax), (_bMin))
   8929 #  define INSNT0(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 16) & 0xFFFF), \
   8930                                            (_bMax), (_bMin) )
   8931 #  define INSNT1(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 0)  & 0xFFFF), \
   8932                                            (_bMax), (_bMin) )
   8933    HChar dis_buf[128];
   8934    dis_buf[0] = 0;
   8935 
   8936    if (isT) {
   8937       vassert(conq == ARMCondAL);
   8938    } else {
   8939       vassert(INSNA(31,28) == BITS4(0,0,0,0)); // caller's obligation
   8940       vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
   8941    }
   8942 
   8943    /* ----------- smulbb, smulbt, smultb, smultt ----------- */
   8944    {
   8945      UInt regD = 99, regM = 99, regN = 99, bitM = 0, bitN = 0;
   8946      Bool gate = False;
   8947 
   8948      if (isT) {
   8949         if (INSNT0(15,4) == 0xFB1 && INSNT1(15,12) == BITS4(1,1,1,1)
   8950             && INSNT1(7,6) == BITS2(0,0)) {
   8951            regD = INSNT1(11,8);
   8952            regM = INSNT1(3,0);
   8953            regN = INSNT0(3,0);
   8954            bitM = INSNT1(4,4);
   8955            bitN = INSNT1(5,5);
   8956            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   8957               gate = True;
   8958         }
   8959      } else {
   8960         if (BITS8(0,0,0,1,0,1,1,0) == INSNA(27,20) &&
   8961             BITS4(0,0,0,0)         == INSNA(15,12) &&
   8962             BITS4(1,0,0,0)         == (INSNA(7,4) & BITS4(1,0,0,1)) ) {
   8963            regD = INSNA(19,16);
   8964            regM = INSNA(11,8);
   8965            regN = INSNA(3,0);
   8966            bitM = INSNA(6,6);
   8967            bitN = INSNA(5,5);
   8968            if (regD != 15 && regN != 15 && regM != 15)
   8969               gate = True;
   8970         }
   8971      }
   8972 
   8973      if (gate) {
   8974         IRTemp srcN = newTemp(Ity_I32);
   8975         IRTemp srcM = newTemp(Ity_I32);
   8976         IRTemp res  = newTemp(Ity_I32);
   8977 
   8978         assign( srcN, binop(Iop_Sar32,
   8979                             binop(Iop_Shl32,
   8980                                   isT ? getIRegT(regN) : getIRegA(regN),
   8981                                   mkU8(bitN ? 0 : 16)), mkU8(16)) );
   8982         assign( srcM, binop(Iop_Sar32,
   8983                             binop(Iop_Shl32,
   8984                                   isT ? getIRegT(regM) : getIRegA(regM),
   8985                                   mkU8(bitM ? 0 : 16)), mkU8(16)) );
   8986         assign( res, binop(Iop_Mul32, mkexpr(srcN), mkexpr(srcM)) );
   8987 
   8988         if (isT)
   8989            putIRegT( regD, mkexpr(res), condT );
   8990         else
   8991            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   8992 
   8993         DIP( "smul%c%c%s r%u, r%u, r%u\n", bitN ? 't' : 'b', bitM ? 't' : 'b',
   8994              nCC(conq), regD, regN, regM );
   8995         return True;
   8996      }
   8997      /* fall through */
   8998    }
   8999 
   9000    /* ------------ smulwb<y><c> <Rd>,<Rn>,<Rm> ------------- */
   9001    /* ------------ smulwt<y><c> <Rd>,<Rn>,<Rm> ------------- */
   9002    {
   9003      UInt regD = 99, regN = 99, regM = 99, bitM = 0;
   9004      Bool gate = False;
   9005 
   9006      if (isT) {
   9007         if (INSNT0(15,4) == 0xFB3 && INSNT1(15,12) == BITS4(1,1,1,1)
   9008             && INSNT1(7,5) == BITS3(0,0,0)) {
   9009           regN = INSNT0(3,0);
   9010           regD = INSNT1(11,8);
   9011           regM = INSNT1(3,0);
   9012           bitM = INSNT1(4,4);
   9013           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9014              gate = True;
   9015         }
   9016      } else {
   9017         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
   9018             INSNA(15,12) == BITS4(0,0,0,0)         &&
   9019             (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,1,0)) {
   9020            regD = INSNA(19,16);
   9021            regN = INSNA(3,0);
   9022            regM = INSNA(11,8);
   9023            bitM = INSNA(6,6);
   9024            if (regD != 15 && regN != 15 && regM != 15)
   9025               gate = True;
   9026         }
   9027      }
   9028 
   9029      if (gate) {
   9030         IRTemp irt_prod = newTemp(Ity_I64);
   9031 
   9032         assign( irt_prod,
   9033                 binop(Iop_MullS32,
   9034                       isT ? getIRegT(regN) : getIRegA(regN),
   9035                       binop(Iop_Sar32,
   9036                             binop(Iop_Shl32,
   9037                                   isT ? getIRegT(regM) : getIRegA(regM),
   9038                                   mkU8(bitM ? 0 : 16)),
   9039                             mkU8(16))) );
   9040 
   9041         IRExpr* ire_result = binop(Iop_Or32,
   9042                                    binop( Iop_Shl32,
   9043                                           unop(Iop_64HIto32, mkexpr(irt_prod)),
   9044                                           mkU8(16) ),
   9045                                    binop( Iop_Shr32,
   9046                                           unop(Iop_64to32, mkexpr(irt_prod)),
   9047                                           mkU8(16) ) );
   9048 
   9049         if (isT)
   9050            putIRegT( regD, ire_result, condT );
   9051         else
   9052            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9053 
   9054         DIP("smulw%c%s r%u, r%u, r%u\n",
   9055             bitM ? 't' : 'b', nCC(conq),regD,regN,regM);
   9056         return True;
   9057      }
   9058      /* fall through */
   9059    }
   9060 
   9061    /* ------------ pkhbt<c> Rd, Rn, Rm {,LSL #imm} ------------- */
   9062    /* ------------ pkhtb<c> Rd, Rn, Rm {,ASR #imm} ------------- */
   9063    {
   9064      UInt regD = 99, regN = 99, regM = 99, imm5 = 99, shift_type = 99;
   9065      Bool tbform = False;
   9066      Bool gate = False;
   9067 
   9068      if (isT) {
   9069         if (INSNT0(15,4) == 0xEAC
   9070             && INSNT1(15,15) == 0 && INSNT1(4,4) == 0) {
   9071            regN = INSNT0(3,0);
   9072            regD = INSNT1(11,8);
   9073            regM = INSNT1(3,0);
   9074            imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
   9075            shift_type = (INSNT1(5,5) << 1) | 0;
   9076            tbform = (INSNT1(5,5) == 0) ? False : True;
   9077            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9078               gate = True;
   9079         }
   9080      } else {
   9081         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
   9082             INSNA(5,4)   == BITS2(0,1)             &&
   9083             (INSNA(6,6)  == 0 || INSNA(6,6) == 1) ) {
   9084            regD = INSNA(15,12);
   9085            regN = INSNA(19,16);
   9086            regM = INSNA(3,0);
   9087            imm5 = INSNA(11,7);
   9088            shift_type = (INSNA(6,6) << 1) | 0;
   9089            tbform = (INSNA(6,6) == 0) ? False : True;
   9090            if (regD != 15 && regN != 15 && regM != 15)
   9091               gate = True;
   9092         }
   9093      }
   9094 
   9095      if (gate) {
   9096         IRTemp irt_regM       = newTemp(Ity_I32);
   9097         IRTemp irt_regM_shift = newTemp(Ity_I32);
   9098         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   9099         compute_result_and_C_after_shift_by_imm5(
   9100            dis_buf, &irt_regM_shift, NULL, irt_regM, shift_type, imm5, regM );
   9101 
   9102         UInt mask = (tbform == True) ? 0x0000FFFF : 0xFFFF0000;
   9103         IRExpr* ire_result
   9104           = binop( Iop_Or32,
   9105                    binop(Iop_And32, mkexpr(irt_regM_shift), mkU32(mask)),
   9106                    binop(Iop_And32, isT ? getIRegT(regN) : getIRegA(regN),
   9107                                     unop(Iop_Not32, mkU32(mask))) );
   9108 
   9109         if (isT)
   9110            putIRegT( regD, ire_result, condT );
   9111         else
   9112            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9113 
   9114         DIP( "pkh%s%s r%u, r%u, r%u %s\n", tbform ? "tb" : "bt",
   9115              nCC(conq), regD, regN, regM, dis_buf );
   9116 
   9117         return True;
   9118      }
   9119      /* fall through */
   9120    }
   9121 
   9122    /* ---------- usat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
   9123    {
   9124      UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
   9125      Bool gate = False;
   9126 
   9127      if (isT) {
   9128         if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,1,0)
   9129             && INSNT0(4,4) == 0
   9130             && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
   9131            regD       = INSNT1(11,8);
   9132            regN       = INSNT0(3,0);
   9133            shift_type = (INSNT0(5,5) << 1) | 0;
   9134            imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
   9135            sat_imm    = INSNT1(4,0);
   9136            if (!isBadRegT(regD) && !isBadRegT(regN))
   9137               gate = True;
   9138            if (shift_type == BITS2(1,0) && imm5 == 0)
   9139               gate = False;
   9140         }
   9141      } else {
   9142         if (INSNA(27,21) == BITS7(0,1,1,0,1,1,1) &&
   9143             INSNA(5,4)   == BITS2(0,1)) {
   9144            regD       = INSNA(15,12);
   9145            regN       = INSNA(3,0);
   9146            shift_type = (INSNA(6,6) << 1) | 0;
   9147            imm5       = INSNA(11,7);
   9148            sat_imm    = INSNA(20,16);
   9149            if (regD != 15 && regN != 15)
   9150               gate = True;
   9151         }
   9152      }
   9153 
   9154      if (gate) {
   9155         IRTemp irt_regN       = newTemp(Ity_I32);
   9156         IRTemp irt_regN_shift = newTemp(Ity_I32);
   9157         IRTemp irt_sat_Q      = newTemp(Ity_I32);
   9158         IRTemp irt_result     = newTemp(Ity_I32);
   9159 
   9160         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9161         compute_result_and_C_after_shift_by_imm5(
   9162                 dis_buf, &irt_regN_shift, NULL,
   9163                 irt_regN, shift_type, imm5, regN );
   9164 
   9165         armUnsignedSatQ( &irt_result, &irt_sat_Q, irt_regN_shift, sat_imm );
   9166         or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
   9167 
   9168         if (isT)
   9169            putIRegT( regD, mkexpr(irt_result), condT );
   9170         else
   9171            putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
   9172 
   9173         DIP("usat%s r%u, #0x%04x, %s\n",
   9174             nCC(conq), regD, imm5, dis_buf);
   9175         return True;
   9176      }
   9177      /* fall through */
   9178    }
   9179 
   9180   /* ----------- ssat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
   9181    {
   9182      UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
   9183      Bool gate = False;
   9184 
   9185      if (isT) {
   9186         if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
   9187             && INSNT0(4,4) == 0
   9188             && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
   9189            regD       = INSNT1(11,8);
   9190            regN       = INSNT0(3,0);
   9191            shift_type = (INSNT0(5,5) << 1) | 0;
   9192            imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
   9193            sat_imm    = INSNT1(4,0) + 1;
   9194            if (!isBadRegT(regD) && !isBadRegT(regN))
   9195               gate = True;
   9196            if (shift_type == BITS2(1,0) && imm5 == 0)
   9197               gate = False;
   9198         }
   9199      } else {
   9200         if (INSNA(27,21) == BITS7(0,1,1,0,1,0,1) &&
   9201             INSNA(5,4)   == BITS2(0,1)) {
   9202            regD       = INSNA(15,12);
   9203            regN       = INSNA(3,0);
   9204            shift_type = (INSNA(6,6) << 1) | 0;
   9205            imm5       = INSNA(11,7);
   9206            sat_imm    = INSNA(20,16) + 1;
   9207            if (regD != 15 && regN != 15)
   9208               gate = True;
   9209         }
   9210      }
   9211 
   9212      if (gate) {
   9213         IRTemp irt_regN       = newTemp(Ity_I32);
   9214         IRTemp irt_regN_shift = newTemp(Ity_I32);
   9215         IRTemp irt_sat_Q      = newTemp(Ity_I32);
   9216         IRTemp irt_result     = newTemp(Ity_I32);
   9217 
   9218         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9219         compute_result_and_C_after_shift_by_imm5(
   9220                 dis_buf, &irt_regN_shift, NULL,
   9221                 irt_regN, shift_type, imm5, regN );
   9222 
   9223         armSignedSatQ( irt_regN_shift, sat_imm, &irt_result, &irt_sat_Q );
   9224         or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
   9225 
   9226         if (isT)
   9227            putIRegT( regD, mkexpr(irt_result), condT );
   9228         else
   9229            putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
   9230 
   9231         DIP( "ssat%s r%u, #0x%04x, %s\n",
   9232              nCC(conq), regD, imm5, dis_buf);
   9233         return True;
   9234     }
   9235     /* fall through */
   9236   }
   9237 
   9238    /* ----------- ssat16<c> <Rd>,#<imm>,<Rn> ----------- */
   9239    {
   9240      UInt regD = 99, regN = 99, sat_imm = 99;
   9241      Bool gate = False;
   9242 
   9243      if (isT) {
   9244         if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
   9245             && INSNT0(5,4) == BITS2(1,0)
   9246             && INSNT1(15,12) == BITS4(0,0,0,0)
   9247             && INSNT1(7,4) == BITS4(0,0,0,0)) {
   9248            regD       = INSNT1(11,8);
   9249            regN       = INSNT0(3,0);
   9250            sat_imm    = INSNT1(3,0) + 1;
   9251            if (!isBadRegT(regD) && !isBadRegT(regN))
   9252               gate = True;
   9253         }
   9254      } else {
   9255         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,1,0) &&
   9256             INSNA(11,4)   == BITS8(1,1,1,1,0,0,1,1)) {
   9257            regD       = INSNA(15,12);
   9258            regN       = INSNA(3,0);
   9259            sat_imm    = INSNA(19,16) + 1;
   9260            if (regD != 15 && regN != 15)
   9261               gate = True;
   9262         }
   9263      }
   9264 
   9265      if (gate) {
   9266         IRTemp irt_regN    = newTemp(Ity_I32);
   9267         IRTemp irt_regN_lo = newTemp(Ity_I32);
   9268         IRTemp irt_regN_hi = newTemp(Ity_I32);
   9269         IRTemp irt_Q_lo    = newTemp(Ity_I32);
   9270         IRTemp irt_Q_hi    = newTemp(Ity_I32);
   9271         IRTemp irt_res_lo  = newTemp(Ity_I32);
   9272         IRTemp irt_res_hi  = newTemp(Ity_I32);
   9273 
   9274         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9275         assign( irt_regN_lo,
   9276                 binop( Iop_Sar32,
   9277                        binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
   9278                        mkU8(16)) );
   9279         assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
   9280 
   9281         armSignedSatQ( irt_regN_lo, sat_imm, &irt_res_lo, &irt_Q_lo );
   9282         or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
   9283 
   9284         armSignedSatQ( irt_regN_hi, sat_imm, &irt_res_hi, &irt_Q_hi );
   9285         or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
   9286 
   9287         IRExpr* ire_result
   9288            = binop(Iop_Or32,
   9289                    binop(Iop_And32, mkexpr(irt_res_lo), mkU32(0xFFFF)),
   9290                    binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)));
   9291         if (isT)
   9292            putIRegT( regD, ire_result, condT );
   9293         else
   9294            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9295 
   9296         DIP( "ssat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
   9297         return True;
   9298      }
   9299      /* fall through */
   9300    }
   9301 
   9302    /* -------------- usat16<c> <Rd>,#<imm4>,<Rn> --------------- */
   9303    {
   9304      UInt regD = 99, regN = 99, sat_imm = 99;
   9305      Bool gate = False;
   9306 
   9307      if (isT) {
   9308         if (INSNT0(15,4) == 0xF3A && (INSNT1(15,0) & 0xF0F0) == 0x0000) {
   9309            regN = INSNT0(3,0);
   9310            regD = INSNT1(11,8);
   9311            sat_imm = INSNT1(3,0);
   9312            if (!isBadRegT(regD) && !isBadRegT(regN))
   9313               gate = True;
   9314        }
   9315      } else {
   9316         if (INSNA(27,20) == BITS8(0,1,1,0,1,1,1,0) &&
   9317             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9318             INSNA(7,4)   == BITS4(0,0,1,1)) {
   9319            regD    = INSNA(15,12);
   9320            regN    = INSNA(3,0);
   9321            sat_imm = INSNA(19,16);
   9322            if (regD != 15 && regN != 15)
   9323               gate = True;
   9324         }
   9325      }
   9326 
   9327      if (gate) {
   9328         IRTemp irt_regN    = newTemp(Ity_I32);
   9329         IRTemp irt_regN_lo = newTemp(Ity_I32);
   9330         IRTemp irt_regN_hi = newTemp(Ity_I32);
   9331         IRTemp irt_Q_lo    = newTemp(Ity_I32);
   9332         IRTemp irt_Q_hi    = newTemp(Ity_I32);
   9333         IRTemp irt_res_lo  = newTemp(Ity_I32);
   9334         IRTemp irt_res_hi  = newTemp(Ity_I32);
   9335 
   9336         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9337         assign( irt_regN_lo, binop( Iop_Sar32,
   9338                                     binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
   9339                                     mkU8(16)) );
   9340         assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
   9341 
   9342         armUnsignedSatQ( &irt_res_lo, &irt_Q_lo, irt_regN_lo, sat_imm );
   9343         or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
   9344 
   9345         armUnsignedSatQ( &irt_res_hi, &irt_Q_hi, irt_regN_hi, sat_imm );
   9346         or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
   9347 
   9348         IRExpr* ire_result = binop( Iop_Or32,
   9349                                     binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)),
   9350                                     mkexpr(irt_res_lo) );
   9351 
   9352         if (isT)
   9353            putIRegT( regD, ire_result, condT );
   9354         else
   9355            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9356 
   9357         DIP( "usat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
   9358         return True;
   9359      }
   9360      /* fall through */
   9361    }
   9362 
   9363    /* -------------- uadd16<c> <Rd>,<Rn>,<Rm> -------------- */
   9364    {
   9365      UInt regD = 99, regN = 99, regM = 99;
   9366      Bool gate = False;
   9367 
   9368      if (isT) {
   9369         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9370            regN = INSNT0(3,0);
   9371            regD = INSNT1(11,8);
   9372            regM = INSNT1(3,0);
   9373            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9374               gate = True;
   9375         }
   9376      } else {
   9377         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9378             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9379             INSNA(7,4)   == BITS4(0,0,0,1)) {
   9380            regD = INSNA(15,12);
   9381            regN = INSNA(19,16);
   9382            regM = INSNA(3,0);
   9383            if (regD != 15 && regN != 15 && regM != 15)
   9384               gate = True;
   9385         }
   9386      }
   9387 
   9388      if (gate) {
   9389         IRTemp rNt  = newTemp(Ity_I32);
   9390         IRTemp rMt  = newTemp(Ity_I32);
   9391         IRTemp res  = newTemp(Ity_I32);
   9392         IRTemp reso = newTemp(Ity_I32);
   9393 
   9394         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9395         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9396 
   9397         assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
   9398         if (isT)
   9399            putIRegT( regD, mkexpr(res), condT );
   9400         else
   9401            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9402 
   9403         assign(reso, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
   9404         set_GE_32_10_from_bits_31_15(reso, condT);
   9405 
   9406         DIP("uadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9407         return True;
   9408      }
   9409      /* fall through */
   9410    }
   9411 
   9412    /* -------------- sadd16<c> <Rd>,<Rn>,<Rm> -------------- */
   9413    {
   9414      UInt regD = 99, regN = 99, regM = 99;
   9415      Bool gate = False;
   9416 
   9417      if (isT) {
   9418         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9419            regN = INSNT0(3,0);
   9420            regD = INSNT1(11,8);
   9421            regM = INSNT1(3,0);
   9422            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9423               gate = True;
   9424         }
   9425      } else {
   9426         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9427             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9428             INSNA(7,4)   == BITS4(0,0,0,1)) {
   9429            regD = INSNA(15,12);
   9430            regN = INSNA(19,16);
   9431            regM = INSNA(3,0);
   9432            if (regD != 15 && regN != 15 && regM != 15)
   9433               gate = True;
   9434         }
   9435      }
   9436 
   9437      if (gate) {
   9438         IRTemp rNt  = newTemp(Ity_I32);
   9439         IRTemp rMt  = newTemp(Ity_I32);
   9440         IRTemp res  = newTemp(Ity_I32);
   9441         IRTemp reso = newTemp(Ity_I32);
   9442 
   9443         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9444         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9445 
   9446         assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
   9447         if (isT)
   9448            putIRegT( regD, mkexpr(res), condT );
   9449         else
   9450            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9451 
   9452         assign(reso, unop(Iop_Not32,
   9453                           binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt))));
   9454         set_GE_32_10_from_bits_31_15(reso, condT);
   9455 
   9456         DIP("sadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9457         return True;
   9458      }
   9459      /* fall through */
   9460    }
   9461 
   9462    /* ---------------- usub16<c> <Rd>,<Rn>,<Rm> ---------------- */
   9463    {
   9464      UInt regD = 99, regN = 99, regM = 99;
   9465      Bool gate = False;
   9466 
   9467      if (isT) {
   9468         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9469            regN = INSNT0(3,0);
   9470            regD = INSNT1(11,8);
   9471            regM = INSNT1(3,0);
   9472            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9473               gate = True;
   9474         }
   9475      } else {
   9476         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9477             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9478             INSNA(7,4)   == BITS4(0,1,1,1)) {
   9479            regD = INSNA(15,12);
   9480            regN = INSNA(19,16);
   9481            regM = INSNA(3,0);
   9482            if (regD != 15 && regN != 15 && regM != 15)
   9483              gate = True;
   9484         }
   9485      }
   9486 
   9487      if (gate) {
   9488         IRTemp rNt  = newTemp(Ity_I32);
   9489         IRTemp rMt  = newTemp(Ity_I32);
   9490         IRTemp res  = newTemp(Ity_I32);
   9491         IRTemp reso = newTemp(Ity_I32);
   9492 
   9493         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9494         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9495 
   9496         assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
   9497         if (isT)
   9498            putIRegT( regD, mkexpr(res), condT );
   9499         else
   9500            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9501 
   9502         assign(reso, unop(Iop_Not32,
   9503                           binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt))));
   9504         set_GE_32_10_from_bits_31_15(reso, condT);
   9505 
   9506         DIP("usub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9507         return True;
   9508      }
   9509      /* fall through */
   9510    }
   9511 
   9512    /* -------------- ssub16<c> <Rd>,<Rn>,<Rm> -------------- */
   9513    {
   9514      UInt regD = 99, regN = 99, regM = 99;
   9515      Bool gate = False;
   9516 
   9517      if (isT) {
   9518         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9519            regN = INSNT0(3,0);
   9520            regD = INSNT1(11,8);
   9521            regM = INSNT1(3,0);
   9522            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9523               gate = True;
   9524         }
   9525      } else {
   9526         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9527             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9528             INSNA(7,4)   == BITS4(0,1,1,1)) {
   9529            regD = INSNA(15,12);
   9530            regN = INSNA(19,16);
   9531            regM = INSNA(3,0);
   9532            if (regD != 15 && regN != 15 && regM != 15)
   9533               gate = True;
   9534         }
   9535      }
   9536 
   9537      if (gate) {
   9538         IRTemp rNt  = newTemp(Ity_I32);
   9539         IRTemp rMt  = newTemp(Ity_I32);
   9540         IRTemp res  = newTemp(Ity_I32);
   9541         IRTemp reso = newTemp(Ity_I32);
   9542 
   9543         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9544         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9545 
   9546         assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
   9547         if (isT)
   9548            putIRegT( regD, mkexpr(res), condT );
   9549         else
   9550            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9551 
   9552         assign(reso, unop(Iop_Not32,
   9553                           binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt))));
   9554         set_GE_32_10_from_bits_31_15(reso, condT);
   9555 
   9556         DIP("ssub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9557         return True;
   9558      }
   9559      /* fall through */
   9560    }
   9561 
   9562    /* ----------------- uadd8<c> <Rd>,<Rn>,<Rm> ---------------- */
   9563    {
   9564      UInt regD = 99, regN = 99, regM = 99;
   9565      Bool gate = False;
   9566 
   9567      if (isT) {
   9568         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9569            regN = INSNT0(3,0);
   9570            regD = INSNT1(11,8);
   9571            regM = INSNT1(3,0);
   9572            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9573               gate = True;
   9574         }
   9575      } else {
   9576         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9577             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9578             (INSNA(7,4)  == BITS4(1,0,0,1))) {
   9579            regD = INSNA(15,12);
   9580            regN = INSNA(19,16);
   9581            regM = INSNA(3,0);
   9582            if (regD != 15 && regN != 15 && regM != 15)
   9583               gate = True;
   9584         }
   9585      }
   9586 
   9587      if (gate) {
   9588         IRTemp rNt  = newTemp(Ity_I32);
   9589         IRTemp rMt  = newTemp(Ity_I32);
   9590         IRTemp res  = newTemp(Ity_I32);
   9591         IRTemp reso = newTemp(Ity_I32);
   9592 
   9593         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9594         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9595 
   9596         assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
   9597         if (isT)
   9598            putIRegT( regD, mkexpr(res), condT );
   9599         else
   9600            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9601 
   9602         assign(reso, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
   9603         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9604 
   9605         DIP("uadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9606         return True;
   9607      }
   9608      /* fall through */
   9609    }
   9610 
   9611    /* ------------------- sadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9612    {
   9613      UInt regD = 99, regN = 99, regM = 99;
   9614      Bool gate = False;
   9615 
   9616      if (isT) {
   9617         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9618            regN = INSNT0(3,0);
   9619            regD = INSNT1(11,8);
   9620            regM = INSNT1(3,0);
   9621            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9622               gate = True;
   9623         }
   9624      } else {
   9625         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9626             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9627             (INSNA(7,4)  == BITS4(1,0,0,1))) {
   9628            regD = INSNA(15,12);
   9629            regN = INSNA(19,16);
   9630            regM = INSNA(3,0);
   9631            if (regD != 15 && regN != 15 && regM != 15)
   9632               gate = True;
   9633         }
   9634      }
   9635 
   9636      if (gate) {
   9637         IRTemp rNt  = newTemp(Ity_I32);
   9638         IRTemp rMt  = newTemp(Ity_I32);
   9639         IRTemp res  = newTemp(Ity_I32);
   9640         IRTemp reso = newTemp(Ity_I32);
   9641 
   9642         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9643         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9644 
   9645         assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
   9646         if (isT)
   9647            putIRegT( regD, mkexpr(res), condT );
   9648         else
   9649            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9650 
   9651         assign(reso, unop(Iop_Not32,
   9652                           binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt))));
   9653         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9654 
   9655         DIP("sadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9656         return True;
   9657      }
   9658      /* fall through */
   9659    }
   9660 
   9661    /* ------------------- usub8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9662    {
   9663      UInt regD = 99, regN = 99, regM = 99;
   9664      Bool gate = False;
   9665 
   9666      if (isT) {
   9667         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9668            regN = INSNT0(3,0);
   9669            regD = INSNT1(11,8);
   9670            regM = INSNT1(3,0);
   9671            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9672               gate = True;
   9673         }
   9674      } else {
   9675         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9676             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9677             (INSNA(7,4)  == BITS4(1,1,1,1))) {
   9678            regD = INSNA(15,12);
   9679            regN = INSNA(19,16);
   9680            regM = INSNA(3,0);
   9681            if (regD != 15 && regN != 15 && regM != 15)
   9682              gate = True;
   9683         }
   9684      }
   9685 
   9686      if (gate) {
   9687         IRTemp rNt  = newTemp(Ity_I32);
   9688         IRTemp rMt  = newTemp(Ity_I32);
   9689         IRTemp res  = newTemp(Ity_I32);
   9690         IRTemp reso = newTemp(Ity_I32);
   9691 
   9692         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9693         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9694 
   9695         assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
   9696         if (isT)
   9697            putIRegT( regD, mkexpr(res), condT );
   9698         else
   9699            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9700 
   9701         assign(reso, unop(Iop_Not32,
   9702                           binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt))));
   9703         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9704 
   9705         DIP("usub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9706         return True;
   9707      }
   9708      /* fall through */
   9709    }
   9710 
   9711    /* ------------------- ssub8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9712    {
   9713      UInt regD = 99, regN = 99, regM = 99;
   9714      Bool gate = False;
   9715 
   9716      if (isT) {
   9717         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9718            regN = INSNT0(3,0);
   9719            regD = INSNT1(11,8);
   9720            regM = INSNT1(3,0);
   9721            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9722               gate = True;
   9723         }
   9724      } else {
   9725         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9726             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9727             INSNA(7,4)   == BITS4(1,1,1,1)) {
   9728            regD = INSNA(15,12);
   9729            regN = INSNA(19,16);
   9730            regM = INSNA(3,0);
   9731            if (regD != 15 && regN != 15 && regM != 15)
   9732               gate = True;
   9733         }
   9734      }
   9735 
   9736      if (gate) {
   9737         IRTemp rNt  = newTemp(Ity_I32);
   9738         IRTemp rMt  = newTemp(Ity_I32);
   9739         IRTemp res  = newTemp(Ity_I32);
   9740         IRTemp reso = newTemp(Ity_I32);
   9741 
   9742         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9743         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9744 
   9745         assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
   9746         if (isT)
   9747            putIRegT( regD, mkexpr(res), condT );
   9748         else
   9749            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9750 
   9751         assign(reso, unop(Iop_Not32,
   9752                           binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt))));
   9753         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9754 
   9755         DIP("ssub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9756         return True;
   9757      }
   9758      /* fall through */
   9759    }
   9760 
   9761    /* ------------------ qadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
   9762    {
   9763      UInt regD = 99, regN = 99, regM = 99;
   9764      Bool gate = False;
   9765 
   9766      if (isT) {
   9767         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   9768            regN = INSNT0(3,0);
   9769            regD = INSNT1(11,8);
   9770            regM = INSNT1(3,0);
   9771            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9772               gate = True;
   9773         }
   9774      } else {
   9775         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   9776             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9777             INSNA(7,4)   == BITS4(1,0,0,1)) {
   9778            regD = INSNA(15,12);
   9779            regN = INSNA(19,16);
   9780            regM = INSNA(3,0);
   9781            if (regD != 15 && regN != 15 && regM != 15)
   9782               gate = True;
   9783         }
   9784      }
   9785 
   9786      if (gate) {
   9787         IRTemp rNt   = newTemp(Ity_I32);
   9788         IRTemp rMt   = newTemp(Ity_I32);
   9789         IRTemp res_q = newTemp(Ity_I32);
   9790 
   9791         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9792         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9793 
   9794         assign(res_q, binop(Iop_QAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
   9795         if (isT)
   9796            putIRegT( regD, mkexpr(res_q), condT );
   9797         else
   9798            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9799 
   9800         DIP("qadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9801         return True;
   9802      }
   9803      /* fall through */
   9804    }
   9805 
   9806    /* ------------------ qsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
   9807    {
   9808      UInt regD = 99, regN = 99, regM = 99;
   9809      Bool gate = False;
   9810 
   9811      if (isT) {
   9812         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   9813            regN = INSNT0(3,0);
   9814            regD = INSNT1(11,8);
   9815            regM = INSNT1(3,0);
   9816            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9817               gate = True;
   9818         }
   9819      } else {
   9820         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   9821             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9822             INSNA(7,4)   == BITS4(1,1,1,1)) {
   9823            regD = INSNA(15,12);
   9824            regN = INSNA(19,16);
   9825            regM = INSNA(3,0);
   9826            if (regD != 15 && regN != 15 && regM != 15)
   9827               gate = True;
   9828         }
   9829      }
   9830 
   9831      if (gate) {
   9832         IRTemp rNt   = newTemp(Ity_I32);
   9833         IRTemp rMt   = newTemp(Ity_I32);
   9834         IRTemp res_q = newTemp(Ity_I32);
   9835 
   9836         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9837         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9838 
   9839         assign(res_q, binop(Iop_QSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
   9840         if (isT)
   9841            putIRegT( regD, mkexpr(res_q), condT );
   9842         else
   9843            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9844 
   9845         DIP("qsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9846         return True;
   9847      }
   9848      /* fall through */
   9849    }
   9850 
   9851    /* ------------------ uqadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9852    {
   9853      UInt regD = 99, regN = 99, regM = 99;
   9854      Bool gate = False;
   9855 
   9856      if (isT) {
   9857         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   9858            regN = INSNT0(3,0);
   9859            regD = INSNT1(11,8);
   9860            regM = INSNT1(3,0);
   9861            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9862               gate = True;
   9863         }
   9864      } else {
   9865         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   9866             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9867             (INSNA(7,4)  == BITS4(1,0,0,1))) {
   9868            regD = INSNA(15,12);
   9869            regN = INSNA(19,16);
   9870            regM = INSNA(3,0);
   9871            if (regD != 15 && regN != 15 && regM != 15)
   9872               gate = True;
   9873         }
   9874      }
   9875 
   9876      if (gate) {
   9877         IRTemp rNt   = newTemp(Ity_I32);
   9878         IRTemp rMt   = newTemp(Ity_I32);
   9879         IRTemp res_q = newTemp(Ity_I32);
   9880 
   9881         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9882         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9883 
   9884         assign(res_q, binop(Iop_QAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
   9885         if (isT)
   9886            putIRegT( regD, mkexpr(res_q), condT );
   9887         else
   9888            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9889 
   9890         DIP("uqadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9891         return True;
   9892      }
   9893      /* fall through */
   9894    }
   9895 
   9896    /* ------------------ uqsub8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9897    {
   9898      UInt regD = 99, regN = 99, regM = 99;
   9899      Bool gate = False;
   9900 
   9901      if (isT) {
   9902         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   9903            regN = INSNT0(3,0);
   9904            regD = INSNT1(11,8);
   9905            regM = INSNT1(3,0);
   9906            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9907               gate = True;
   9908         }
   9909      } else {
   9910         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   9911             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9912             (INSNA(7,4)  == BITS4(1,1,1,1))) {
   9913            regD = INSNA(15,12);
   9914            regN = INSNA(19,16);
   9915            regM = INSNA(3,0);
   9916            if (regD != 15 && regN != 15 && regM != 15)
   9917              gate = True;
   9918         }
   9919      }
   9920 
   9921      if (gate) {
   9922         IRTemp rNt   = newTemp(Ity_I32);
   9923         IRTemp rMt   = newTemp(Ity_I32);
   9924         IRTemp res_q = newTemp(Ity_I32);
   9925 
   9926         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9927         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9928 
   9929         assign(res_q, binop(Iop_QSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
   9930         if (isT)
   9931            putIRegT( regD, mkexpr(res_q), condT );
   9932         else
   9933            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9934 
   9935         DIP("uqsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9936         return True;
   9937      }
   9938      /* fall through */
   9939    }
   9940 
   9941    /* ----------------- uhadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
   9942    {
   9943      UInt regD = 99, regN = 99, regM = 99;
   9944      Bool gate = False;
   9945 
   9946      if (isT) {
   9947         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   9948            regN = INSNT0(3,0);
   9949            regD = INSNT1(11,8);
   9950            regM = INSNT1(3,0);
   9951            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9952               gate = True;
   9953         }
   9954      } else {
   9955         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   9956             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9957             INSNA(7,4)   == BITS4(1,0,0,1)) {
   9958            regD = INSNA(15,12);
   9959            regN = INSNA(19,16);
   9960            regM = INSNA(3,0);
   9961            if (regD != 15 && regN != 15 && regM != 15)
   9962               gate = True;
   9963         }
   9964      }
   9965 
   9966      if (gate) {
   9967         IRTemp rNt   = newTemp(Ity_I32);
   9968         IRTemp rMt   = newTemp(Ity_I32);
   9969         IRTemp res_q = newTemp(Ity_I32);
   9970 
   9971         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9972         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9973 
   9974         assign(res_q, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
   9975         if (isT)
   9976            putIRegT( regD, mkexpr(res_q), condT );
   9977         else
   9978            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9979 
   9980         DIP("uhadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9981         return True;
   9982      }
   9983      /* fall through */
   9984    }
   9985 
   9986    /* ----------------- uhadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
   9987    {
   9988      UInt regD = 99, regN = 99, regM = 99;
   9989      Bool gate = False;
   9990 
   9991      if (isT) {
   9992         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   9993            regN = INSNT0(3,0);
   9994            regD = INSNT1(11,8);
   9995            regM = INSNT1(3,0);
   9996            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9997               gate = True;
   9998         }
   9999      } else {
   10000         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   10001             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10002             INSNA(7,4)   == BITS4(0,0,0,1)) {
   10003            regD = INSNA(15,12);
   10004            regN = INSNA(19,16);
   10005            regM = INSNA(3,0);
   10006            if (regD != 15 && regN != 15 && regM != 15)
   10007               gate = True;
   10008         }
   10009      }
   10010 
   10011      if (gate) {
   10012         IRTemp rNt   = newTemp(Ity_I32);
   10013         IRTemp rMt   = newTemp(Ity_I32);
   10014         IRTemp res_q = newTemp(Ity_I32);
   10015 
   10016         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10017         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10018 
   10019         assign(res_q, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
   10020         if (isT)
   10021            putIRegT( regD, mkexpr(res_q), condT );
   10022         else
   10023            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10024 
   10025         DIP("uhadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10026         return True;
   10027      }
   10028      /* fall through */
   10029    }
   10030 
   10031    /* ----------------- shadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
   10032    {
   10033      UInt regD = 99, regN = 99, regM = 99;
   10034      Bool gate = False;
   10035 
   10036      if (isT) {
   10037         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   10038            regN = INSNT0(3,0);
   10039            regD = INSNT1(11,8);
   10040            regM = INSNT1(3,0);
   10041            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10042               gate = True;
   10043         }
   10044      } else {
   10045         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   10046             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10047             INSNA(7,4)   == BITS4(1,0,0,1)) {
   10048            regD = INSNA(15,12);
   10049            regN = INSNA(19,16);
   10050            regM = INSNA(3,0);
   10051            if (regD != 15 && regN != 15 && regM != 15)
   10052               gate = True;
   10053         }
   10054      }
   10055 
   10056      if (gate) {
   10057         IRTemp rNt   = newTemp(Ity_I32);
   10058         IRTemp rMt   = newTemp(Ity_I32);
   10059         IRTemp res_q = newTemp(Ity_I32);
   10060 
   10061         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10062         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10063 
   10064         assign(res_q, binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
   10065         if (isT)
   10066            putIRegT( regD, mkexpr(res_q), condT );
   10067         else
   10068            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10069 
   10070         DIP("shadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10071         return True;
   10072      }
   10073      /* fall through */
   10074    }
   10075 
   10076    /* ------------------ qadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
   10077    {
   10078      UInt regD = 99, regN = 99, regM = 99;
   10079      Bool gate = False;
   10080 
   10081      if (isT) {
   10082         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10083            regN = INSNT0(3,0);
   10084            regD = INSNT1(11,8);
   10085            regM = INSNT1(3,0);
   10086            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10087               gate = True;
   10088         }
   10089      } else {
   10090         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10091             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10092             INSNA(7,4)   == BITS4(0,0,0,1)) {
   10093            regD = INSNA(15,12);
   10094            regN = INSNA(19,16);
   10095            regM = INSNA(3,0);
   10096            if (regD != 15 && regN != 15 && regM != 15)
   10097               gate = True;
   10098         }
   10099      }
   10100 
   10101      if (gate) {
   10102         IRTemp rNt   = newTemp(Ity_I32);
   10103         IRTemp rMt   = newTemp(Ity_I32);
   10104         IRTemp res_q = newTemp(Ity_I32);
   10105 
   10106         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10107         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10108 
   10109         assign(res_q, binop(Iop_QAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
   10110         if (isT)
   10111            putIRegT( regD, mkexpr(res_q), condT );
   10112         else
   10113            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10114 
   10115         DIP("qadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10116         return True;
   10117      }
   10118      /* fall through */
   10119    }
   10120 
   10121    /* ------------------ qsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
   10122    {
   10123      UInt regD = 99, regN = 99, regM = 99;
   10124      Bool gate = False;
   10125 
   10126       if (isT) {
   10127         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10128            regN = INSNT0(3,0);
   10129            regD = INSNT1(11,8);
   10130            regM = INSNT1(3,0);
   10131            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10132               gate = True;
   10133         }
   10134      } else {
   10135         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10136             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10137             INSNA(7,4)   == BITS4(0,1,1,1)) {
   10138            regD = INSNA(15,12);
   10139            regN = INSNA(19,16);
   10140            regM = INSNA(3,0);
   10141            if (regD != 15 && regN != 15 && regM != 15)
   10142              gate = True;
   10143         }
   10144      }
   10145 
   10146      if (gate) {
   10147         IRTemp rNt   = newTemp(Ity_I32);
   10148         IRTemp rMt   = newTemp(Ity_I32);
   10149         IRTemp res_q = newTemp(Ity_I32);
   10150 
   10151         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10152         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10153 
   10154         assign(res_q, binop(Iop_QSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
   10155         if (isT)
   10156            putIRegT( regD, mkexpr(res_q), condT );
   10157         else
   10158            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10159 
   10160         DIP("qsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10161         return True;
   10162      }
   10163      /* fall through */
   10164    }
   10165 
   10166    /* ------------------- qsax<c> <Rd>,<Rn>,<Rm> ------------------- */
   10167    /* note: the hardware seems to construct the result differently
   10168       from wot the manual says. */
   10169    {
   10170      UInt regD = 99, regN = 99, regM = 99;
   10171      Bool gate = False;
   10172 
   10173      if (isT) {
   10174         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10175            regN = INSNT0(3,0);
   10176            regD = INSNT1(11,8);
   10177            regM = INSNT1(3,0);
   10178            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10179               gate = True;
   10180         }
   10181      } else {
   10182         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10183             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10184             INSNA(7,4)   == BITS4(0,1,0,1)) {
   10185            regD = INSNA(15,12);
   10186            regN = INSNA(19,16);
   10187            regM = INSNA(3,0);
   10188            if (regD != 15 && regN != 15 && regM != 15)
   10189               gate = True;
   10190         }
   10191      }
   10192 
   10193      if (gate) {
   10194         IRTemp irt_regN     = newTemp(Ity_I32);
   10195         IRTemp irt_regM     = newTemp(Ity_I32);
   10196         IRTemp irt_sum      = newTemp(Ity_I32);
   10197         IRTemp irt_diff     = newTemp(Ity_I32);
   10198         IRTemp irt_sum_res  = newTemp(Ity_I32);
   10199         IRTemp irt_diff_res = newTemp(Ity_I32);
   10200 
   10201         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10202         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10203 
   10204         assign( irt_diff,
   10205                 binop( Iop_Sub32,
   10206                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10207                        binop( Iop_Sar32,
   10208                               binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
   10209                               mkU8(16) ) ) );
   10210         armSignedSatQ( irt_diff, 0x10, &irt_diff_res, NULL);
   10211 
   10212         assign( irt_sum,
   10213                 binop( Iop_Add32,
   10214                        binop( Iop_Sar32,
   10215                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10216                               mkU8(16) ),
   10217                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) )) );
   10218         armSignedSatQ( irt_sum, 0x10, &irt_sum_res, NULL );
   10219 
   10220         IRExpr* ire_result = binop( Iop_Or32,
   10221                                     binop( Iop_Shl32, mkexpr(irt_diff_res),
   10222                                            mkU8(16) ),
   10223                                     binop( Iop_And32, mkexpr(irt_sum_res),
   10224                                            mkU32(0xFFFF)) );
   10225 
   10226         if (isT)
   10227            putIRegT( regD, ire_result, condT );
   10228         else
   10229            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10230 
   10231         DIP( "qsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10232         return True;
   10233      }
   10234      /* fall through */
   10235    }
   10236 
   10237    /* ------------------- qasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   10238    {  /* QASX decoder: on a match, emit IR for the instruction and return True. */
   10239      UInt regD = 99, regN = 99, regM = 99;  /* dummy initial values, overwritten on a match */
   10240      Bool gate = False;  /* becomes True only if the encoding and register checks pass */
   10241      /* Recognise the Thumb (isT) or ARM encoding and extract the register fields. */
   10242      if (isT) {
   10243         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10244            regN = INSNT0(3,0);
   10245            regD = INSNT1(11,8);
   10246            regM = INSNT1(3,0);
   10247            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10248               gate = True;
   10249         }
   10250      } else {
   10251         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10252             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10253             INSNA(7,4)   == BITS4(0,0,1,1)) {
   10254            regD = INSNA(15,12);
   10255            regN = INSNA(19,16);
   10256            regM = INSNA(3,0);
   10257            if (regD != 15 && regN != 15 && regM != 15)  /* r15 is not accepted for any operand */
   10258               gate = True;
   10259         }
   10260      }
   10261 
   10262      if (gate) {
   10263         IRTemp irt_regN     = newTemp(Ity_I32);
   10264         IRTemp irt_regM     = newTemp(Ity_I32);
   10265         IRTemp irt_sum      = newTemp(Ity_I32);
   10266         IRTemp irt_diff     = newTemp(Ity_I32);
   10267         IRTemp irt_res_sum  = newTemp(Ity_I32);
   10268         IRTemp irt_res_diff = newTemp(Ity_I32);
   10269      /* Read both source registers once into temporaries. */
   10270         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10271         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10272      /* diff = sign-extended Rn[15:0] (shl 16 then sar 16) minus (Rm sar 16). */
   10273         assign( irt_diff,
   10274                 binop( Iop_Sub32,
   10275                        binop( Iop_Sar32,
   10276                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10277                               mkU8(16) ),
   10278                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
   10279         armSignedSatQ( irt_diff, 0x10, &irt_res_diff, NULL );  /* NOTE(review): presumably saturates to 0x10 = 16 signed bits, NULL meaning no Q output requested -- confirm against armSignedSatQ */
   10280      /* sum = (Rn sar 16) plus sign-extended Rm[15:0]. */
   10281         assign( irt_sum,
   10282                 binop( Iop_Add32,
   10283                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10284                        binop( Iop_Sar32,
   10285                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   10286                               mkU8(16) ) ) );
   10287         armSignedSatQ( irt_sum, 0x10, &irt_res_sum, NULL );
   10288      /* Pack: sum result in bits 31:16, low 16 bits of the diff result in bits 15:0. */
   10289         IRExpr* ire_result
   10290           = binop( Iop_Or32,
   10291                    binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
   10292                    binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
   10293      /* Write Rd with the register writer matching the instruction set. */
   10294         if (isT)
   10295            putIRegT( regD, ire_result, condT );
   10296         else
   10297            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10298      /* Print the disassembly text and report the instruction as handled. */
   10299         DIP( "qasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10300         return True;
   10301      }
   10302      /* fall through: not a QASX encoding, so the decoders that follow get a chance */
   10303    }
   10304 
   10305    /* ------------------- sasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   10306    {  /* SASX decoder: on a match, emit IR for the result and the four GE flags, then return True. */
   10307      UInt regD = 99, regN = 99, regM = 99;  /* dummy initial values, overwritten on a match */
   10308      Bool gate = False;  /* becomes True only if the encoding and register checks pass */
   10309      /* Recognise the Thumb (isT) or ARM encoding and extract the register fields. */
   10310      if (isT) {
   10311         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   10312            regN = INSNT0(3,0);
   10313            regD = INSNT1(11,8);
   10314            regM = INSNT1(3,0);
   10315            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10316               gate = True;
   10317         }
   10318      } else {
   10319         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   10320             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10321             INSNA(7,4)   == BITS4(0,0,1,1)) {
   10322            regD = INSNA(15,12);
   10323            regN = INSNA(19,16);
   10324            regM = INSNA(3,0);
   10325            if (regD != 15 && regN != 15 && regM != 15)  /* r15 is not accepted for any operand */
   10326               gate = True;
   10327         }
   10328      }
   10329 
   10330      if (gate) {
   10331         IRTemp irt_regN = newTemp(Ity_I32);
   10332         IRTemp irt_regM = newTemp(Ity_I32);
   10333         IRTemp irt_sum  = newTemp(Ity_I32);
   10334         IRTemp irt_diff = newTemp(Ity_I32);
   10335      /* Read both source registers once into temporaries. */
   10336         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10337         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10338      /* diff = sign-extended Rn[15:0] (shl 16 then sar 16) minus (Rm sar 16); no saturation here. */
   10339         assign( irt_diff,
   10340                 binop( Iop_Sub32,
   10341                        binop( Iop_Sar32,
   10342                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10343                               mkU8(16) ),
   10344                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
   10345      /* sum = (Rn sar 16) plus sign-extended Rm[15:0]. */
   10346         assign( irt_sum,
   10347                 binop( Iop_Add32,
   10348                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10349                        binop( Iop_Sar32,
   10350                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   10351                               mkU8(16) ) ) );
   10352      /* Pack: low 16 bits of sum into bits 31:16, low 16 bits of diff into bits 15:0. */
   10353         IRExpr* ire_result
   10354           = binop( Iop_Or32,
   10355                    binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
   10356                    binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
   10357      /* GE flags 0 and 1 both take the bitwise complement of diff. NOTE(review): the 31 presumably selects the tested bit, i.e. flag set when diff is non-negative -- confirm against put_GEFLAG32. */
   10358         IRTemp ge10 = newTemp(Ity_I32);
   10359         assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
   10360         put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
   10361         put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
   10362      /* GE flags 2 and 3 are derived the same way from the complement of sum. */
   10363         IRTemp ge32 = newTemp(Ity_I32);
   10364         assign(ge32, unop(Iop_Not32, mkexpr(irt_sum)));
   10365         put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
   10366         put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
   10367      /* Write Rd with the register writer matching the instruction set. */
   10368         if (isT)
   10369            putIRegT( regD, ire_result, condT );
   10370         else
   10371            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10372      /* Print the disassembly text and report the instruction as handled. */
   10373         DIP( "sasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10374         return True;
   10375      }
   10376      /* fall through: not a SASX encoding, so the decoders that follow get a chance */
   10377    }
   10378 
   10379    /* --------------- smuad, smuadx<c><Rd>,<Rn>,<Rm> --------------- */
   10380    /* --------------- smusd, smusdx<c><Rd>,<Rn>,<Rm> --------------- */
   10381    {  /* Dual 16x16 multiply with add (isAD) or subtract of the two products; returns True on a match. */
   10382      UInt regD = 99, regN = 99, regM = 99, bitM = 99;  /* dummy initial values, overwritten on a match */
   10383      Bool gate = False, isAD = False;  /* isAD: True selects the add form, False the subtract form */
   10384      /* Recognise the Thumb (isT) or ARM encoding and extract the fields. */
   10385      if (isT) {
   10386         if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
   10387             && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
   10388            regN = INSNT0(3,0);
   10389            regD = INSNT1(11,8);
   10390            regM = INSNT1(3,0);
   10391            bitM = INSNT1(4,4);
   10392            isAD = INSNT0(15,4) == 0xFB2;
   10393            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10394               gate = True;
   10395         }
   10396      } else {
   10397         if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
   10398             INSNA(15,12) == BITS4(1,1,1,1)         &&
   10399             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1) ) {
   10400            regD = INSNA(19,16);
   10401            regN = INSNA(3,0);
   10402            regM = INSNA(11,8);
   10403            bitM = INSNA(5,5);
   10404            isAD = INSNA(6,6) == 0;
   10405            if (regD != 15 && regN != 15 && regM != 15)  /* r15 is not accepted for any operand */
   10406               gate = True;
   10407         }
   10408      }
   10409 
   10410      if (gate) {
   10411         IRTemp irt_regN    = newTemp(Ity_I32);
   10412         IRTemp irt_regM    = newTemp(Ity_I32);
   10413         IRTemp irt_prod_lo = newTemp(Ity_I32);
   10414         IRTemp irt_prod_hi = newTemp(Ity_I32);
   10415         IRTemp tmpM        = newTemp(Ity_I32);
   10416 
   10417         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10418      /* With bitM set (the "x" forms) Rm goes through genROR32 with amount 16, otherwise amount 0. */
   10419         assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
   10420         assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
   10421      /* prod_lo = product of the sign-extended low halfwords (shl 16 then sar 16). */
   10422         assign( irt_prod_lo,
   10423                 binop( Iop_Mul32,
   10424                        binop( Iop_Sar32,
   10425                               binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
   10426                               mkU8(16) ),
   10427                        binop( Iop_Sar32,
   10428                               binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
   10429                               mkU8(16) ) ) );
   10430         assign( irt_prod_hi, binop(Iop_Mul32,  /* prod_hi = product of the high halfwords (sar 16) */
   10431                                    binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)),
   10432                                    binop(Iop_Sar32, mkexpr(irt_regM), mkU8(16))) );
   10433         IRExpr* ire_result
   10434            = binop( isAD ? Iop_Add32 : Iop_Sub32,
   10435                     mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
   10436      /* Write Rd with the register writer matching the instruction set. */
   10437         if (isT)
   10438            putIRegT( regD, ire_result, condT );
   10439         else
   10440            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10441      /* Only the add form feeds the Q flag: signed overflow of prod_lo + prod_hi is OR-ed in. */
   10442         if (isAD) {
   10443            or_into_QFLAG32(
   10444               signed_overflow_after_Add32( ire_result,
   10445                                            irt_prod_lo, irt_prod_hi ),
   10446               condT
   10447            );
   10448         }
   10449      /* Print "smuad"/"smusd" (with "x" when bitM is set) and report the instruction as handled. */
   10450         DIP("smu%cd%s%s r%u, r%u, r%u\n",
   10451             isAD ? 'a' : 's',
   10452             bitM ? "x" : "", nCC(conq), regD, regN, regM);
   10453         return True;
   10454      }
   10455      /* fall through: no match here, so the decoders that follow get a chance */
   10456    }
   10457 
   10458    /* --------------- smlad{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
   10459    /* --------------- smlsd{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
   10460    {  /* Dual 16x16 multiply, add or subtract the products, then accumulate Ra; returns True on a match. */
   10461      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;  /* dummy initial values, overwritten on a match */
   10462      Bool gate = False, isAD = False;  /* isAD: True selects the add form, False the subtract form */
   10463      /* Recognise the Thumb (isT) or ARM encoding and extract the fields. */
   10464      if (isT) {
   10465        if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
   10466            && INSNT1(7,5) == BITS3(0,0,0)) {
   10467            regN = INSNT0(3,0);
   10468            regD = INSNT1(11,8);
   10469            regM = INSNT1(3,0);
   10470            regA = INSNT1(15,12);
   10471            bitM = INSNT1(4,4);
   10472            isAD = INSNT0(15,4) == 0xFB2;
   10473            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
   10474                && !isBadRegT(regA))
   10475               gate = True;
   10476         }
   10477      } else {
   10478         if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
   10479             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
   10480            regD = INSNA(19,16);
   10481            regA = INSNA(15,12);
   10482            regN = INSNA(3,0);
   10483            regM = INSNA(11,8);
   10484            bitM = INSNA(5,5);
   10485            isAD = INSNA(6,6) == 0;
   10486            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)  /* r15 is not accepted for any operand, including the accumulator */
   10487               gate = True;
   10488         }
   10489      }
   10490 
   10491      if (gate) {
   10492         IRTemp irt_regN    = newTemp(Ity_I32);
   10493         IRTemp irt_regM    = newTemp(Ity_I32);
   10494         IRTemp irt_regA    = newTemp(Ity_I32);
   10495         IRTemp irt_prod_lo = newTemp(Ity_I32);
   10496         IRTemp irt_prod_hi = newTemp(Ity_I32);
   10497         IRTemp irt_sum     = newTemp(Ity_I32);
   10498         IRTemp tmpM        = newTemp(Ity_I32);
   10499 
   10500         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10501         assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
   10502      /* With bitM set (the "x" forms) Rm goes through genROR32 with amount 16, otherwise amount 0. */
   10503         assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
   10504         assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
   10505      /* prod_lo = product of the sign-extended low halfwords (shl 16 then sar 16). */
   10506         assign( irt_prod_lo,
   10507                 binop(Iop_Mul32,
   10508                       binop(Iop_Sar32,
   10509                             binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10510                             mkU8(16)),
   10511                       binop(Iop_Sar32,
   10512                             binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   10513                             mkU8(16))) );
   10514         assign( irt_prod_hi,  /* prod_hi = product of the high halfwords (sar 16) */
   10515                 binop( Iop_Mul32,
   10516                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10517                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
   10518         assign( irt_sum, binop( isAD ? Iop_Add32 : Iop_Sub32,
   10519                                 mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) ) );
   10520      /* Final value: (prod_lo +/- prod_hi) + Ra. */
   10521         IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_sum), mkexpr(irt_regA));
   10522      /* Write Rd with the register writer matching the instruction set. */
   10523         if (isT)
   10524            putIRegT( regD, ire_result, condT );
   10525         else
   10526            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10527      /* Q flag, step 1 (add form only): signed overflow of prod_lo + prod_hi. */
   10528         if (isAD) {
   10529            or_into_QFLAG32(
   10530               signed_overflow_after_Add32( mkexpr(irt_sum),
   10531                                            irt_prod_lo, irt_prod_hi ),
   10532               condT
   10533            );
   10534         }
   10535      /* Q flag, step 2 (both forms): signed overflow of the accumulate sum + Ra. */
   10536         or_into_QFLAG32(
   10537            signed_overflow_after_Add32( ire_result, irt_sum, irt_regA ),
   10538            condT
   10539         );
   10540      /* Print "smlad"/"smlsd" (with "x" when bitM is set) and report the instruction as handled. */
   10541         DIP("sml%cd%s%s r%u, r%u, r%u, r%u\n",
   10542             isAD ? 'a' : 's',
   10543             bitM ? "x" : "", nCC(conq), regD, regN, regM, regA);
   10544         return True;
   10545      }
   10546      /* fall through: no match here, so the decoders that follow get a chance */
   10547    }
   10548 
   10549    /* ----- smlabb, smlabt, smlatb, smlatt <Rd>,<Rn>,<Rm>,<Ra> ----- */
   10550    {  /* 16x16 multiply of selected halfwords plus 32-bit accumulate; returns True on a match. */
   10551      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99, bitN = 99;  /* dummy initial values, overwritten on a match */
   10552      Bool gate = False;  /* becomes True only if the encoding and register checks pass */
   10553      /* Recognise the Thumb (isT) or ARM encoding; bitN/bitM pick the halfword of Rn/Rm. */
   10554      if (isT) {
   10555         if (INSNT0(15,4) == 0xFB1 && INSNT1(7,6) == BITS2(0,0)) {
   10556            regN = INSNT0(3,0);
   10557            regD = INSNT1(11,8);
   10558            regM = INSNT1(3,0);
   10559            regA = INSNT1(15,12);
   10560            bitM = INSNT1(4,4);
   10561            bitN = INSNT1(5,5);
   10562            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
   10563                && !isBadRegT(regA))
   10564               gate = True;
   10565         }
   10566      } else {
   10567         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
   10568             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
   10569            regD = INSNA(19,16);
   10570            regN = INSNA(3,0);
   10571            regM = INSNA(11,8);
   10572            regA = INSNA(15,12);
   10573            bitM = INSNA(6,6);
   10574            bitN = INSNA(5,5);
   10575            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)  /* r15 is not accepted for any operand */
   10576               gate = True;
   10577         }
   10578      }
   10579 
   10580      if (gate) {
   10581         IRTemp irt_regA = newTemp(Ity_I32);
   10582         IRTemp irt_prod = newTemp(Ity_I32);
   10583      /* Halfword select: shl by 0 then sar 16 keeps the top half (bit set), shl by 16 then sar 16 sign-extends the bottom half. */
   10584         assign( irt_prod,
   10585                 binop(Iop_Mul32,
   10586                       binop(Iop_Sar32,
   10587                             binop(Iop_Shl32,
   10588                                   isT ? getIRegT(regN) : getIRegA(regN),
   10589                                   mkU8(bitN ? 0 : 16)),
   10590                             mkU8(16)),
   10591                       binop(Iop_Sar32,
   10592                             binop(Iop_Shl32,
   10593                                   isT ? getIRegT(regM) : getIRegA(regM),
   10594                                   mkU8(bitM ? 0 : 16)),
   10595                             mkU8(16))) );
   10596 
   10597         assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
   10598      /* Final value: product + Ra. */
   10599         IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_prod), mkexpr(irt_regA));
   10600      /* Write Rd with the register writer matching the instruction set. */
   10601         if (isT)
   10602            putIRegT( regD, ire_result, condT );
   10603         else
   10604            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10605      /* Signed overflow of the accumulate is OR-ed into the Q flag. */
   10606         or_into_QFLAG32(
   10607            signed_overflow_after_Add32( ire_result, irt_prod, irt_regA ),
   10608            condT
   10609         );
   10610      /* Print the mnemonic with 't'/'b' per operand half and report the instruction as handled. */
   10611         DIP( "smla%c%c%s r%u, r%u, r%u, r%u\n",
   10612              bitN ? 't' : 'b', bitM ? 't' : 'b',
   10613              nCC(conq), regD, regN, regM, regA );
   10614         return True;
   10615      }
   10616      /* fall through: no match here, so the decoders that follow get a chance */
   10617    }
   10618 
   10619    /* ----- smlalbb, smlalbt, smlaltb, smlaltt <RdLo>,<RdHi>,<Rn>,<Rm> ----- */
   10620    {  /* 16x16 multiply of selected halfwords, added to the 64-bit value held in RdHi:RdLo; returns True on a match. */
   10621      UInt regDHi = 99, regN = 99, regM = 99, regDLo = 99, bitM = 99, bitN = 99;  /* dummy initial values, overwritten on a match */
   10622      Bool gate = False;  /* becomes True only if the encoding and register checks pass */
   10623      /* Recognise the Thumb (isT) or ARM encoding; bitN/bitM pick the halfword of Rn/Rm. */
   10624      if (isT) {
   10625         if (INSNT0(15,4) == 0xFBC && INSNT1(7,6) == BITS2(1,0)) {
   10626            regN   = INSNT0(3,0);
   10627            regDHi = INSNT1(11,8);
   10628            regM   = INSNT1(3,0);
   10629            regDLo = INSNT1(15,12);
   10630            bitM   = INSNT1(4,4);
   10631            bitN   = INSNT1(5,5);
   10632            if (!isBadRegT(regDHi) && !isBadRegT(regN) && !isBadRegT(regM)
   10633                && !isBadRegT(regDLo) && regDHi != regDLo)
   10634               gate = True;
   10635         }
   10636      } else {
   10637         if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
   10638             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
   10639            regDHi = INSNA(19,16);
   10640            regN   = INSNA(3,0);
   10641            regM   = INSNA(11,8);
   10642            regDLo = INSNA(15,12);
   10643            bitM   = INSNA(6,6);
   10644            bitN   = INSNA(5,5);
   10645            if (regDHi != 15 && regN != 15 && regM != 15 && regDLo != 15 &&
   10646                regDHi != regDLo)  /* the two destination halves must be distinct registers */
   10647               gate = True;
   10648         }
   10649      }
   10650 
   10651      if (gate) {
   10652         IRTemp irt_regD  = newTemp(Ity_I64);
   10653         IRTemp irt_prod  = newTemp(Ity_I64);
   10654         IRTemp irt_res   = newTemp(Ity_I64);
   10655         IRTemp irt_resHi = newTemp(Ity_I32);
   10656         IRTemp irt_resLo = newTemp(Ity_I32);
   10657      /* 64-bit product of the two selected halfwords: shl by 0 keeps the top half, shl by 16 takes the bottom half, then sar 16. */
   10658         assign( irt_prod,
   10659                 binop(Iop_MullS32,
   10660                       binop(Iop_Sar32,
   10661                             binop(Iop_Shl32,
   10662                                   isT ? getIRegT(regN) : getIRegA(regN),
   10663                                   mkU8(bitN ? 0 : 16)),
   10664                             mkU8(16)),
   10665                       binop(Iop_Sar32,
   10666                             binop(Iop_Shl32,
   10667                                   isT ? getIRegT(regM) : getIRegA(regM),
   10668                                   mkU8(bitM ? 0 : 16)),
   10669                             mkU8(16))) );
   10670      /* Join RdHi:RdLo into one 64-bit accumulator, add the product, then split the sum again. */
   10671         assign( irt_regD, binop(Iop_32HLto64,
   10672                                 isT ? getIRegT(regDHi) : getIRegA(regDHi),
   10673                                 isT ? getIRegT(regDLo) : getIRegA(regDLo)) );
   10674         assign( irt_res, binop(Iop_Add64, mkexpr(irt_regD), mkexpr(irt_prod)) );
   10675         assign( irt_resHi, unop(Iop_64HIto32, mkexpr(irt_res)) );
   10676         assign( irt_resLo, unop(Iop_64to32, mkexpr(irt_res)) );
   10677      /* Write both halves back; no Q-flag update is generated in this block. */
   10678         if (isT) {
   10679            putIRegT( regDHi, mkexpr(irt_resHi), condT );
   10680            putIRegT( regDLo, mkexpr(irt_resLo), condT );
   10681         } else {
   10682            putIRegA( regDHi, mkexpr(irt_resHi), condT, Ijk_Boring );
   10683            putIRegA( regDLo, mkexpr(irt_resLo), condT, Ijk_Boring );
   10684         }
   10685      /* The operands are printed in the order RdHi, Rn, Rm, RdLo. */
   10686         DIP( "smlal%c%c%s r%u, r%u, r%u, r%u\n",
   10687              bitN ? 't' : 'b', bitM ? 't' : 'b',
   10688              nCC(conq), regDHi, regN, regM, regDLo );
   10689         return True;
   10690      }
   10691      /* fall through: no match here, so the decoders that follow get a chance */
   10692    }
   10693 
   10694    /* ----- smlawb, smlawt <Rd>,<Rn>,<Rm>,<Ra> ----- */
   10695    {  /* 32x16 multiply, keep bits 47:16 of the product, add Ra; returns True on a match. */
   10696      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;  /* dummy initial values, overwritten on a match */
   10697      Bool gate = False;  /* becomes True only if the encoding and register checks pass */
   10698      /* Recognise the Thumb (isT) or ARM encoding; bitM picks the halfword of Rm. */
   10699      if (isT) {
   10700         if (INSNT0(15,4) == 0xFB3 && INSNT1(7,5) == BITS3(0,0,0)) {
   10701            regN = INSNT0(3,0);
   10702            regD = INSNT1(11,8);
   10703            regM = INSNT1(3,0);
   10704            regA = INSNT1(15,12);
   10705            bitM = INSNT1(4,4);
   10706            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
   10707                && !isBadRegT(regA))
   10708               gate = True;
   10709         }
   10710      } else {
   10711         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
   10712             (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,0,0)) {
   10713            regD = INSNA(19,16);
   10714            regN = INSNA(3,0);
   10715            regM = INSNA(11,8);
   10716            regA = INSNA(15,12);
   10717            bitM = INSNA(6,6);
   10718            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)  /* r15 is not accepted for any operand */
   10719               gate = True;
   10720         }
   10721      }
   10722 
   10723      if (gate) {
   10724         IRTemp irt_regA = newTemp(Ity_I32);
   10725         IRTemp irt_prod = newTemp(Ity_I64);
   10726      /* 64-bit product of all of Rn with the selected halfword of Rm (shl by 0 keeps the top half, shl by 16 the bottom, then sar 16). */
   10727         assign( irt_prod,
   10728                 binop(Iop_MullS32,
   10729                       isT ? getIRegT(regN) : getIRegA(regN),
   10730                       binop(Iop_Sar32,
   10731                             binop(Iop_Shl32,
   10732                                   isT ? getIRegT(regM) : getIRegA(regM),
   10733                                   mkU8(bitM ? 0 : 16)),
   10734                             mkU8(16))) );
   10735 
   10736         assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
   10737      /* prod32 = bits 47:16 of the product: (high word shl 16) OR (low word shr 16). */
   10738         IRTemp prod32 = newTemp(Ity_I32);
   10739         assign(prod32,
   10740                binop(Iop_Or32,
   10741                      binop(Iop_Shl32, unop(Iop_64HIto32, mkexpr(irt_prod)), mkU8(16)),
   10742                      binop(Iop_Shr32, unop(Iop_64to32, mkexpr(irt_prod)), mkU8(16))
   10743         ));
   10744      /* Final value: prod32 + Ra. */
   10745         IRExpr* ire_result = binop(Iop_Add32, mkexpr(prod32), mkexpr(irt_regA));
   10746      /* Write Rd with the register writer matching the instruction set. */
   10747         if (isT)
   10748            putIRegT( regD, ire_result, condT );
   10749         else
   10750            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10751      /* Signed overflow of the accumulate is OR-ed into the Q flag. */
   10752         or_into_QFLAG32(
   10753            signed_overflow_after_Add32( ire_result, prod32, irt_regA ),
   10754            condT
   10755         );
   10756      /* Print the mnemonic with 't'/'b' for the Rm half and report the instruction as handled. */
   10757         DIP( "smlaw%c%s r%u, r%u, r%u, r%u\n",
   10758              bitM ? 't' : 'b',
   10759              nCC(conq), regD, regN, regM, regA );
   10760         return True;
   10761      }
   10762      /* fall through: no match here, so the decoders that follow get a chance */
   10763    }
   10764 
   10765    /* ------------------- sel<c> <Rd>,<Rn>,<Rm> -------------------- */
   10766    /* fixme: fix up the test in v6media.c so that we can pass the ge
   10767       flags as part of the test. */
   10768    {  /* SEL decoder: builds a byte-lane mask from the four GE flags and merges Rn and Rm with it; returns True on a match. */
   10769      UInt regD = 99, regN = 99, regM = 99;  /* dummy initial values, overwritten on a match */
   10770      Bool gate = False;  /* becomes True only if the encoding and register checks pass */
   10771      /* Recognise the Thumb (isT) or ARM encoding and extract the register fields. */
   10772      if (isT) {
   10773         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
   10774            regN = INSNT0(3,0);
   10775            regD = INSNT1(11,8);
   10776            regM = INSNT1(3,0);
   10777            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10778               gate = True;
   10779         }
   10780      } else {
   10781         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
   10782             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10783             INSNA(7,4)   == BITS4(1,0,1,1)) {
   10784            regD = INSNA(15,12);
   10785            regN = INSNA(19,16);
   10786            regM = INSNA(3,0);
   10787            if (regD != 15 && regN != 15 && regM != 15)  /* r15 is not accepted for any operand */
   10788               gate = True;
   10789         }
   10790      }
   10791 
   10792      if (gate) {
   10793         IRTemp irt_ge_flag0 = newTemp(Ity_I32);
   10794         IRTemp irt_ge_flag1 = newTemp(Ity_I32);
   10795         IRTemp irt_ge_flag2 = newTemp(Ity_I32);
   10796         IRTemp irt_ge_flag3 = newTemp(Ity_I32);
   10797      /* Fetch the four GE flag values as 32-bit temporaries. */
   10798         assign( irt_ge_flag0, get_GEFLAG32(0) );
   10799         assign( irt_ge_flag1, get_GEFLAG32(1) );
   10800         assign( irt_ge_flag2, get_GEFLAG32(2) );
   10801         assign( irt_ge_flag3, get_GEFLAG32(3) );
   10802      /* x | (0 - x) has bit 31 set exactly when x is nonzero, so any nonzero flag value counts as set. */
   10803         IRExpr* ire_ge_flag0_or
   10804           = binop(Iop_Or32, mkexpr(irt_ge_flag0),
   10805                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag0)));
   10806         IRExpr* ire_ge_flag1_or
   10807           = binop(Iop_Or32, mkexpr(irt_ge_flag1),
   10808                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag1)));
   10809         IRExpr* ire_ge_flag2_or
   10810           = binop(Iop_Or32, mkexpr(irt_ge_flag2),
   10811                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag2)));
   10812         IRExpr* ire_ge_flag3_or
   10813           = binop(Iop_Or32, mkexpr(irt_ge_flag3),
   10814                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag3)));
   10815      /* sar 31 spreads that bit over the whole word; each And32 keeps one byte lane, giving a 0x00/0xFF-per-byte mask. */
   10816         IRExpr* ire_ge_flags
   10817           = binop( Iop_Or32,
   10818                    binop(Iop_Or32,
   10819                          binop(Iop_And32,
   10820                                binop(Iop_Sar32, ire_ge_flag0_or, mkU8(31)),
   10821                                mkU32(0x000000ff)),
   10822                          binop(Iop_And32,
   10823                                binop(Iop_Sar32, ire_ge_flag1_or, mkU8(31)),
   10824                                mkU32(0x0000ff00))),
   10825                    binop(Iop_Or32,
   10826                          binop(Iop_And32,
   10827                                binop(Iop_Sar32, ire_ge_flag2_or, mkU8(31)),
   10828                                mkU32(0x00ff0000)),
   10829                          binop(Iop_And32,
   10830                                binop(Iop_Sar32, ire_ge_flag3_or, mkU8(31)),
   10831                                mkU32(0xff000000))) );
   10832      /* result = (Rn AND mask) OR (Rm AND NOT mask): bytes come from Rn where the flag is set, else from Rm. */
   10833         IRExpr* ire_result
   10834           = binop(Iop_Or32,
   10835                   binop(Iop_And32,
   10836                         isT ? getIRegT(regN) : getIRegA(regN),
   10837                         ire_ge_flags ),
   10838                   binop(Iop_And32,
   10839                         isT ? getIRegT(regM) : getIRegA(regM),
   10840                         unop(Iop_Not32, ire_ge_flags)));
   10841      /* Write Rd with the register writer matching the instruction set. */
   10842         if (isT)
   10843            putIRegT( regD, ire_result, condT );
   10844         else
   10845            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10846      /* Print the disassembly text and report the instruction as handled. */
   10847         DIP("sel%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10848         return True;
   10849      }
   10850      /* fall through: not a SEL encoding, so the decoders that follow get a chance */
   10851    }
   10852 
   10853    /* ----------------- uxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
   10854    {  /* UXTAB16 decoder: adds two bytes extracted from rotated Rm to the two halfwords of Rn; returns True on a match. */
   10855      UInt regD = 99, regN = 99, regM = 99, rotate = 99;  /* dummy initial values, overwritten on a match */
   10856      Bool gate = False;  /* becomes True only if the encoding and register checks pass */
   10857      /* Recognise the Thumb (isT) or ARM encoding; rotate is a 2-bit field, used below as a multiple of 8. */
   10858      if (isT) {
   10859         if (INSNT0(15,4) == 0xFA3 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
   10860            regN   = INSNT0(3,0);
   10861            regD   = INSNT1(11,8);
   10862            regM   = INSNT1(3,0);
   10863            rotate = INSNT1(5,4);
   10864            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10865               gate = True;
   10866         }
   10867      } else {
   10868         if (INSNA(27,20) == BITS8(0,1,1,0,1,1,0,0) &&
   10869             INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
   10870            regD   = INSNA(15,12);
   10871            regN   = INSNA(19,16);
   10872            regM   = INSNA(3,0);
   10873            rotate = INSNA(11,10);
   10874            if (regD != 15 && regN != 15 && regM != 15)  /* r15 is not accepted for any operand */
   10875              gate = True;
   10876         }
   10877      }
   10878 
   10879      if (gate) {
   10880         IRTemp irt_regN = newTemp(Ity_I32);
   10881         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10882 
   10883         IRTemp irt_regM = newTemp(Ity_I32);
   10884         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10885      /* Rotate Rm by 8*rotate, then keep bytes 0 and 2 (mask 0x00FF00FF): one zero-extended byte per halfword lane. */
   10886         IRTemp irt_rot = newTemp(Ity_I32);
   10887         assign( irt_rot, binop(Iop_And32,
   10888                                genROR32(irt_regM, 8 * rotate),
   10889                                mkU32(0x00FF00FF)) );
   10890      /* Low lane: full add, then mask to 16 bits so the carry out of bit 15 is discarded. */
   10891         IRExpr* resLo
   10892            = binop(Iop_And32,
   10893                    binop(Iop_Add32, mkexpr(irt_regN), mkexpr(irt_rot)),
   10894                    mkU32(0x0000FFFF));
   10895      /* High lane: both operands are masked to bits 31:16 before the add, so the low lane cannot carry into it. */
   10896         IRExpr* resHi
   10897            = binop(Iop_Add32,
   10898                    binop(Iop_And32, mkexpr(irt_regN), mkU32(0xFFFF0000)),
   10899                    binop(Iop_And32, mkexpr(irt_rot),  mkU32(0xFFFF0000)));
   10900 
   10901         IRExpr* ire_result
   10902            = binop( Iop_Or32, resHi, resLo );
   10903      /* Write Rd with the register writer matching the instruction set. */
   10904         if (isT)
   10905            putIRegT( regD, ire_result, condT );
   10906         else
   10907            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10908      /* Print the disassembly text (rotation shown in bits) and report the instruction as handled. */
   10909         DIP( "uxtab16%s r%u, r%u, r%u, ROR #%u\n",
   10910              nCC(conq), regD, regN, regM, 8 * rotate );
   10911         return True;
   10912      }
   10913      /* fall through: not a UXTAB16 encoding, so the decoders that follow get a chance */
   10914    }
   10915 
   10916    /* --------------- usad8  Rd,Rn,Rm    ---------------- */
   10917    /* --------------- usada8 Rd,Rn,Rm,Ra ---------------- */
   10918    {  /* USAD8 / USADA8 decoder: Iop_Sad8Ux4 of Rn and Rm plus an optional accumulator; returns True on a match. */
   10919      UInt rD = 99, rN = 99, rM = 99, rA = 99;  /* dummy initial values, overwritten on a match */
   10920      Bool gate = False;  /* becomes True only if the encoding and register checks pass */
   10921      /* Recognise the Thumb (isT) or ARM encoding and extract the register fields. */
   10922      if (isT) {
   10923        if (INSNT0(15,4) == 0xFB7 && INSNT1(7,4) == BITS4(0,0,0,0)) {
   10924            rN = INSNT0(3,0);
   10925            rA = INSNT1(15,12);
   10926            rD = INSNT1(11,8);
   10927            rM = INSNT1(3,0);
   10928            if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && rA != 13)  /* rA is only checked against 13, so rA == 15 gets through */
   10929               gate = True;
   10930         }
   10931      } else {
   10932         if (INSNA(27,20) == BITS8(0,1,1,1,1,0,0,0) &&
   10933             INSNA(7,4)   == BITS4(0,0,0,1) ) {
   10934            rD = INSNA(19,16);
   10935            rA = INSNA(15,12);
   10936            rM = INSNA(11,8);
   10937            rN = INSNA(3,0);
   10938            if (rD != 15 && rN != 15 && rM != 15 /* but rA can be 15 */)
   10939               gate = True;
   10940         }
   10941      }
   10942      /* We allow rA == 15, to denote the usad8 (no accumulator) case. */
   10943 
   10944      if (gate) {
   10945         IRExpr* rNe = isT ? getIRegT(rN) : getIRegA(rN);
   10946         IRExpr* rMe = isT ? getIRegT(rM) : getIRegA(rM);
   10947         IRExpr* rAe = rA == 15 ? mkU32(0)  /* usad8: the accumulator is the constant 0 instead of a register read */
   10948                                : (isT ? getIRegT(rA) : getIRegA(rA));
   10949         IRExpr* res = binop(Iop_Add32,  /* NOTE(review): Iop_Sad8Ux4 is presumably the sum of absolute differences over the four unsigned byte lanes -- confirm against the IR op definition */
   10950                             binop(Iop_Sad8Ux4, rNe, rMe),
   10951                             rAe);
   10952         if (isT)
   10953            putIRegT( rD, res, condT );
   10954         else
   10955            putIRegA( rD, res, condT, Ijk_Boring );
   10956      /* The printed mnemonic depends on whether an accumulator register is present. */
   10957         if (rA == 15) {
   10958            DIP( "usad8%s r%u, r%u, r%u\n",
   10959                 nCC(conq), rD, rN, rM );
   10960         } else {
   10961            DIP( "usada8%s r%u, r%u, r%u, r%u\n",
   10962                 nCC(conq), rD, rN, rM, rA );
   10963         }
   10964         return True;
   10965      }
   10966      /* fall through: no match here, so the decoders that follow get a chance */
   10967    }
   10968 
   10969    /* ------------------ qadd<c> <Rd>,<Rm>,<Rn> ------------------- */
            /* Decodes qadd (Thumb or ARM encoding) and emits
               Rd = Iop_QAdd32S(Rm, Rn); by the IROp name this is presumably a
               signed saturating 32-bit add.  The overflow predicate of the
               plain (wrapping) Add32 of the same operands is passed to
               or_into_QFLAG32.  Operand order in the banner follows the
               disassembly printed by DIP below (Rd, Rm, Rn). */
   10970    {
            /* 99 is only a placeholder: every field is overwritten before
               gate can become True. */
   10971      UInt regD = 99, regN = 99, regM = 99;
   10972      Bool gate = False;
   10973 
   10974      if (isT) {
               /* Thumb: first halfword 0xFA8:Rn, second halfword must match
                  1111:Rd:1000:Rm. */
   10975         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
   10976            regN = INSNT0(3,0);
   10977            regD = INSNT1(11,8);
   10978            regM = INSNT1(3,0);
   10979            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10980               gate = True;
   10981         }
   10982      } else {
               /* ARM: bits 27:20 = 00010000, bits 11:8 = 0000, bits 7:4 = 0101;
                  r15 is refused for all three registers. */
   10983         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
   10984             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   10985             INSNA(7,4)   == BITS4(0,1,0,1)) {
   10986            regD = INSNA(15,12);
   10987            regN = INSNA(19,16);
   10988            regM = INSNA(3,0);
   10989            if (regD != 15 && regN != 15 && regM != 15)
   10990               gate = True;
   10991         }
   10992      }
   10993 
   10994      if (gate) {
   10995         IRTemp rNt   = newTemp(Ity_I32);
   10996         IRTemp rMt   = newTemp(Ity_I32);
   10997         IRTemp res_q = newTemp(Ity_I32);
   10998 
               /* Snapshot both sources into temps first, so the overflow
                  computation after the register write still sees the original
                  operand values even when regD aliases regN or regM. */
   10999         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11000         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11001 
   11002         assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt)));
   11003         if (isT)
   11004            putIRegT( regD, mkexpr(res_q), condT );
   11005         else
   11006            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11007 
               /* Presumably ORs the overflow indication into the sticky Q
                  flag under condT -- helper not visible here. */
   11008         or_into_QFLAG32(
   11009            signed_overflow_after_Add32(
   11010               binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
   11011            condT
   11012         );
   11013 
   11014         DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11015         return True;
   11016      }
   11017      /* fall through */
   11018    }
   11019 
   11020    /* ------------------ qdadd<c> <Rd>,<Rm>,<Rn> ------------------- */
            /* Decodes qdadd (Thumb or ARM encoding) and emits
               Rd = Iop_QAdd32S(Rm, Iop_QAdd32S(Rn, Rn)), i.e. Rn is doubled
               with one saturating op and the result is added to Rm with a
               second one (saturation semantics inferred from the IROp name).
               or_into_QFLAG32 is called twice: once with the overflow
               predicate of Rn+Rn and once with that of Rm + doubled-Rn. */
   11021    {
            /* 99 is only a placeholder: every field is overwritten before
               gate can become True. */
   11022      UInt regD = 99, regN = 99, regM = 99;
   11023      Bool gate = False;
   11024 
   11025      if (isT) {
               /* Thumb: first halfword 0xFA8:Rn, second halfword must match
                  1111:Rd:1001:Rm. */
   11026         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF090) {
   11027            regN = INSNT0(3,0);
   11028            regD = INSNT1(11,8);
   11029            regM = INSNT1(3,0);
   11030            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11031               gate = True;
   11032         }
   11033      } else {
               /* ARM: bits 27:20 = 00010100, bits 11:8 = 0000, bits 7:4 = 0101;
                  r15 is refused for all three registers. */
   11034         if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
   11035             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   11036             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11037            regD = INSNA(15,12);
   11038            regN = INSNA(19,16);
   11039            regM = INSNA(3,0);
   11040            if (regD != 15 && regN != 15 && regM != 15)
   11041               gate = True;
   11042         }
   11043      }
   11044 
   11045      if (gate) {
   11046         IRTemp rNt   = newTemp(Ity_I32);
   11047         IRTemp rMt   = newTemp(Ity_I32);
   11048         IRTemp rN_d  = newTemp(Ity_I32);
   11049         IRTemp res_q = newTemp(Ity_I32);
   11050 
   11051         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11052         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11053 
               /* First Q contribution: overflow of the doubling step Rn+Rn. */
   11054         or_into_QFLAG32(
   11055            signed_overflow_after_Add32(
   11056               binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
   11057            condT
   11058         );
   11059 
               /* rN_d holds the doubled Rn; the final sum uses it, not the
                  wrapping Rn+Rn computed above for the overflow test. */
   11060         assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
   11061         assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rN_d)));
   11062         if (isT)
   11063            putIRegT( regD, mkexpr(res_q), condT );
   11064         else
   11065            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11066 
               /* Second Q contribution: overflow of Rm + doubled Rn. */
   11067         or_into_QFLAG32(
   11068            signed_overflow_after_Add32(
   11069               binop(Iop_Add32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
   11070            condT
   11071         );
   11072 
   11073         DIP("qdadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11074         return True;
   11075      }
   11076      /* fall through */
   11077    }
   11078 
   11079    /* ------------------ qsub<c> <Rd>,<Rm>,<Rn> ------------------- */
            /* Decodes qsub (Thumb or ARM encoding) and emits
               Rd = Iop_QSub32S(Rm, Rn); by the IROp name this is presumably a
               signed saturating 32-bit subtract.  The overflow predicate of
               the plain (wrapping) Rm - Rn is passed to or_into_QFLAG32.
               Operand order in the banner follows the disassembly printed by
               DIP below (Rd, Rm, Rn). */
   11080    {
            /* 99 is only a placeholder: every field is overwritten before
               gate can become True. */
   11081      UInt regD = 99, regN = 99, regM = 99;
   11082      Bool gate = False;
   11083 
   11084      if (isT) {
               /* Thumb: first halfword 0xFA8:Rn, second halfword must match
                  1111:Rd:1010:Rm. */
   11085         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) {
   11086            regN = INSNT0(3,0);
   11087            regD = INSNT1(11,8);
   11088            regM = INSNT1(3,0);
   11089            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11090               gate = True;
   11091         }
   11092      } else {
               /* ARM: bits 27:20 = 00010010, bits 11:8 = 0000, bits 7:4 = 0101;
                  r15 is refused for all three registers. */
   11093         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
   11094             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   11095             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11096            regD = INSNA(15,12);
   11097            regN = INSNA(19,16);
   11098            regM = INSNA(3,0);
   11099            if (regD != 15 && regN != 15 && regM != 15)
   11100               gate = True;
   11101         }
   11102      }
   11103 
   11104      if (gate) {
   11105         IRTemp rNt   = newTemp(Ity_I32);
   11106         IRTemp rMt   = newTemp(Ity_I32);
   11107         IRTemp res_q = newTemp(Ity_I32);
   11108 
   11109         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11110         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11111 
               /* Rm is the minuend and Rn the subtrahend. */
   11112         assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt)));
   11113         if (isT)
   11114            putIRegT( regD, mkexpr(res_q), condT );
   11115         else
   11116            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11117 
               /* Presumably ORs the overflow indication into the sticky Q
                  flag under condT -- helper not visible here. */
   11118         or_into_QFLAG32(
   11119            signed_overflow_after_Sub32(
   11120               binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
   11121            condT
   11122         );
   11123 
   11124         DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11125         return True;
   11126      }
   11127      /* fall through */
   11128    }
   11129 
   11130    /* ------------------ qdsub<c> <Rd>,<Rm>,<Rn> ------------------- */
            /* Decodes qdsub (Thumb or ARM encoding) and emits
               Rd = Iop_QSub32S(Rm, Iop_QAdd32S(Rn, Rn)), i.e. Rn is doubled
               with a saturating add and then subtracted from Rm with a
               saturating subtract (saturation semantics inferred from the
               IROp names).  or_into_QFLAG32 is called twice: once with the
               overflow predicate of Rn+Rn and once with that of
               Rm - doubled-Rn. */
   11131    {
            /* 99 is only a placeholder: every field is overwritten before
               gate can become True. */
   11132      UInt regD = 99, regN = 99, regM = 99;
   11133      Bool gate = False;
   11134 
   11135      if (isT) {
               /* Thumb: first halfword 0xFA8:Rn, second halfword must match
                  1111:Rd:1011:Rm. */
   11136         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0B0) {
   11137            regN = INSNT0(3,0);
   11138            regD = INSNT1(11,8);
   11139            regM = INSNT1(3,0);
   11140            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11141               gate = True;
   11142         }
   11143      } else {
               /* ARM: bits 27:20 = 00010110, bits 11:8 = 0000, bits 7:4 = 0101;
                  r15 is refused for all three registers. */
   11144         if (INSNA(27,20) == BITS8(0,0,0,1,0,1,1,0) &&
   11145             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   11146             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11147            regD = INSNA(15,12);
   11148            regN = INSNA(19,16);
   11149            regM = INSNA(3,0);
   11150            if (regD != 15 && regN != 15 && regM != 15)
   11151               gate = True;
   11152         }
   11153      }
   11154 
   11155      if (gate) {
   11156         IRTemp rNt   = newTemp(Ity_I32);
   11157         IRTemp rMt   = newTemp(Ity_I32);
   11158         IRTemp rN_d  = newTemp(Ity_I32);
   11159         IRTemp res_q = newTemp(Ity_I32);
   11160 
   11161         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11162         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11163 
               /* First Q contribution: overflow of the doubling step Rn+Rn
                  (an addition, hence the Add32 overflow helper). */
   11164         or_into_QFLAG32(
   11165            signed_overflow_after_Add32(
   11166               binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
   11167            condT
   11168         );
   11169 
   11170         assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
   11171         assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rN_d)));
   11172         if (isT)
   11173            putIRegT( regD, mkexpr(res_q), condT );
   11174         else
   11175            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11176 
               /* Second Q contribution: overflow of Rm - doubled Rn. */
   11177         or_into_QFLAG32(
   11178            signed_overflow_after_Sub32(
   11179               binop(Iop_Sub32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
   11180            condT
   11181         );
   11182 
   11183         DIP("qdsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11184         return True;
   11185      }
   11186      /* fall through */
   11187    }
   11188 
   11189    /* ------------------ uqsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
            /* Decodes uqsub16 (Thumb or ARM encoding) and emits
               Rd = Iop_QSub16Ux2(Rn, Rm).  By the IROp name this is
               presumably an unsigned saturating subtract on two 16-bit lanes
               -- confirm against the IR definition.  No flags are written
               in this block. */
   11190    {
            /* 99 is only a placeholder: every field is overwritten before
               gate can become True. */
   11191      UInt regD = 99, regN = 99, regM = 99;
   11192      Bool gate = False;
   11193 
   11194      if (isT) {
               /* Thumb: first halfword 0xFAD:Rn, second halfword must match
                  1111:Rd:0101:Rm. */
   11195         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11196            regN = INSNT0(3,0);
   11197            regD = INSNT1(11,8);
   11198            regM = INSNT1(3,0);
   11199            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11200               gate = True;
   11201         }
   11202      } else {
               /* ARM: bits 27:20 = 01100110, bits 11:8 = 1111, bits 7:4 = 0111;
                  r15 is refused for all three registers. */
   11203         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11204             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11205             INSNA(7,4)   == BITS4(0,1,1,1)) {
   11206            regD = INSNA(15,12);
   11207            regN = INSNA(19,16);
   11208            regM = INSNA(3,0);
   11209            if (regD != 15 && regN != 15 && regM != 15)
   11210              gate = True;
   11211         }
   11212      }
   11213 
   11214      if (gate) {
   11215         IRTemp rNt   = newTemp(Ity_I32);
   11216         IRTemp rMt   = newTemp(Ity_I32);
   11217         IRTemp res_q = newTemp(Ity_I32);
   11218 
   11219         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11220         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11221 
               /* Rn is the first (minuend) operand, Rm the second. */
   11222         assign(res_q, binop(Iop_QSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
   11223         if (isT)
   11224            putIRegT( regD, mkexpr(res_q), condT );
   11225         else
   11226            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11227 
   11228         DIP("uqsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11229         return True;
   11230      }
   11231      /* fall through */
   11232    }
   11233 
   11234    /* ----------------- shadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
            /* Decodes shadd16 (Thumb or ARM encoding) and emits
               Rd = Iop_HAdd16Sx2(Rn, Rm).  By the IROp name this is
               presumably a signed halving add on two 16-bit lanes -- confirm
               against the IR definition.  No flags are written in this
               block. */
   11235    {
            /* 99 is only a placeholder: every field is overwritten before
               gate can become True. */
   11236      UInt regD = 99, regN = 99, regM = 99;
   11237      Bool gate = False;
   11238 
   11239      if (isT) {
               /* Thumb: first halfword 0xFA9:Rn, second halfword must match
                  1111:Rd:0010:Rm. */
   11240         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   11241            regN = INSNT0(3,0);
   11242            regD = INSNT1(11,8);
   11243            regM = INSNT1(3,0);
   11244            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11245               gate = True;
   11246         }
   11247      } else {
               /* ARM: bits 27:20 = 01100011, bits 11:8 = 1111, bits 7:4 = 0001;
                  r15 is refused for all three registers. */
   11248         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   11249             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11250             INSNA(7,4)   == BITS4(0,0,0,1)) {
   11251            regD = INSNA(15,12);
   11252            regN = INSNA(19,16);
   11253            regM = INSNA(3,0);
   11254            if (regD != 15 && regN != 15 && regM != 15)
   11255               gate = True;
   11256         }
   11257      }
   11258 
   11259      if (gate) {
   11260         IRTemp rNt   = newTemp(Ity_I32);
   11261         IRTemp rMt   = newTemp(Ity_I32);
               /* Named res_q like the saturating blocks, although the op used
                  here is a halving add rather than a Q-prefixed op. */
   11262         IRTemp res_q = newTemp(Ity_I32);
   11263 
   11264         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11265         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11266 
   11267         assign(res_q, binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
   11268         if (isT)
   11269            putIRegT( regD, mkexpr(res_q), condT );
   11270         else
   11271            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11272 
   11273         DIP("shadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11274         return True;
   11275      }
   11276      /* fall through */
   11277    }
   11278 
   11279    /* ----------------- uhsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
            /* Decodes uhsub8 (Thumb or ARM encoding) and emits
               Rd = Iop_HSub8Ux4(Rn, Rm).  By the IROp name this is
               presumably an unsigned halving subtract on four 8-bit lanes --
               confirm against the IR definition.  No flags are written in
               this block. */
   11280    {
            /* 99 is only a placeholder: every field is overwritten before
               gate can become True. */
   11281      UInt regD = 99, regN = 99, regM = 99;
   11282      Bool gate = False;
   11283 
   11284      if (isT) {
               /* Thumb: first halfword 0xFAC:Rn, second halfword must match
                  1111:Rd:0110:Rm. */
   11285         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   11286            regN = INSNT0(3,0);
   11287            regD = INSNT1(11,8);
   11288            regM = INSNT1(3,0);
   11289            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11290               gate = True;
   11291         }
   11292      } else {
               /* ARM: bits 27:20 = 01100111, bits 11:8 = 1111, bits 7:4 = 1111;
                  r15 is refused for all three registers. */
   11293         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   11294             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11295             INSNA(7,4)   == BITS4(1,1,1,1)) {
   11296            regD = INSNA(15,12);
   11297            regN = INSNA(19,16);
   11298            regM = INSNA(3,0);
   11299            if (regD != 15 && regN != 15 && regM != 15)
   11300               gate = True;
   11301         }
   11302      }
   11303 
   11304      if (gate) {
   11305         IRTemp rNt   = newTemp(Ity_I32);
   11306         IRTemp rMt   = newTemp(Ity_I32);
   11307         IRTemp res_q = newTemp(Ity_I32);
   11308 
   11309         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11310         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11311 
               /* Rn is the first (minuend) operand, Rm the second. */
   11312         assign(res_q, binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
   11313         if (isT)
   11314            putIRegT( regD, mkexpr(res_q), condT );
   11315         else
   11316            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11317 
   11318         DIP("uhsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11319         return True;
   11320      }
   11321      /* fall through */
   11322    }
   11323 
   11324    /* ----------------- uhsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
            /* Decodes uhsub16 (Thumb or ARM encoding) and emits
               Rd = Iop_HSub16Ux2(Rn, Rm).  By the IROp name this is
               presumably an unsigned halving subtract on two 16-bit lanes --
               confirm against the IR definition.  No flags are written in
               this block. */
   11325    {
            /* 99 is only a placeholder: every field is overwritten before
               gate can become True. */
   11326      UInt regD = 99, regN = 99, regM = 99;
   11327      Bool gate = False;
   11328 
   11329      if (isT) {
               /* Thumb: first halfword 0xFAD:Rn, second halfword must match
                  1111:Rd:0110:Rm. */
   11330         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   11331            regN = INSNT0(3,0);
   11332            regD = INSNT1(11,8);
   11333            regM = INSNT1(3,0);
   11334            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11335               gate = True;
   11336         }
   11337      } else {
               /* ARM: bits 27:20 = 01100111, bits 11:8 = 1111, bits 7:4 = 0111;
                  r15 is refused for all three registers. */
   11338         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   11339             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11340             INSNA(7,4)   == BITS4(0,1,1,1)) {
   11341            regD = INSNA(15,12);
   11342            regN = INSNA(19,16);
   11343            regM = INSNA(3,0);
   11344            if (regD != 15 && regN != 15 && regM != 15)
   11345               gate = True;
   11346         }
   11347      }
   11348 
   11349      if (gate) {
   11350         IRTemp rNt   = newTemp(Ity_I32);
   11351         IRTemp rMt   = newTemp(Ity_I32);
   11352         IRTemp res_q = newTemp(Ity_I32);
   11353 
   11354         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11355         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11356 
               /* Rn is the first (minuend) operand, Rm the second. */
   11357         assign(res_q, binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
   11358         if (isT)
   11359            putIRegT( regD, mkexpr(res_q), condT );
   11360         else
   11361            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11362 
   11363         DIP("uhsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11364         return True;
   11365      }
   11366      /* fall through */
   11367    }
   11368 
   11369    /* ------------------ uqadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
            /* Decodes uqadd16 (Thumb or ARM encoding) and emits
               Rd = Iop_QAdd16Ux2(Rn, Rm).  By the IROp name this is
               presumably an unsigned saturating add on two 16-bit lanes --
               confirm against the IR definition.  No flags are written in
               this block. */
   11370    {
            /* 99 is only a placeholder: every field is overwritten before
               gate can become True. */
   11371      UInt regD = 99, regN = 99, regM = 99;
   11372      Bool gate = False;
   11373 
   11374      if (isT) {
               /* Thumb: first halfword 0xFA9:Rn, second halfword must match
                  1111:Rd:0101:Rm. */
   11375         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11376            regN = INSNT0(3,0);
   11377            regD = INSNT1(11,8);
   11378            regM = INSNT1(3,0);
   11379            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11380               gate = True;
   11381         }
   11382      } else {
               /* ARM: bits 27:20 = 01100110, bits 11:8 = 1111, bits 7:4 = 0001;
                  r15 is refused for all three registers. */
   11383         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11384             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11385             INSNA(7,4)   == BITS4(0,0,0,1)) {
   11386            regD = INSNA(15,12);
   11387            regN = INSNA(19,16);
   11388            regM = INSNA(3,0);
   11389            if (regD != 15 && regN != 15 && regM != 15)
   11390               gate = True;
   11391         }
   11392      }
   11393 
   11394      if (gate) {
   11395         IRTemp rNt   = newTemp(Ity_I32);
   11396         IRTemp rMt   = newTemp(Ity_I32);
   11397         IRTemp res_q = newTemp(Ity_I32);
   11398 
   11399         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11400         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11401 
   11402         assign(res_q, binop(Iop_QAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
               /* condT is handed to the register write; presumably it makes
                  the write conditional -- the put helpers are not visible
                  here. */
   11403         if (isT)
   11404            putIRegT( regD, mkexpr(res_q), condT );
   11405         else
   11406            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11407 
   11408         DIP("uqadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11409         return True;
   11410      }
   11411      /* fall through */
   11412    }
   11413 
   11414    /* ------------------- uqsax<c> <Rd>,<Rn>,<Rm> ------------------- */
            /* Decodes uqsax (Thumb or ARM encoding).  With hi(x) = x >> 16 and
               lo(x) = the zero-extended low 16 bits of x, the block computes
                  diff = hi(Rn) - lo(Rm)      sum = lo(Rn) + hi(Rm)
               passes each through armUnsignedSatQ with width 0x10, and writes
                  Rd = (diff_res << 16) | (sum_res & 0xFFFF).
               armUnsignedSatQ is not visible here; presumably it clamps the
               value to the unsigned 16-bit range, with the NULL second
               argument meaning no saturation indication is requested --
               confirm against the helper. */
   11415    {
            /* 99 is only a placeholder: every field is overwritten before
               gate can become True. */
   11416      UInt regD = 99, regN = 99, regM = 99;
   11417      Bool gate = False;
   11418 
   11419      if (isT) {
               /* Thumb: first halfword 0xFAE:Rn, second halfword must match
                  1111:Rd:0101:Rm. */
   11420         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11421            regN = INSNT0(3,0);
   11422            regD = INSNT1(11,8);
   11423            regM = INSNT1(3,0);
   11424            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11425               gate = True;
   11426         }
   11427      } else {
               /* ARM: bits 27:20 = 01100110, bits 11:8 = 1111, bits 7:4 = 0101;
                  r15 is refused for all three registers. */
   11428         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11429             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11430             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11431            regD = INSNA(15,12);
   11432            regN = INSNA(19,16);
   11433            regM = INSNA(3,0);
   11434            if (regD != 15 && regN != 15 && regM != 15)
   11435               gate = True;
   11436         }
   11437      }
   11438 
   11439      if (gate) {
   11440         IRTemp irt_regN     = newTemp(Ity_I32);
   11441         IRTemp irt_regM     = newTemp(Ity_I32);
   11442         IRTemp irt_sum      = newTemp(Ity_I32);
   11443         IRTemp irt_diff     = newTemp(Ity_I32);
   11444         IRTemp irt_sum_res  = newTemp(Ity_I32);
   11445         IRTemp irt_diff_res = newTemp(Ity_I32);
   11446 
   11447         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11448         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11449 
               /* diff = hi(Rn) - lo(Rm).  Shl by 16 followed by a logical Shr
                  by 16 isolates the low halfword, zero-extended. */
   11450         assign( irt_diff,
   11451                 binop( Iop_Sub32,
   11452                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11453                        binop( Iop_Shr32,
   11454                               binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
   11455                               mkU8(16) ) ) );
   11456         armUnsignedSatQ( &irt_diff_res, NULL, irt_diff, 0x10);
   11457 
               /* sum = lo(Rn) + hi(Rm). */
   11458         assign( irt_sum,
   11459                 binop( Iop_Add32,
   11460                        binop( Iop_Shr32,
   11461                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   11462                               mkU8(16) ),
   11463                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) )) );
   11464         armUnsignedSatQ( &irt_sum_res, NULL, irt_sum, 0x10 );
   11465 
               /* Difference goes to the top halfword, sum to the bottom. */
   11466         IRExpr* ire_result = binop( Iop_Or32,
   11467                                     binop( Iop_Shl32, mkexpr(irt_diff_res),
   11468                                            mkU8(16) ),
   11469                                     binop( Iop_And32, mkexpr(irt_sum_res),
   11470                                            mkU32(0xFFFF)) );
   11471 
   11472         if (isT)
   11473            putIRegT( regD, ire_result, condT );
   11474         else
   11475            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11476 
   11477         DIP( "uqsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11478         return True;
   11479      }
   11480      /* fall through */
   11481    }
   11482 
   11483    /* ------------------- uqasx<c> <Rd>,<Rn>,<Rm> ------------------- */
            /* Decodes uqasx (Thumb or ARM encoding).  With hi(x) = x >> 16 and
               lo(x) = the zero-extended low 16 bits of x, the block computes
                  diff = lo(Rn) - hi(Rm)      sum = hi(Rn) + lo(Rm)
               passes each through armUnsignedSatQ with width 0x10, and writes
                  Rd = (res_sum << 16) | (res_diff & 0xFFFF).
               armUnsignedSatQ is not visible here; presumably it clamps the
               value to the unsigned 16-bit range, with the NULL second
               argument meaning no saturation indication is requested --
               confirm against the helper. */
   11484    {
            /* 99 is only a placeholder: every field is overwritten before
               gate can become True. */
   11485      UInt regD = 99, regN = 99, regM = 99;
   11486      Bool gate = False;
   11487 
   11488      if (isT) {
               /* Thumb: first halfword 0xFAA:Rn, second halfword must match
                  1111:Rd:0101:Rm. */
   11489         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11490            regN = INSNT0(3,0);
   11491            regD = INSNT1(11,8);
   11492            regM = INSNT1(3,0);
   11493            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11494               gate = True;
   11495         }
   11496      } else {
               /* ARM: bits 27:20 = 01100110, bits 11:8 = 1111, bits 7:4 = 0011;
                  r15 is refused for all three registers. */
   11497         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11498             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11499             INSNA(7,4)   == BITS4(0,0,1,1)) {
   11500            regD = INSNA(15,12);
   11501            regN = INSNA(19,16);
   11502            regM = INSNA(3,0);
   11503            if (regD != 15 && regN != 15 && regM != 15)
   11504               gate = True;
   11505         }
   11506      }
   11507 
   11508      if (gate) {
   11509         IRTemp irt_regN     = newTemp(Ity_I32);
   11510         IRTemp irt_regM     = newTemp(Ity_I32);
   11511         IRTemp irt_sum      = newTemp(Ity_I32);
   11512         IRTemp irt_diff     = newTemp(Ity_I32);
   11513         IRTemp irt_res_sum  = newTemp(Ity_I32);
   11514         IRTemp irt_res_diff = newTemp(Ity_I32);
   11515 
   11516         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11517         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11518 
               /* diff = lo(Rn) - hi(Rm).  Shl by 16 followed by a logical Shr
                  by 16 isolates the low halfword, zero-extended. */
   11519         assign( irt_diff,
   11520                 binop( Iop_Sub32,
   11521                        binop( Iop_Shr32,
   11522                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   11523                               mkU8(16) ),
   11524                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
   11525         armUnsignedSatQ( &irt_res_diff, NULL, irt_diff, 0x10 );
   11526 
               /* sum = hi(Rn) + lo(Rm). */
   11527         assign( irt_sum,
   11528                 binop( Iop_Add32,
   11529                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11530                        binop( Iop_Shr32,
   11531                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   11532                               mkU8(16) ) ) );
   11533         armUnsignedSatQ( &irt_res_sum, NULL, irt_sum, 0x10 );
   11534 
               /* Sum goes to the top halfword, difference to the bottom. */
   11535         IRExpr* ire_result
   11536           = binop( Iop_Or32,
   11537                    binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
   11538                    binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
   11539 
   11540         if (isT)
   11541            putIRegT( regD, ire_result, condT );
   11542         else
   11543            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11544 
   11545         DIP( "uqasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11546         return True;
   11547      }
   11548      /* fall through */
   11549    }
   11550 
   11551    /* ------------------- usax<c> <Rd>,<Rn>,<Rm> ------------------- */
            /* Decodes usax (Thumb or ARM encoding).  With hi(x) = x >> 16 and
               lo(x) = the zero-extended low 16 bits of x, the block computes
                  sum = lo(Rn) + hi(Rm)       diff = hi(Rn) - lo(Rm)
               and writes  Rd = (diff << 16) | (sum & 0xFFFF)  without any
               saturation.  GE flags 0 and 1 are set from the sum, GE flags 2
               and 3 from the difference, via put_GEFLAG32. */
   11552    {
            /* 99 is only a placeholder: every field is overwritten before
               gate can become True. */
   11553      UInt regD = 99, regN = 99, regM = 99;
   11554      Bool gate = False;
   11555 
   11556      if (isT) {
               /* Thumb: first halfword 0xFAE:Rn, second halfword must match
                  1111:Rd:0100:Rm. */
   11557         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   11558            regN = INSNT0(3,0);
   11559            regD = INSNT1(11,8);
   11560            regM = INSNT1(3,0);
   11561            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11562               gate = True;
   11563         }
   11564      } else {
               /* ARM: bits 27:20 = 01100101, bits 11:8 = 1111, bits 7:4 = 0101;
                  r15 is refused for all three registers. */
   11565         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   11566             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11567             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11568            regD = INSNA(15,12);
   11569            regN = INSNA(19,16);
   11570            regM = INSNA(3,0);
   11571            if (regD != 15 && regN != 15 && regM != 15)
   11572               gate = True;
   11573         }
   11574      }
   11575 
   11576      if (gate) {
   11577         IRTemp irt_regN = newTemp(Ity_I32);
   11578         IRTemp irt_regM = newTemp(Ity_I32);
   11579         IRTemp irt_sum  = newTemp(Ity_I32);
   11580         IRTemp irt_diff = newTemp(Ity_I32);
   11581 
   11582         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11583         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11584 
               /* sum = lo(Rn) + hi(Rm); 32to16 then 16Uto32 zero-extends the
                  low halfword. */
   11585         assign( irt_sum,
   11586                 binop( Iop_Add32,
   11587                        unop( Iop_16Uto32,
   11588                              unop( Iop_32to16, mkexpr(irt_regN) )
   11589                        ),
   11590                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
   11591 
               /* diff = hi(Rn) - lo(Rm); may wrap to a value with bit 31 set
                  when lo(Rm) > hi(Rn). */
   11592         assign( irt_diff,
   11593                 binop( Iop_Sub32,
   11594                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11595                        unop( Iop_16Uto32,
   11596                              unop( Iop_32to16, mkexpr(irt_regM) )
   11597                        )
   11598                 )
   11599         );
   11600 
               /* Difference goes to the top halfword, sum to the bottom. */
   11601         IRExpr* ire_result
   11602           = binop( Iop_Or32,
   11603                    binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
   11604                    binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
   11605 
               /* ge10 is 1 exactly when sum >= 0x10000 (unsigned compare),
                  i.e. the 16-bit addition carried out of the halfword. */
   11606         IRTemp ge10 = newTemp(Ity_I32);
   11607         assign( ge10, IRExpr_ITE( binop( Iop_CmpLE32U,
   11608                                          mkU32(0x10000), mkexpr(irt_sum) ),
   11609                                   mkU32(1), mkU32(0) ) );
   11610         put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
   11611         put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
   11612 
               /* ge32 is the bitwise complement of diff.  The second argument
                  31 presumably tells put_GEFLAG32 to look at bit 31 only, so
                  the flag is set when diff is non-negative -- confirm against
                  put_GEFLAG32, which is not visible here. */
   11613         IRTemp ge32 = newTemp(Ity_I32);
   11614         assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
   11615         put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
   11616         put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
   11617 
   11618         if (isT)
   11619            putIRegT( regD, ire_result, condT );
   11620         else
   11621            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11622 
   11623         DIP( "usax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11624         return True;
   11625      }
   11626      /* fall through */
   11627    }
   11628 
   11629    /* ------------------- uasx<c> <Rd>,<Rn>,<Rm> ------------------- */
            /* Decodes uasx (Thumb or ARM encoding).  With hi(x) = x >> 16 and
               lo(x) = the zero-extended low 16 bits of x, the block computes
                  diff = lo(Rn) - hi(Rm)      sum = hi(Rn) + lo(Rm)
               and writes  Rd = (sum << 16) | (diff & 0xFFFF)  without any
               saturation.  GE flags 0 and 1 are set from the difference, GE
               flags 2 and 3 from the sum, via put_GEFLAG32. */
   11630    {
            /* 99 is only a placeholder: every field is overwritten before
               gate can become True. */
   11631      UInt regD = 99, regN = 99, regM = 99;
   11632      Bool gate = False;
   11633 
   11634      if (isT) {
               /* Thumb: first halfword 0xFAA:Rn, second halfword must match
                  1111:Rd:0100:Rm. */
   11635         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   11636            regN = INSNT0(3,0);
   11637            regD = INSNT1(11,8);
   11638            regM = INSNT1(3,0);
   11639            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11640               gate = True;
   11641         }
   11642      } else {
               /* ARM: bits 27:20 = 01100101, bits 11:8 = 1111, bits 7:4 = 0011;
                  r15 is refused for all three registers. */
   11643         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   11644             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11645             INSNA(7,4)   == BITS4(0,0,1,1)) {
   11646            regD = INSNA(15,12);
   11647            regN = INSNA(19,16);
   11648            regM = INSNA(3,0);
   11649            if (regD != 15 && regN != 15 && regM != 15)
   11650               gate = True;
   11651         }
   11652      }
   11653 
   11654      if (gate) {
   11655         IRTemp irt_regN = newTemp(Ity_I32);
   11656         IRTemp irt_regM = newTemp(Ity_I32);
   11657         IRTemp irt_sum  = newTemp(Ity_I32);
   11658         IRTemp irt_diff = newTemp(Ity_I32);
   11659 
   11660         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11661         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11662 
               /* diff = lo(Rn) - hi(Rm); 32to16 then 16Uto32 zero-extends the
                  low halfword.  The result may wrap to a value with bit 31
                  set when hi(Rm) > lo(Rn). */
   11663         assign( irt_diff,
   11664                 binop( Iop_Sub32,
   11665                        unop( Iop_16Uto32,
   11666                              unop( Iop_32to16, mkexpr(irt_regN) )
   11667                        ),
   11668                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
   11669 
               /* sum = hi(Rn) + lo(Rm). */
   11670         assign( irt_sum,
   11671                 binop( Iop_Add32,
   11672                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11673                        unop( Iop_16Uto32,
   11674                              unop( Iop_32to16, mkexpr(irt_regM) )
   11675                        ) ) );
   11676 
               /* Sum goes to the top halfword, difference to the bottom. */
   11677         IRExpr* ire_result
   11678           = binop( Iop_Or32,
   11679                    binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
   11680                    binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
   11681 
               /* ge10 is the bitwise complement of diff.  The second argument
                  31 presumably tells put_GEFLAG32 to look at bit 31 only, so
                  the flag is set when diff is non-negative -- confirm against
                  put_GEFLAG32, which is not visible here. */
   11682         IRTemp ge10 = newTemp(Ity_I32);
   11683         assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
   11684         put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
   11685         put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
   11686 
               /* ge32 is 1 exactly when sum >= 0x10000 (unsigned compare),
                  i.e. the 16-bit addition carried out of the halfword. */
   11687         IRTemp ge32 = newTemp(Ity_I32);
   11688         assign( ge32, IRExpr_ITE( binop( Iop_CmpLE32U,
   11689                                          mkU32(0x10000), mkexpr(irt_sum) ),
   11690                                   mkU32(1), mkU32(0) ) );
   11691         put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
   11692         put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
   11693 
   11694         if (isT)
   11695            putIRegT( regD, ire_result, condT );
   11696         else
   11697            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11698 
   11699         DIP( "uasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11700         return True;
   11701      }
   11702      /* fall through */
   11703    }
   11704 
   11705    /* ------------------- ssax<c> <Rd>,<Rn>,<Rm> ------------------- */
   11706    {
   11707      UInt regD = 99, regN = 99, regM = 99;
   11708      Bool gate = False;
   11709 
   11710      if (isT) {
   11711         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   11712            regN = INSNT0(3,0);
   11713            regD = INSNT1(11,8);
   11714            regM = INSNT1(3,0);
   11715            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11716               gate = True;
   11717         }
   11718      } else {
   11719         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   11720             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11721             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11722            regD = INSNA(15,12);
   11723            regN = INSNA(19,16);
   11724            regM = INSNA(3,0);
   11725            if (regD != 15 && regN != 15 && regM != 15)
   11726               gate = True;
   11727         }
   11728      }
   11729 
   11730      if (gate) {
   11731         IRTemp irt_regN = newTemp(Ity_I32);
   11732         IRTemp irt_regM = newTemp(Ity_I32);
   11733         IRTemp irt_sum  = newTemp(Ity_I32);
   11734         IRTemp irt_diff = newTemp(Ity_I32);
   11735 
   11736         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11737         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11738 
   11739         assign( irt_sum,
   11740                 binop( Iop_Add32,
   11741                        binop( Iop_Sar32,
   11742                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   11743                               mkU8(16) ),
   11744                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
   11745 
   11746         assign( irt_diff,
   11747                 binop( Iop_Sub32,
   11748                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   11749                        binop( Iop_Sar32,
   11750                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   11751                               mkU8(16) ) ) );
   11752 
   11753         IRExpr* ire_result
   11754           = binop( Iop_Or32,
   11755                    binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
   11756                    binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
   11757 
   11758         IRTemp ge10 = newTemp(Ity_I32);
   11759         assign(ge10, unop(Iop_Not32, mkexpr(irt_sum)));
   11760         put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
   11761         put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
   11762 
   11763         IRTemp ge32 = newTemp(Ity_I32);
   11764         assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
   11765         put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
   11766         put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
   11767 
   11768         if (isT)
   11769            putIRegT( regD, ire_result, condT );
   11770         else
   11771            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11772 
   11773         DIP( "ssax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11774         return True;
   11775      }
   11776      /* fall through */
   11777    }
   11778 
   11779    /* ----------------- shsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
   11780    {
   11781      UInt regD = 99, regN = 99, regM = 99;
   11782      Bool gate = False;
   11783 
   11784      if (isT) {
   11785         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   11786            regN = INSNT0(3,0);
   11787            regD = INSNT1(11,8);
   11788            regM = INSNT1(3,0);
   11789            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11790               gate = True;
   11791         }
   11792      } else {
   11793         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   11794             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11795             INSNA(7,4)   == BITS4(1,1,1,1)) {
   11796            regD = INSNA(15,12);
   11797            regN = INSNA(19,16);
   11798            regM = INSNA(3,0);
   11799            if (regD != 15 && regN != 15 && regM != 15)
   11800               gate = True;
   11801         }
   11802      }
   11803 
   11804      if (gate) {
   11805         IRTemp rNt   = newTemp(Ity_I32);
   11806         IRTemp rMt   = newTemp(Ity_I32);
   11807         IRTemp res_q = newTemp(Ity_I32);
   11808 
   11809         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11810         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11811 
   11812         assign(res_q, binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
   11813         if (isT)
   11814            putIRegT( regD, mkexpr(res_q), condT );
   11815         else
   11816            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11817 
   11818         DIP("shsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11819         return True;
   11820      }
   11821      /* fall through */
   11822    }
   11823 
   11824    /* ----------------- sxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
   11825    {
   11826      UInt regD = 99, regN = 99, regM = 99, rotate = 99;
   11827      Bool gate = False;
   11828 
   11829      if (isT) {
   11830         if (INSNT0(15,4) == 0xFA2 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
   11831            regN   = INSNT0(3,0);
   11832            regD   = INSNT1(11,8);
   11833            regM   = INSNT1(3,0);
   11834            rotate = INSNT1(5,4);
   11835            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11836               gate = True;
   11837         }
   11838      } else {
   11839         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
   11840             INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
   11841            regD   = INSNA(15,12);
   11842            regN   = INSNA(19,16);
   11843            regM   = INSNA(3,0);
   11844            rotate = INSNA(11,10);
   11845            if (regD != 15 && regN != 15 && regM != 15)
   11846              gate = True;
   11847         }
   11848      }
   11849 
   11850      if (gate) {
   11851         IRTemp irt_regN = newTemp(Ity_I32);
   11852         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11853 
   11854         IRTemp irt_regM = newTemp(Ity_I32);
   11855         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11856 
   11857         IRTemp irt_rot = newTemp(Ity_I32);
   11858         assign( irt_rot, genROR32(irt_regM, 8 * rotate) );
   11859 
   11860         /* FIXME Maybe we can write this arithmetic in shorter form. */
   11861         IRExpr* resLo
   11862            = binop(Iop_And32,
   11863                    binop(Iop_Add32,
   11864                          mkexpr(irt_regN),
   11865                          unop(Iop_16Uto32,
   11866                               unop(Iop_8Sto16,
   11867                                    unop(Iop_32to8, mkexpr(irt_rot))))),
   11868                    mkU32(0x0000FFFF));
   11869 
   11870         IRExpr* resHi
   11871            = binop(Iop_And32,
   11872                    binop(Iop_Add32,
   11873                          mkexpr(irt_regN),
   11874                          binop(Iop_Shl32,
   11875                                unop(Iop_16Uto32,
   11876                                     unop(Iop_8Sto16,
   11877                                          unop(Iop_32to8,
   11878                                               binop(Iop_Shr32,
   11879                                                     mkexpr(irt_rot),
   11880                                                     mkU8(16))))),
   11881                                mkU8(16))),
   11882                    mkU32(0xFFFF0000));
   11883 
   11884         IRExpr* ire_result
   11885            = binop( Iop_Or32, resHi, resLo );
   11886 
   11887         if (isT)
   11888            putIRegT( regD, ire_result, condT );
   11889         else
   11890            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11891 
   11892         DIP( "sxtab16%s r%u, r%u, r%u, ROR #%u\n",
   11893              nCC(conq), regD, regN, regM, 8 * rotate );
   11894         return True;
   11895      }
   11896      /* fall through */
   11897    }
   11898 
   11899    /* ----------------- shasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   11900    {
   11901      UInt regD = 99, regN = 99, regM = 99;
   11902      Bool gate = False;
   11903 
   11904      if (isT) {
   11905         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   11906            regN = INSNT0(3,0);
   11907            regD = INSNT1(11,8);
   11908            regM = INSNT1(3,0);
   11909            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11910               gate = True;
   11911         }
   11912      } else {
   11913         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   11914             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11915             INSNA(7,4)   == BITS4(0,0,1,1)) {
   11916            regD = INSNA(15,12);
   11917            regN = INSNA(19,16);
   11918            regM = INSNA(3,0);
   11919            if (regD != 15 && regN != 15 && regM != 15)
   11920               gate = True;
   11921         }
   11922      }
   11923 
   11924      if (gate) {
   11925         IRTemp rNt   = newTemp(Ity_I32);
   11926         IRTemp rMt   = newTemp(Ity_I32);
   11927         IRTemp irt_diff  = newTemp(Ity_I32);
   11928         IRTemp irt_sum   = newTemp(Ity_I32);
   11929         IRTemp res_q = newTemp(Ity_I32);
   11930 
   11931         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11932         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11933 
   11934         assign( irt_diff,
   11935                 binop(Iop_Sub32,
   11936                       unop(Iop_16Sto32,
   11937                            unop(Iop_32to16,
   11938                                 mkexpr(rNt)
   11939                            )
   11940                       ),
   11941                       unop(Iop_16Sto32,
   11942                            unop(Iop_32to16,
   11943                                 binop(Iop_Shr32,
   11944                                       mkexpr(rMt), mkU8(16)
   11945                                 )
   11946                            )
   11947                       )
   11948                 )
   11949         );
   11950 
   11951         assign( irt_sum,
   11952                 binop(Iop_Add32,
   11953                       unop(Iop_16Sto32,
   11954                            unop(Iop_32to16,
   11955                                 binop(Iop_Shr32,
   11956                                       mkexpr(rNt), mkU8(16)
   11957                                 )
   11958                            )
   11959                       ),
   11960                       unop(Iop_16Sto32,
   11961                            unop(Iop_32to16, mkexpr(rMt)
   11962                            )
   11963                       )
   11964                 )
   11965         );
   11966 
   11967         assign( res_q,
   11968                 binop(Iop_Or32,
   11969                       unop(Iop_16Uto32,
   11970                            unop(Iop_32to16,
   11971                                 binop(Iop_Shr32,
   11972                                       mkexpr(irt_diff), mkU8(1)
   11973                                 )
   11974                            )
   11975                       ),
   11976                       binop(Iop_Shl32,
   11977                             binop(Iop_Shr32,
   11978                                   mkexpr(irt_sum), mkU8(1)
   11979                             ),
   11980                             mkU8(16)
   11981                      )
   11982                 )
   11983         );
   11984 
   11985         if (isT)
   11986            putIRegT( regD, mkexpr(res_q), condT );
   11987         else
   11988            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11989 
   11990         DIP("shasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11991         return True;
   11992      }
   11993      /* fall through */
   11994    }
   11995 
   11996    /* ----------------- uhasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   11997    {
   11998      UInt regD = 99, regN = 99, regM = 99;
   11999      Bool gate = False;
   12000 
   12001      if (isT) {
   12002         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   12003            regN = INSNT0(3,0);
   12004            regD = INSNT1(11,8);
   12005            regM = INSNT1(3,0);
   12006            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12007               gate = True;
   12008         }
   12009      } else {
   12010         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   12011             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12012             INSNA(7,4)   == BITS4(0,0,1,1)) {
   12013            regD = INSNA(15,12);
   12014            regN = INSNA(19,16);
   12015            regM = INSNA(3,0);
   12016            if (regD != 15 && regN != 15 && regM != 15)
   12017               gate = True;
   12018         }
   12019      }
   12020 
   12021      if (gate) {
   12022         IRTemp rNt   = newTemp(Ity_I32);
   12023         IRTemp rMt   = newTemp(Ity_I32);
   12024         IRTemp irt_diff  = newTemp(Ity_I32);
   12025         IRTemp irt_sum   = newTemp(Ity_I32);
   12026         IRTemp res_q = newTemp(Ity_I32);
   12027 
   12028         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12029         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12030 
   12031         assign( irt_diff,
   12032                 binop(Iop_Sub32,
   12033                       unop(Iop_16Uto32,
   12034                            unop(Iop_32to16,
   12035                                 mkexpr(rNt)
   12036                            )
   12037                       ),
   12038                       unop(Iop_16Uto32,
   12039                            unop(Iop_32to16,
   12040                                 binop(Iop_Shr32,
   12041                                       mkexpr(rMt), mkU8(16)
   12042                                 )
   12043                            )
   12044                       )
   12045                 )
   12046         );
   12047 
   12048         assign( irt_sum,
   12049                 binop(Iop_Add32,
   12050                       unop(Iop_16Uto32,
   12051                            unop(Iop_32to16,
   12052                                 binop(Iop_Shr32,
   12053                                       mkexpr(rNt), mkU8(16)
   12054                                 )
   12055                            )
   12056                       ),
   12057                       unop(Iop_16Uto32,
   12058                            unop(Iop_32to16, mkexpr(rMt)
   12059                            )
   12060                       )
   12061                 )
   12062         );
   12063 
   12064         assign( res_q,
   12065                 binop(Iop_Or32,
   12066                       unop(Iop_16Uto32,
   12067                            unop(Iop_32to16,
   12068                                 binop(Iop_Shr32,
   12069                                       mkexpr(irt_diff), mkU8(1)
   12070                                 )
   12071                            )
   12072                       ),
   12073                       binop(Iop_Shl32,
   12074                             binop(Iop_Shr32,
   12075                                   mkexpr(irt_sum), mkU8(1)
   12076                             ),
   12077                             mkU8(16)
   12078                      )
   12079                 )
   12080         );
   12081 
   12082         if (isT)
   12083            putIRegT( regD, mkexpr(res_q), condT );
   12084         else
   12085            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12086 
   12087         DIP("uhasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12088         return True;
   12089      }
   12090      /* fall through */
   12091    }
   12092 
   12093    /* ----------------- shsax<c> <Rd>,<Rn>,<Rm> ------------------- */
   12094    {
   12095      UInt regD = 99, regN = 99, regM = 99;
   12096      Bool gate = False;
   12097 
   12098      if (isT) {
   12099         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   12100            regN = INSNT0(3,0);
   12101            regD = INSNT1(11,8);
   12102            regM = INSNT1(3,0);
   12103            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12104               gate = True;
   12105         }
   12106      } else {
   12107         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   12108             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12109             INSNA(7,4)   == BITS4(0,1,0,1)) {
   12110            regD = INSNA(15,12);
   12111            regN = INSNA(19,16);
   12112            regM = INSNA(3,0);
   12113            if (regD != 15 && regN != 15 && regM != 15)
   12114               gate = True;
   12115         }
   12116      }
   12117 
   12118      if (gate) {
   12119         IRTemp rNt   = newTemp(Ity_I32);
   12120         IRTemp rMt   = newTemp(Ity_I32);
   12121         IRTemp irt_diff  = newTemp(Ity_I32);
   12122         IRTemp irt_sum   = newTemp(Ity_I32);
   12123         IRTemp res_q = newTemp(Ity_I32);
   12124 
   12125         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12126         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12127 
   12128         assign( irt_sum,
   12129                 binop(Iop_Add32,
   12130                       unop(Iop_16Sto32,
   12131                            unop(Iop_32to16,
   12132                                 mkexpr(rNt)
   12133                            )
   12134                       ),
   12135                       unop(Iop_16Sto32,
   12136                            unop(Iop_32to16,
   12137                                 binop(Iop_Shr32,
   12138                                       mkexpr(rMt), mkU8(16)
   12139                                 )
   12140                            )
   12141                       )
   12142                 )
   12143         );
   12144 
   12145         assign( irt_diff,
   12146                 binop(Iop_Sub32,
   12147                       unop(Iop_16Sto32,
   12148                            unop(Iop_32to16,
   12149                                 binop(Iop_Shr32,
   12150                                       mkexpr(rNt), mkU8(16)
   12151                                 )
   12152                            )
   12153                       ),
   12154                       unop(Iop_16Sto32,
   12155                            unop(Iop_32to16, mkexpr(rMt)
   12156                            )
   12157                       )
   12158                 )
   12159         );
   12160 
   12161         assign( res_q,
   12162                 binop(Iop_Or32,
   12163                       unop(Iop_16Uto32,
   12164                            unop(Iop_32to16,
   12165                                 binop(Iop_Shr32,
   12166                                       mkexpr(irt_sum), mkU8(1)
   12167                                 )
   12168                            )
   12169                       ),
   12170                       binop(Iop_Shl32,
   12171                             binop(Iop_Shr32,
   12172                                   mkexpr(irt_diff), mkU8(1)
   12173                             ),
   12174                             mkU8(16)
   12175                      )
   12176                 )
   12177         );
   12178 
   12179         if (isT)
   12180            putIRegT( regD, mkexpr(res_q), condT );
   12181         else
   12182            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12183 
   12184         DIP("shsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12185         return True;
   12186      }
   12187      /* fall through */
   12188    }
   12189 
   12190    /* ----------------- uhsax<c> <Rd>,<Rn>,<Rm> ------------------- */
   12191    {
   12192      UInt regD = 99, regN = 99, regM = 99;
   12193      Bool gate = False;
   12194 
   12195      if (isT) {
   12196         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   12197            regN = INSNT0(3,0);
   12198            regD = INSNT1(11,8);
   12199            regM = INSNT1(3,0);
   12200            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12201               gate = True;
   12202         }
   12203      } else {
   12204         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   12205             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12206             INSNA(7,4)   == BITS4(0,1,0,1)) {
   12207            regD = INSNA(15,12);
   12208            regN = INSNA(19,16);
   12209            regM = INSNA(3,0);
   12210            if (regD != 15 && regN != 15 && regM != 15)
   12211               gate = True;
   12212         }
   12213      }
   12214 
   12215      if (gate) {
   12216         IRTemp rNt   = newTemp(Ity_I32);
   12217         IRTemp rMt   = newTemp(Ity_I32);
   12218         IRTemp irt_diff  = newTemp(Ity_I32);
   12219         IRTemp irt_sum   = newTemp(Ity_I32);
   12220         IRTemp res_q = newTemp(Ity_I32);
   12221 
   12222         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12223         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12224 
   12225         assign( irt_sum,
   12226                 binop(Iop_Add32,
   12227                       unop(Iop_16Uto32,
   12228                            unop(Iop_32to16,
   12229                                 mkexpr(rNt)
   12230                            )
   12231                       ),
   12232                       unop(Iop_16Uto32,
   12233                            unop(Iop_32to16,
   12234                                 binop(Iop_Shr32,
   12235                                       mkexpr(rMt), mkU8(16)
   12236                                 )
   12237                            )
   12238                       )
   12239                 )
   12240         );
   12241 
   12242         assign( irt_diff,
   12243                 binop(Iop_Sub32,
   12244                       unop(Iop_16Uto32,
   12245                            unop(Iop_32to16,
   12246                                 binop(Iop_Shr32,
   12247                                       mkexpr(rNt), mkU8(16)
   12248                                 )
   12249                            )
   12250                       ),
   12251                       unop(Iop_16Uto32,
   12252                            unop(Iop_32to16, mkexpr(rMt)
   12253                            )
   12254                       )
   12255                 )
   12256         );
   12257 
   12258         assign( res_q,
   12259                 binop(Iop_Or32,
   12260                       unop(Iop_16Uto32,
   12261                            unop(Iop_32to16,
   12262                                 binop(Iop_Shr32,
   12263                                       mkexpr(irt_sum), mkU8(1)
   12264                                 )
   12265                            )
   12266                       ),
   12267                       binop(Iop_Shl32,
   12268                             binop(Iop_Shr32,
   12269                                   mkexpr(irt_diff), mkU8(1)
   12270                             ),
   12271                             mkU8(16)
   12272                      )
   12273                 )
   12274         );
   12275 
   12276         if (isT)
   12277            putIRegT( regD, mkexpr(res_q), condT );
   12278         else
   12279            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12280 
   12281         DIP("uhsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12282         return True;
   12283      }
   12284      /* fall through */
   12285    }
   12286 
   12287    /* ----------------- shsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
   12288    {
   12289      UInt regD = 99, regN = 99, regM = 99;
   12290      Bool gate = False;
   12291 
   12292      if (isT) {
   12293         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   12294            regN = INSNT0(3,0);
   12295            regD = INSNT1(11,8);
   12296            regM = INSNT1(3,0);
   12297            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12298               gate = True;
   12299         }
   12300      } else {
   12301         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   12302             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12303             INSNA(7,4)   == BITS4(0,1,1,1)) {
   12304            regD = INSNA(15,12);
   12305            regN = INSNA(19,16);
   12306            regM = INSNA(3,0);
   12307            if (regD != 15 && regN != 15 && regM != 15)
   12308               gate = True;
   12309         }
   12310      }
   12311 
   12312      if (gate) {
   12313         IRTemp rNt   = newTemp(Ity_I32);
   12314         IRTemp rMt   = newTemp(Ity_I32);
   12315         IRTemp res_q = newTemp(Ity_I32);
   12316 
   12317         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12318         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12319 
   12320         assign(res_q, binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
   12321         if (isT)
   12322            putIRegT( regD, mkexpr(res_q), condT );
   12323         else
   12324            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12325 
   12326         DIP("shsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12327         return True;
   12328      }
   12329      /* fall through */
   12330    }
   12331 
   12332    /* ----------------- smmls{r}<c> <Rd>,<Rn>,<Rm>,<Ra> ------------------- */
   12333    {
   12334      UInt rD = 99, rN = 99, rM = 99, rA = 99;
   12335      Bool round  = False;
   12336      Bool gate   = False;
   12337 
   12338      if (isT) {
   12339         if (INSNT0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
   12340             && INSNT0(6,4) == BITS3(1,1,0)
   12341             && INSNT1(7,5) == BITS3(0,0,0)) {
   12342            round = INSNT1(4,4);
   12343            rA    = INSNT1(15,12);
   12344            rD    = INSNT1(11,8);
   12345            rM    = INSNT1(3,0);
   12346            rN    = INSNT0(3,0);
   12347            if (!isBadRegT(rD)
   12348                && !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rA))
   12349               gate = True;
   12350         }
   12351      } else {
   12352         if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,1)
   12353             && INSNA(15,12) != BITS4(1,1,1,1)
   12354             && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(1,1,0,1)) {
   12355            round = INSNA(5,5);
   12356            rD    = INSNA(19,16);
   12357            rA    = INSNA(15,12);
   12358            rM    = INSNA(11,8);
   12359            rN    = INSNA(3,0);
   12360            if (rD != 15 && rM != 15 && rN != 15)
   12361               gate = True;
   12362         }
   12363      }
   12364      if (gate) {
   12365         IRTemp irt_rA   = newTemp(Ity_I32);
   12366         IRTemp irt_rN   = newTemp(Ity_I32);
   12367         IRTemp irt_rM   = newTemp(Ity_I32);
   12368         assign( irt_rA, isT ? getIRegT(rA) : getIRegA(rA) );
   12369         assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
   12370         assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
   12371         IRExpr* res
   12372         = unop(Iop_64HIto32,
   12373                binop(Iop_Add64,
   12374                      binop(Iop_Sub64,
   12375                            binop(Iop_32HLto64, mkexpr(irt_rA), mkU32(0)),
   12376                            binop(Iop_MullS32, mkexpr(irt_rN), mkexpr(irt_rM))),
   12377                      mkU64(round ? 0x80000000ULL : 0ULL)));
   12378         if (isT)
   12379            putIRegT( rD, res, condT );
   12380         else
   12381            putIRegA(rD, res, condT, Ijk_Boring);
   12382         DIP("smmls%s%s r%u, r%u, r%u, r%u\n",
   12383             round ? "r" : "", nCC(conq), rD, rN, rM, rA);
   12384         return True;
   12385      }
   12386      /* fall through */
   12387    }
   12388 
   12389    /* -------------- smlald{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
   12390    {
   12391      UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
   12392      Bool m_swap = False;
   12393      Bool gate   = False;
   12394 
   12395      if (isT) {
   12396         if (INSNT0(15,4) == 0xFBC &&
   12397             (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0)) {
   12398            rN     = INSNT0(3,0);
   12399            rDlo   = INSNT1(15,12);
   12400            rDhi   = INSNT1(11,8);
   12401            rM     = INSNT1(3,0);
   12402            m_swap = (INSNT1(4,4) & 1) == 1;
   12403            if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
   12404                && !isBadRegT(rM) && rDhi != rDlo)
   12405               gate = True;
   12406         }
   12407      } else {
   12408         if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0)
   12409             && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
   12410            rN     = INSNA(3,0);
   12411            rDlo   = INSNA(15,12);
   12412            rDhi   = INSNA(19,16);
   12413            rM     = INSNA(11,8);
   12414            m_swap = ( INSNA(5,5) & 1 ) == 1;
   12415            if (rDlo != 15 && rDhi != 15
   12416                && rN != 15 && rM != 15 && rDlo != rDhi)
   12417               gate = True;
   12418         }
   12419      }
   12420 
   12421      if (gate) {
   12422         IRTemp irt_rM   = newTemp(Ity_I32);
   12423         IRTemp irt_rN   = newTemp(Ity_I32);
   12424         IRTemp irt_rDhi = newTemp(Ity_I32);
   12425         IRTemp irt_rDlo = newTemp(Ity_I32);
   12426         IRTemp op_2     = newTemp(Ity_I32);
   12427         IRTemp pr_1     = newTemp(Ity_I64);
   12428         IRTemp pr_2     = newTemp(Ity_I64);
   12429         IRTemp result   = newTemp(Ity_I64);
   12430         IRTemp resHi    = newTemp(Ity_I32);
   12431         IRTemp resLo    = newTemp(Ity_I32);
   12432         assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM));
   12433         assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN));
   12434         assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi));
   12435         assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo));
   12436         assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
   12437         assign( pr_1, binop(Iop_MullS32,
   12438                             unop(Iop_16Sto32,
   12439                                  unop(Iop_32to16, mkexpr(irt_rN))
   12440                             ),
   12441                             unop(Iop_16Sto32,
   12442                                  unop(Iop_32to16, mkexpr(op_2))
   12443                             )
   12444                       )
   12445         );
   12446         assign( pr_2, binop(Iop_MullS32,
   12447                             binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
   12448                             binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
   12449                       )
   12450         );
   12451         assign( result, binop(Iop_Add64,
   12452                               binop(Iop_Add64,
   12453                                     mkexpr(pr_1),
   12454                                     mkexpr(pr_2)
   12455                               ),
   12456                               binop(Iop_32HLto64,
   12457                                     mkexpr(irt_rDhi),
   12458                                     mkexpr(irt_rDlo)
   12459                               )
   12460                         )
   12461         );
   12462         assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
   12463         assign( resLo, unop(Iop_64to32, mkexpr(result)) );
   12464         if (isT) {
   12465            putIRegT( rDhi, mkexpr(resHi), condT );
   12466            putIRegT( rDlo, mkexpr(resLo), condT );
   12467         } else {
   12468            putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   12469            putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   12470         }
   12471         DIP("smlald%c%s r%u, r%u, r%u, r%u\n",
   12472             m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
   12473         return True;
   12474      }
   12475      /* fall through */
   12476    }
   12477 
   12478    /* -------------- smlsld{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
   12479    {
   12480      UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
   12481      Bool m_swap = False;
   12482      Bool gate   = False;
   12483 
   12484      if (isT) {
   12485         if ((INSNT0(15,4) == 0xFBD &&
   12486             (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0))) {
   12487            rN     = INSNT0(3,0);
   12488            rDlo   = INSNT1(15,12);
   12489            rDhi   = INSNT1(11,8);
   12490            rM     = INSNT1(3,0);
   12491            m_swap = (INSNT1(4,4) & 1) == 1;
   12492            if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN) &&
   12493                !isBadRegT(rM) && rDhi != rDlo)
   12494               gate = True;
   12495         }
   12496      } else {
   12497         if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0) &&
   12498             (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,1,0,1)) {
   12499            rN     = INSNA(3,0);
   12500            rDlo   = INSNA(15,12);
   12501            rDhi   = INSNA(19,16);
   12502            rM     = INSNA(11,8);
   12503            m_swap = (INSNA(5,5) & 1) == 1;
   12504            if (rDlo != 15 && rDhi != 15 &&
   12505                rN != 15 && rM != 15 && rDlo != rDhi)
   12506               gate = True;
   12507         }
   12508      }
   12509      if (gate) {
   12510         IRTemp irt_rM   = newTemp(Ity_I32);
   12511         IRTemp irt_rN   = newTemp(Ity_I32);
   12512         IRTemp irt_rDhi = newTemp(Ity_I32);
   12513         IRTemp irt_rDlo = newTemp(Ity_I32);
   12514         IRTemp op_2     = newTemp(Ity_I32);
   12515         IRTemp pr_1     = newTemp(Ity_I64);
   12516         IRTemp pr_2     = newTemp(Ity_I64);
   12517         IRTemp result   = newTemp(Ity_I64);
   12518         IRTemp resHi    = newTemp(Ity_I32);
   12519         IRTemp resLo    = newTemp(Ity_I32);
   12520         assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
   12521         assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
   12522         assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi) );
   12523         assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo) );
   12524         assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
   12525         assign( pr_1, binop(Iop_MullS32,
   12526                             unop(Iop_16Sto32,
   12527                                  unop(Iop_32to16, mkexpr(irt_rN))
   12528                             ),
   12529                             unop(Iop_16Sto32,
   12530                                  unop(Iop_32to16, mkexpr(op_2))
   12531                             )
   12532                       )
   12533         );
   12534         assign( pr_2, binop(Iop_MullS32,
   12535                             binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
   12536                             binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
   12537                       )
   12538         );
   12539         assign( result, binop(Iop_Add64,
   12540                               binop(Iop_Sub64,
   12541                                     mkexpr(pr_1),
   12542                                     mkexpr(pr_2)
   12543                               ),
   12544                               binop(Iop_32HLto64,
   12545                                     mkexpr(irt_rDhi),
   12546                                     mkexpr(irt_rDlo)
   12547                               )
   12548                         )
   12549         );
   12550         assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
   12551         assign( resLo, unop(Iop_64to32, mkexpr(result)) );
   12552         if (isT) {
   12553            putIRegT( rDhi, mkexpr(resHi), condT );
   12554            putIRegT( rDlo, mkexpr(resLo), condT );
   12555         } else {
   12556            putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   12557            putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   12558         }
   12559         DIP("smlsld%c%s r%u, r%u, r%u, r%u\n",
   12560             m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
   12561         return True;
   12562      }
   12563      /* fall through */
   12564    }
   12565 
   12566    /* ---------- Doesn't match anything. ---------- */
   12567    return False;
   12568 
   12569 #  undef INSNA
   12570 #  undef INSNT0
   12571 #  undef INSNT1
   12572 }
   12573 
   12574 
   12575 /*------------------------------------------------------------*/
   12576 /*--- LDMxx/STMxx helper (both ARM and Thumb32)            ---*/
   12577 /*------------------------------------------------------------*/
   12578 
   12579 /* Generate IR for LDMxx and STMxx.  This is complex.  Assumes it's
   12580    unconditional, so the caller must produce a jump-around before
   12581    calling this, if the insn is to be conditional.  Caller is
   12582    responsible for all validation of parameters.  For LDMxx, if PC is
   12583    amongst the values loaded, caller is also responsible for
   12584    generating the jump. */
   12585 static void mk_ldm_stm ( Bool arm,     /* True: ARM, False: Thumb */
   12586                          UInt rN,      /* base reg */
   12587                          UInt bINC,    /* 1: inc,  0: dec */
   12588                          UInt bBEFORE, /* 1: inc/dec before, 0: after */
   12589                          UInt bW,      /* 1: writeback to Rn */
   12590                          UInt bL,      /* 1: load, 0: store */
   12591                          UInt regList ) /* bit r set: transfer reg r */
   12592 {
   12593    Int i, r, m, nRegs;
   12594    IRJumpKind jk = Ijk_Boring; /* jump kind handed to putIRegA on loads */
   12595 
   12596    /* Get hold of the old Rn value.  We might need to write its value
   12597       to memory during a store, and if it's also the writeback
   12598       register then we need to get its value now.  We can't treat it
   12599       exactly like the other registers we're going to transfer,
   12600       because for xxMDA and xxMDB writeback forms, the generated IR
   12601       updates Rn in the guest state before any transfers take place.
   12602       We have to do this as per comments below, in order that if Rn is
   12603       the stack pointer then it always has a value that is below or
   12604       equal to any of the transfer addresses.  Ick. */
   12605    IRTemp oldRnT = newTemp(Ity_I32);
   12606    assign(oldRnT, arm ? getIRegA(rN) : getIRegT(rN));
   12607 
   12608    IRTemp anchorT = newTemp(Ity_I32);
   12609    /* The old (Addison-Wesley) ARM ARM seems to say that LDMxx/STMxx
   12610       ignore the bottom two bits of the address.  However, Cortex-A8
   12611       doesn't seem to care.  Hence: */
   12612    /* No .. don't force alignment .. */
   12613    /* assign(anchorT, binop(Iop_And32, mkexpr(oldRnT), mkU32(~3U))); */
   12614    /* Instead, use the potentially misaligned address directly. */
   12615    assign(anchorT, mkexpr(oldRnT));
   12616 
   12617    IROp opADDorSUB = bINC ? Iop_Add32 : Iop_Sub32;
   12618    // bINC == 1:  xxMIA, xxMIB
   12619    // bINC == 0:  xxMDA, xxMDB
   12620 
   12621    // For xxMDA and xxMDB, update Rn first if necessary.  We have
   12622    // to do this first so that, for the common idiom of the transfers
   12623    // faulting because we're pushing stuff onto a stack and the stack
   12624    // is growing down onto allocate-on-fault pages (as Valgrind simulates),
   12625    // we need to have the SP up-to-date "covering" (pointing below) the
   12626    // transfer area.  For the same reason, if we are doing xxMIA or xxMIB,
   12627    // do the transfer first, and then update rN afterwards.
   12628    nRegs = 0;
   12629    for (i = 0; i < 16; i++) {
   12630      if ((regList & (1 << i)) != 0)
   12631          nRegs++;
   12632    }
   12633    if (bW == 1 && !bINC) {
   12634       IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
   12635       if (arm)
   12636          putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
   12637       else
   12638          putIRegT( rN, e, IRTemp_INVALID );
   12639    }
   12640 
   12641    // Make up a list of the registers to transfer, and their offsets
   12642    // in memory relative to the anchor.  If the base reg (Rn) is part
   12643    // of the transfer, then do it last for a load and first for a store.
   12644    UInt xReg[16], xOff[16];
   12645    Int  nX = 0;
   12646    m = 0;
   12647    for (i = 0; i < 16; i++) {
   12648       r = bINC ? i : (15-i);
   12649       if (0 == (regList & (1<<r)))
   12650          continue;
   12651       if (bBEFORE)
   12652          m++;
   12653       /* paranoia: check we aren't transferring the writeback
   12654          register during a load. Should be assured by decode-point
   12655          check above. */
   12656       if (bW == 1 && bL == 1)
   12657          vassert(r != rN);
   12658 
   12659       xOff[nX] = 4 * m;
   12660       xReg[nX] = r;
   12661       nX++;
   12662 
   12663       if (!bBEFORE)
   12664          m++;
   12665    }
   12666    vassert(m == nRegs);
   12667    vassert(nX == nRegs);
   12668    vassert(nX <= 16);
   12669 
   12670    if (bW == 0 && (regList & (1<<rN)) != 0) {
   12671       /* Non-writeback, and basereg is to be transferred.  Do its
   12672          transfer last for a load and first for a store.  Requires
   12673          reordering xOff/xReg. */
   12674       if (0) {
   12675          vex_printf("\nREG_LIST_PRE: (rN=%u)\n", rN);
   12676          for (i = 0; i < nX; i++)
   12677             vex_printf("reg %u   off %u\n", xReg[i], xOff[i]);
   12678          vex_printf("\n");
   12679       }
   12680 
   12681       vassert(nX > 0);
   12682       for (i = 0; i < nX; i++) {
   12683          if (xReg[i] == rN)
   12684              break;
   12685       }
   12686       vassert(i < nX); /* else we didn't find it! */
   12687       UInt tReg = xReg[i];
   12688       UInt tOff = xOff[i];
   12689       if (bL == 1) {
   12690          /* load; make this transfer happen last */
   12691          if (i < nX-1) {
   12692             for (m = i+1; m < nX; m++) {
   12693                xReg[m-1] = xReg[m];
   12694                xOff[m-1] = xOff[m];
   12695             }
   12696             vassert(m == nX);
   12697             xReg[m-1] = tReg;
   12698             xOff[m-1] = tOff;
   12699          }
   12700       } else {
   12701          /* store; make this transfer happen first */
   12702          if (i > 0) {
   12703             for (m = i-1; m >= 0; m--) {
   12704                xReg[m+1] = xReg[m];
   12705                xOff[m+1] = xOff[m];
   12706             }
   12707             vassert(m == -1);
   12708             xReg[0] = tReg;
   12709             xOff[0] = tOff;
   12710          }
   12711       }
   12712 
   12713       if (0) {
   12714          vex_printf("REG_LIST_POST:\n");
   12715          for (i = 0; i < nX; i++)
   12716             vex_printf("reg %u   off %u\n", xReg[i], xOff[i]);
   12717          vex_printf("\n");
   12718       }
   12719    }
   12720 
   12721    /* According to the Cortex A8 TRM Sec. 5.2.1, LDM(1) with r13 as the base
   12722        register and PC in the register list is a return for purposes of branch
   12723        prediction.
   12724       The ARM ARM Sec. C9.10.1 further specifies that writeback must be enabled
   12725        to be counted in event 0x0E (Procedure return).*/
   12726    if (rN == 13 && bL == 1 && bINC && !bBEFORE && bW == 1) {
   12727       jk = Ijk_Ret;
   12728    }
   12729 
   12730    /* Actually generate the transfers */
   12731    for (i = 0; i < nX; i++) {
   12732       r = xReg[i];
   12733       if (bL == 1) {
   12734          IRExpr* e = loadLE(Ity_I32,
   12735                             binop(opADDorSUB, mkexpr(anchorT),
   12736                                   mkU32(xOff[i])));
   12737          if (arm) {
   12738             putIRegA( r, e, IRTemp_INVALID, jk );
   12739          } else {
   12740             // no: putIRegT( r, e, IRTemp_INVALID );
   12741             // putIRegT refuses to write to R15.  But that might happen.
   12742             // Since this is uncond, and we need to be able to
   12743             // write the PC, just use the low level put:
   12744             llPutIReg( r, e );
   12745          }
   12746       } else {
   12747          /* if we're storing Rn, make sure we use the correct
   12748             value, as per extensive comments above */
   12749          storeLE( binop(opADDorSUB, mkexpr(anchorT), mkU32(xOff[i])),
   12750                   r == rN ? mkexpr(oldRnT)
   12751                           : (arm ? getIRegA(r) : getIRegT(r) ) );
   12752       }
   12753    }
   12754 
   12755    // If we are doing xxMIA or xxMIB,
   12756    // do the transfer first, and then update rN afterwards.
   12757    if (bW == 1 && bINC) {
   12758       IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
   12759       if (arm)
   12760          putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
   12761       else
   12762          putIRegT( rN, e, IRTemp_INVALID );
   12763    }
   12764 }
   12765 
   12766 
   12767 /*------------------------------------------------------------*/
   12768 /*--- VFP (CP 10 and 11) instructions                      ---*/
   12769 /*------------------------------------------------------------*/
   12770 
   12771 /* Both ARM and Thumb */
   12772 
   12773 /* Translate a CP10 or CP11 instruction.  If successful, returns
   12774    True and *dres may or may not be updated.  If failure, returns
   12775    False and doesn't change *dres nor create any IR.
   12776 
   12777    The ARM and Thumb encodings are identical for the low 28 bits of
   12778    the insn (yay!) and that's what the caller must supply, iow, imm28
   12779    has the top 4 bits masked out.  Caller is responsible for
   12780    determining whether the masked-out bits are valid for a CP10/11
   12781    insn.  The rules for the top 4 bits are:
   12782 
   12783      ARM: 0000 to 1110 allowed, and this is the gating condition.
   12784      1111 (NV) is not allowed.
   12785 
   12786      Thumb: must be 1110.  The gating condition is taken from
   12787      ITSTATE in the normal way.
   12788 
   12789    Conditionalisation:
   12790 
   12791    Caller must supply an IRTemp 'condT' holding the gating condition,
   12792    or IRTemp_INVALID indicating the insn is always executed.
   12793 
   12794    Caller must also supply an ARMCondcode 'conq'.  This is only used
   12795    for debug printing, no other purpose.  For ARM, this is simply the
   12796    top 4 bits of the original instruction.  For Thumb, the condition
   12797    is not (really) known until run time, and so ARMCondAL should be
   12798    passed, only so that printing of these instructions does not show
   12799    any condition.
   12800 
   12801    Finally, the caller must indicate whether this occurs in ARM or
   12802    Thumb code.
   12803 */
   12804 static Bool decode_CP10_CP11_instruction (
   12805                /*MOD*/DisResult* dres,
   12806                UInt              insn28,
   12807                IRTemp            condT,
   12808                ARMCondcode       conq,
   12809                Bool              isT
   12810             )
   12811 {
   12812 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn28, (_bMax), (_bMin))
   12813 
   12814    vassert(INSN(31,28) == BITS4(0,0,0,0)); // caller's obligation
   12815 
   12816    if (isT) {
   12817       vassert(conq == ARMCondAL);
   12818    } else {
   12819       vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
   12820    }
   12821 
   12822    /* ----------------------------------------------------------- */
   12823    /* -- VFP instructions -- double precision (mostly)         -- */
   12824    /* ----------------------------------------------------------- */
   12825 
   12826    /* --------------------- fldmx, fstmx --------------------- */
   12827    /*
   12828                                  31   27   23   19 15 11   7   0
   12829                                          P U WL
   12830       C4-100, C5-26  1  FSTMX    cond 1100 1000 Rn Dd 1011 offset
   12831       C4-100, C5-28  2  FSTMIAX  cond 1100 1010 Rn Dd 1011 offset
   12832       C4-100, C5-30  3  FSTMDBX  cond 1101 0010 Rn Dd 1011 offset
   12833 
   12834       C4-42, C5-26   1  FLDMX    cond 1100 1001 Rn Dd 1011 offset
   12835       C4-42, C5-28   2  FLDMIAX  cond 1100 1011 Rn Dd 1011 offset
   12836       C4-42, C5-30   3  FLDMDBX  cond 1101 0011 Rn Dd 1011 offset
   12837 
   12838       Regs transferred: Dd .. D(d + (offset-3)/2)
   12839       offset must be odd, must not imply a reg > 15
   12840       IA/DB: Rn is changed by (4 + 8 x # regs transferred)
   12841 
   12842       case coding:
   12843          1  at-Rn   (access at Rn)
   12844          2  ia-Rn   (access at Rn, then Rn += 4+8n)
   12845          3  db-Rn   (Rn -= 4+8n,   then access at Rn)
   12846    */
   12847    if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
   12848        && INSN(11,8) == BITS4(1,0,1,1)) {
   12849       UInt bP      = (insn28 >> 24) & 1;
   12850       UInt bU      = (insn28 >> 23) & 1;
   12851       UInt bW      = (insn28 >> 21) & 1;
   12852       UInt bL      = (insn28 >> 20) & 1;
   12853       UInt offset  = (insn28 >> 0) & 0xFF;
   12854       UInt rN      = INSN(19,16);
   12855       UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
   12856       UInt nRegs   = (offset - 1) / 2;
   12857       UInt summary = 0;
   12858       Int  i;
   12859 
   12860       /**/ if (bP == 0 && bU == 1 && bW == 0) {
   12861          summary = 1;
   12862       }
   12863       else if (bP == 0 && bU == 1 && bW == 1) {
   12864          summary = 2;
   12865       }
   12866       else if (bP == 1 && bU == 0 && bW == 1) {
   12867          summary = 3;
   12868       }
   12869       else goto after_vfp_fldmx_fstmx;
   12870 
   12871       /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
   12872       if (rN == 15 && (summary == 2 || summary == 3 || isT))
   12873          goto after_vfp_fldmx_fstmx;
   12874 
   12875       /* offset must be odd, and specify at least one register */
   12876       if (0 == (offset & 1) || offset < 3)
   12877          goto after_vfp_fldmx_fstmx;
   12878 
   12879       /* can't transfer regs after D15 */
   12880       if (dD + nRegs - 1 >= 32)
   12881          goto after_vfp_fldmx_fstmx;
   12882 
   12883       /* Now, we can't do a conditional load or store, since that very
   12884          likely will generate an exception.  So we have to take a side
   12885          exit at this point if the condition is false. */
   12886       if (condT != IRTemp_INVALID) {
   12887          if (isT)
   12888             mk_skip_over_T32_if_cond_is_false( condT );
   12889          else
   12890             mk_skip_over_A32_if_cond_is_false( condT );
   12891          condT = IRTemp_INVALID;
   12892       }
   12893       /* Ok, now we're unconditional.  Do the load or store. */
   12894 
   12895       /* get the old Rn value */
   12896       IRTemp rnT = newTemp(Ity_I32);
   12897       assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
   12898                            rN == 15));
   12899 
   12900       /* make a new value for Rn, post-insn */
   12901       IRTemp rnTnew = IRTemp_INVALID;
   12902       if (summary == 2 || summary == 3) {
   12903          rnTnew = newTemp(Ity_I32);
   12904          assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
   12905                               mkexpr(rnT),
   12906                               mkU32(4 + 8 * nRegs)));
   12907       }
   12908 
   12909       /* decide on the base transfer address */
   12910       IRTemp taT = newTemp(Ity_I32);
   12911       assign(taT,  summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
   12912 
   12913       /* update Rn if necessary -- in case 3, we're moving it down, so
   12914          update before any memory reference, in order to keep Memcheck
   12915          and V's stack-extending logic (on linux) happy */
   12916       if (summary == 3) {
   12917          if (isT)
   12918             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   12919          else
   12920             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   12921       }
   12922 
   12923       /* generate the transfers */
   12924       for (i = 0; i < nRegs; i++) {
   12925          IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
   12926          if (bL) {
   12927             putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
   12928          } else {
   12929             storeLE(addr, getDReg(dD + i));
   12930          }
   12931       }
   12932 
   12933       /* update Rn if necessary -- in case 2, we're moving it up, so
   12934          update after any memory reference, in order to keep Memcheck
   12935          and V's stack-extending logic (on linux) happy */
   12936       if (summary == 2) {
   12937          if (isT)
   12938             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   12939          else
   12940             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   12941       }
   12942 
   12943       const HChar* nm = bL==1 ? "ld" : "st";
   12944       switch (summary) {
   12945          case 1:  DIP("f%smx%s r%u, {d%u-d%u}\n",
   12946                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   12947                   break;
   12948          case 2:  DIP("f%smiax%s r%u!, {d%u-d%u}\n",
   12949                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   12950                   break;
   12951          case 3:  DIP("f%smdbx%s r%u!, {d%u-d%u}\n",
   12952                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   12953                   break;
   12954          default: vassert(0);
   12955       }
   12956 
   12957       goto decode_success_vfp;
   12958       /* FIXME alignment constraints? */
   12959    }
   12960 
   12961   after_vfp_fldmx_fstmx:
   12962 
   12963    /* --------------------- fldmd, fstmd --------------------- */
   12964    /*
   12965                                  31   27   23   19 15 11   7   0
   12966                                          P U WL
   12967       C4-96, C5-26   1  FSTMD    cond 1100 1000 Rn Dd 1011 offset
   12968       C4-96, C5-28   2  FSTMDIA  cond 1100 1010 Rn Dd 1011 offset
   12969       C4-96, C5-30   3  FSTMDDB  cond 1101 0010 Rn Dd 1011 offset
   12970 
   12971       C4-38, C5-26   1  FLDMD    cond 1100 1001 Rn Dd 1011 offset
   12972       C4-38, C5-28   2  FLDMIAD  cond 1100 1011 Rn Dd 1011 offset
   12973       C4-38, C5-30   3  FLDMDBD  cond 1101 0011 Rn Dd 1011 offset
   12974 
   12975       Regs transferred: Dd .. D(d + (offset-2)/2)
   12976       offset must be even, must not imply a reg > 15
   12977       IA/DB: Rn is changed by (8 x # regs transferred)
   12978 
   12979       case coding:
   12980          1  at-Rn   (access at Rn)
   12981          2  ia-Rn   (access at Rn, then Rn += 8n)
   12982          3  db-Rn   (Rn -= 8n,     then access at Rn)
   12983    */
   12984    if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
   12985        && INSN(11,8) == BITS4(1,0,1,1)) {
   12986       UInt bP      = (insn28 >> 24) & 1;
   12987       UInt bU      = (insn28 >> 23) & 1;
   12988       UInt bW      = (insn28 >> 21) & 1;
   12989       UInt bL      = (insn28 >> 20) & 1;
   12990       UInt offset  = (insn28 >> 0) & 0xFF;
   12991       UInt rN      = INSN(19,16);
   12992       UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
   12993       UInt nRegs   = offset / 2;
   12994       UInt summary = 0;
   12995       Int  i;
   12996 
   12997       /**/ if (bP == 0 && bU == 1 && bW == 0) {
   12998          summary = 1;
   12999       }
   13000       else if (bP == 0 && bU == 1 && bW == 1) {
   13001          summary = 2;
   13002       }
   13003       else if (bP == 1 && bU == 0 && bW == 1) {
   13004          summary = 3;
   13005       }
   13006       else goto after_vfp_fldmd_fstmd;
   13007 
   13008       /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
   13009       if (rN == 15 && (summary == 2 || summary == 3 || isT))
   13010          goto after_vfp_fldmd_fstmd;
   13011 
   13012       /* offset must be even, and specify at least one register */
   13013       if (1 == (offset & 1) || offset < 2)
   13014          goto after_vfp_fldmd_fstmd;
   13015 
   13016       /* can't transfer regs after D15 */
   13017       if (dD + nRegs - 1 >= 32)
   13018          goto after_vfp_fldmd_fstmd;
   13019 
   13020       /* Now, we can't do a conditional load or store, since that very
   13021          likely will generate an exception.  So we have to take a side
   13022          exit at this point if the condition is false. */
   13023       if (condT != IRTemp_INVALID) {
   13024          if (isT)
   13025             mk_skip_over_T32_if_cond_is_false( condT );
   13026          else
   13027             mk_skip_over_A32_if_cond_is_false( condT );
   13028          condT = IRTemp_INVALID;
   13029       }
   13030       /* Ok, now we're unconditional.  Do the load or store. */
   13031 
   13032       /* get the old Rn value */
   13033       IRTemp rnT = newTemp(Ity_I32);
   13034       assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
   13035                            rN == 15));
   13036 
   13037       /* make a new value for Rn, post-insn */
   13038       IRTemp rnTnew = IRTemp_INVALID;
   13039       if (summary == 2 || summary == 3) {
   13040          rnTnew = newTemp(Ity_I32);
   13041          assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
   13042                               mkexpr(rnT),
   13043                               mkU32(8 * nRegs)));
   13044       }
   13045 
   13046       /* decide on the base transfer address */
   13047       IRTemp taT = newTemp(Ity_I32);
   13048       assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
   13049 
   13050       /* update Rn if necessary -- in case 3, we're moving it down, so
   13051          update before any memory reference, in order to keep Memcheck
   13052          and V's stack-extending logic (on linux) happy */
   13053       if (summary == 3) {
   13054          if (isT)
   13055             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   13056          else
   13057             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   13058       }
   13059 
   13060       /* generate the transfers */
   13061       for (i = 0; i < nRegs; i++) {
   13062          IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
   13063          if (bL) {
   13064             putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
   13065          } else {
   13066             storeLE(addr, getDReg(dD + i));
   13067          }
   13068       }
   13069 
   13070       /* update Rn if necessary -- in case 2, we're moving it up, so
   13071          update after any memory reference, in order to keep Memcheck
   13072          and V's stack-extending logic (on linux) happy */
   13073       if (summary == 2) {
   13074          if (isT)
   13075             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   13076          else
   13077             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   13078       }
   13079 
   13080       const HChar* nm = bL==1 ? "ld" : "st";
   13081       switch (summary) {
   13082          case 1:  DIP("f%smd%s r%u, {d%u-d%u}\n",
   13083                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   13084                   break;
   13085          case 2:  DIP("f%smiad%s r%u!, {d%u-d%u}\n",
   13086                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   13087                   break;
   13088          case 3:  DIP("f%smdbd%s r%u!, {d%u-d%u}\n",
   13089                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   13090                   break;
   13091          default: vassert(0);
   13092       }
   13093 
   13094       goto decode_success_vfp;
   13095       /* FIXME alignment constraints? */
   13096    }
   13097 
   13098   after_vfp_fldmd_fstmd:
   13099 
   13100    /* ------------------- fmrx, fmxr ------------------- */
   13101    if (BITS8(1,1,1,0,1,1,1,1) == INSN(27,20)
   13102        && BITS4(1,0,1,0) == INSN(11,8)
   13103        && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
   13104       UInt rD  = INSN(15,12);
   13105       UInt reg = INSN(19,16);
   13106       if (reg == BITS4(0,0,0,1)) {
   13107          if (rD == 15) {
   13108             IRTemp nzcvT = newTemp(Ity_I32);
   13109             /* When rD is 15, we are copying the top 4 bits of FPSCR
   13110                into CPSR.  That is, set the flags thunk to COPY and
   13111                install FPSCR[31:28] as the value to copy. */
   13112             assign(nzcvT, binop(Iop_And32,
   13113                                 IRExpr_Get(OFFB_FPSCR, Ity_I32),
   13114                                 mkU32(0xF0000000)));
   13115             setFlags_D1(ARMG_CC_OP_COPY, nzcvT, condT);
   13116             DIP("fmstat%s\n", nCC(conq));
   13117          } else {
   13118             /* Otherwise, merely transfer FPSCR to r0 .. r14. */
   13119             IRExpr* e = IRExpr_Get(OFFB_FPSCR, Ity_I32);
   13120             if (isT)
   13121                putIRegT(rD, e, condT);
   13122             else
   13123                putIRegA(rD, e, condT, Ijk_Boring);
   13124             DIP("fmrx%s r%u, fpscr\n", nCC(conq), rD);
   13125          }
   13126          goto decode_success_vfp;
   13127       }
   13128       /* fall through */
   13129    }
   13130 
   13131    if (BITS8(1,1,1,0,1,1,1,0) == INSN(27,20)
   13132        && BITS4(1,0,1,0) == INSN(11,8)
   13133        && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
   13134       UInt rD  = INSN(15,12);
   13135       UInt reg = INSN(19,16);
   13136       if (reg == BITS4(0,0,0,1)) {
   13137          putMiscReg32(OFFB_FPSCR,
   13138                       isT ? getIRegT(rD) : getIRegA(rD), condT);
   13139          DIP("fmxr%s fpscr, r%u\n", nCC(conq), rD);
   13140          goto decode_success_vfp;
   13141       }
   13142       /* fall through */
   13143    }
   13144 
   13145    /* --------------------- vmov --------------------- */
   13146    // VMOV dM, rD, rN
   13147    if (0x0C400B10 == (insn28 & 0x0FF00FD0)) {
   13148       UInt dM = INSN(3,0) | (INSN(5,5) << 4);
   13149       UInt rD = INSN(15,12); /* lo32 */
   13150       UInt rN = INSN(19,16); /* hi32 */
   13151       if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))) {
   13152          /* fall through */
   13153       } else {
   13154          putDReg(dM,
   13155                  unop(Iop_ReinterpI64asF64,
   13156                       binop(Iop_32HLto64,
   13157                             isT ? getIRegT(rN) : getIRegA(rN),
   13158                             isT ? getIRegT(rD) : getIRegA(rD))),
   13159                  condT);
   13160          DIP("vmov%s d%u, r%u, r%u\n", nCC(conq), dM, rD, rN);
   13161          goto decode_success_vfp;
   13162       }
   13163       /* fall through */
   13164    }
   13165 
   13166    // VMOV rD, rN, dM
   13167    if (0x0C500B10 == (insn28 & 0x0FF00FD0)) {
   13168       UInt dM = INSN(3,0) | (INSN(5,5) << 4);
   13169       UInt rD = INSN(15,12); /* lo32 */
   13170       UInt rN = INSN(19,16); /* hi32 */
   13171       if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))
   13172           || rD == rN) {
   13173          /* fall through */
   13174       } else {
   13175          IRTemp i64 = newTemp(Ity_I64);
   13176          assign(i64, unop(Iop_ReinterpF64asI64, getDReg(dM)));
   13177          IRExpr* hi32 = unop(Iop_64HIto32, mkexpr(i64));
   13178          IRExpr* lo32 = unop(Iop_64to32,   mkexpr(i64));
   13179          if (isT) {
   13180             putIRegT(rN, hi32, condT);
   13181             putIRegT(rD, lo32, condT);
   13182          } else {
   13183             putIRegA(rN, hi32, condT, Ijk_Boring);
   13184             putIRegA(rD, lo32, condT, Ijk_Boring);
   13185          }
   13186          DIP("vmov%s r%u, r%u, d%u\n", nCC(conq), rD, rN, dM);
   13187          goto decode_success_vfp;
   13188       }
   13189       /* fall through */
   13190    }
   13191 
   13192    // VMOV sD, sD+1, rN, rM
   13193    if (0x0C400A10 == (insn28 & 0x0FF00FD0)) {
   13194       UInt sD = (INSN(3,0) << 1) | INSN(5,5);
   13195       UInt rN = INSN(15,12);
   13196       UInt rM = INSN(19,16);
   13197       if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
   13198           || sD == 31) {
   13199          /* fall through */
   13200       } else {
   13201          putFReg(sD,
   13202                  unop(Iop_ReinterpI32asF32, isT ? getIRegT(rN) : getIRegA(rN)),
   13203                  condT);
   13204          putFReg(sD+1,
   13205                  unop(Iop_ReinterpI32asF32, isT ? getIRegT(rM) : getIRegA(rM)),
   13206                  condT);
   13207          DIP("vmov%s, s%u, s%u, r%u, r%u\n",
   13208               nCC(conq), sD, sD + 1, rN, rM);
   13209          goto decode_success_vfp;
   13210       }
   13211    }
   13212 
   13213    // VMOV rN, rM, sD, sD+1
   13214    if (0x0C500A10 == (insn28 & 0x0FF00FD0)) {
   13215       UInt sD = (INSN(3,0) << 1) | INSN(5,5);
   13216       UInt rN = INSN(15,12);
   13217       UInt rM = INSN(19,16);
   13218       if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
   13219           || sD == 31 || rN == rM) {
   13220          /* fall through */
   13221       } else {
   13222          IRExpr* res0 = unop(Iop_ReinterpF32asI32, getFReg(sD));
   13223          IRExpr* res1 = unop(Iop_ReinterpF32asI32, getFReg(sD+1));
   13224          if (isT) {
   13225             putIRegT(rN, res0, condT);
   13226             putIRegT(rM, res1, condT);
   13227          } else {
   13228             putIRegA(rN, res0, condT, Ijk_Boring);
   13229             putIRegA(rM, res1, condT, Ijk_Boring);
   13230          }
   13231          DIP("vmov%s, r%u, r%u, s%u, s%u\n",
   13232              nCC(conq), rN, rM, sD, sD + 1);
   13233          goto decode_success_vfp;
   13234       }
   13235    }
   13236 
   13237    // VMOV dD[x], rT  (ARM core register to scalar)
   13238    if (0x0E000B10 == (insn28 & 0x0F900F1F)) {
   13239       UInt rD  = (INSN(7,7) << 4) | INSN(19,16);
   13240       UInt rT  = INSN(15,12);
   13241       UInt opc = (INSN(22,21) << 2) | INSN(6,5);
   13242       UInt index;
   13243       if (rT == 15 || (isT && rT == 13)) {
   13244          /* fall through */
   13245       } else {
   13246          if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
   13247             index = opc & 7;
   13248             putDRegI64(rD, triop(Iop_SetElem8x8,
   13249                                  getDRegI64(rD),
   13250                                  mkU8(index),
   13251                                  unop(Iop_32to8,
   13252                                       isT ? getIRegT(rT) : getIRegA(rT))),
   13253                            condT);
   13254             DIP("vmov%s.8 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
   13255             goto decode_success_vfp;
   13256          }
   13257          else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
   13258             index = (opc >> 1) & 3;
   13259             putDRegI64(rD, triop(Iop_SetElem16x4,
   13260                                  getDRegI64(rD),
   13261                                  mkU8(index),
   13262                                  unop(Iop_32to16,
   13263                                       isT ? getIRegT(rT) : getIRegA(rT))),
   13264                            condT);
   13265             DIP("vmov%s.16 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
   13266             goto decode_success_vfp;
   13267          }
   13268          else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0)) {
   13269             index = (opc >> 2) & 1;
   13270             putDRegI64(rD, triop(Iop_SetElem32x2,
   13271                                  getDRegI64(rD),
   13272                                  mkU8(index),
   13273                                  isT ? getIRegT(rT) : getIRegA(rT)),
   13274                            condT);
   13275             DIP("vmov%s.32 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
   13276             goto decode_success_vfp;
   13277          } else {
   13278             /* fall through */
   13279          }
   13280       }
   13281    }
   13282 
   13283    // VMOV (scalar to ARM core register)
   13284    // VMOV rT, dN[x]
   13285    if (0x0E100B10 == (insn28 & 0x0F100F1F)) {
   13286       UInt rN  = (INSN(7,7) << 4) | INSN(19,16);
   13287       UInt rT  = INSN(15,12);
   13288       UInt U   = INSN(23,23);
   13289       UInt opc = (INSN(22,21) << 2) | INSN(6,5);
   13290       UInt index;
   13291       if (rT == 15 || (isT && rT == 13)) {
   13292          /* fall through */
   13293       } else {
   13294          if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
   13295             index = opc & 7;
   13296             IRExpr* e = unop(U ? Iop_8Uto32 : Iop_8Sto32,
   13297                              binop(Iop_GetElem8x8,
   13298                                    getDRegI64(rN),
   13299                                    mkU8(index)));
   13300             if (isT)
   13301                putIRegT(rT, e, condT);
   13302             else
   13303                putIRegA(rT, e, condT, Ijk_Boring);
   13304             DIP("vmov%s.%c8 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
   13305                   rT, rN, index);
   13306             goto decode_success_vfp;
   13307          }
   13308          else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
   13309             index = (opc >> 1) & 3;
   13310             IRExpr* e = unop(U ? Iop_16Uto32 : Iop_16Sto32,
   13311                              binop(Iop_GetElem16x4,
   13312                                    getDRegI64(rN),
   13313                                    mkU8(index)));
   13314             if (isT)
   13315                putIRegT(rT, e, condT);
   13316             else
   13317                putIRegA(rT, e, condT, Ijk_Boring);
   13318             DIP("vmov%s.%c16 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
   13319                   rT, rN, index);
   13320             goto decode_success_vfp;
   13321          }
   13322          else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0) && U == 0) {
   13323             index = (opc >> 2) & 1;
   13324             IRExpr* e = binop(Iop_GetElem32x2, getDRegI64(rN), mkU8(index));
   13325             if (isT)
   13326                putIRegT(rT, e, condT);
   13327             else
   13328                putIRegA(rT, e, condT, Ijk_Boring);
   13329             DIP("vmov%s.32 r%u, d%u[%u]\n", nCC(conq), rT, rN, index);
   13330             goto decode_success_vfp;
   13331          } else {
   13332             /* fall through */
   13333          }
   13334       }
   13335    }
   13336 
   13337    // VMOV.F32 sD, #imm
   13338    // FCONSTS sD, #imm
   13339    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13340        && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,0)) {
   13341       UInt rD   = (INSN(15,12) << 1) | INSN(22,22);
   13342       UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
   13343       UInt b    = (imm8 >> 6) & 1;
   13344       UInt imm;
   13345       imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,(imm8 >> 5) & 1) << 8)
   13346              | ((imm8 & 0x1f) << 3);
   13347       imm <<= 16;
   13348       putFReg(rD, unop(Iop_ReinterpI32asF32, mkU32(imm)), condT);
   13349       DIP("fconsts%s s%u #%u", nCC(conq), rD, imm8);
   13350       goto decode_success_vfp;
   13351    }
   13352 
   13353    // VMOV.F64 dD, #imm
   13354    // FCONSTD dD, #imm
   13355    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13356        && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,1)) {
   13357       UInt rD   = INSN(15,12) | (INSN(22,22) << 4);
   13358       UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
   13359       UInt b    = (imm8 >> 6) & 1;
   13360       ULong imm;
   13361       imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,b) << 8)
   13362              | BITS8(b,b,0,0,0,0,0,0) | (imm8 & 0x3f);
   13363       imm <<= 48;
   13364       putDReg(rD, unop(Iop_ReinterpI64asF64, mkU64(imm)), condT);
   13365       DIP("fconstd%s d%u #%u", nCC(conq), rD, imm8);
   13366       goto decode_success_vfp;
   13367    }
   13368 
   13369    /* ---------------------- vdup ------------------------- */
   13370    // VDUP dD, rT
   13371    // VDUP qD, rT
   13372    if (BITS8(1,1,1,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,1))
   13373        && BITS4(1,0,1,1) == INSN(11,8) && INSN(6,6) == 0 && INSN(4,4) == 1) {
   13374       UInt rD   = (INSN(7,7) << 4) | INSN(19,16);
   13375       UInt rT   = INSN(15,12);
   13376       UInt Q    = INSN(21,21);
   13377       UInt size = (INSN(22,22) << 1) | INSN(5,5);
   13378       if (rT == 15 || (isT && rT == 13) || size == 3 || (Q && (rD & 1))) {
   13379          /* fall through */
   13380       } else {
   13381          IRExpr* e = isT ? getIRegT(rT) : getIRegA(rT);
   13382          if (Q) {
   13383             rD >>= 1;
   13384             switch (size) {
   13385                case 0:
   13386                   putQReg(rD, unop(Iop_Dup32x4, e), condT);
   13387                   break;
   13388                case 1:
   13389                   putQReg(rD, unop(Iop_Dup16x8, unop(Iop_32to16, e)),
   13390                               condT);
   13391                   break;
   13392                case 2:
   13393                   putQReg(rD, unop(Iop_Dup8x16, unop(Iop_32to8, e)),
   13394                               condT);
   13395                   break;
   13396                default:
   13397                   vassert(0);
   13398             }
   13399             DIP("vdup.%u q%u, r%u\n", 32 / (1<<size), rD, rT);
   13400          } else {
   13401             switch (size) {
   13402                case 0:
   13403                   putDRegI64(rD, unop(Iop_Dup32x2, e), condT);
   13404                   break;
   13405                case 1:
   13406                   putDRegI64(rD, unop(Iop_Dup16x4, unop(Iop_32to16, e)),
   13407                                condT);
   13408                   break;
   13409                case 2:
   13410                   putDRegI64(rD, unop(Iop_Dup8x8, unop(Iop_32to8, e)),
   13411                                condT);
   13412                   break;
   13413                default:
   13414                   vassert(0);
   13415             }
   13416             DIP("vdup.%u d%u, r%u\n", 32 / (1<<size), rD, rT);
   13417          }
   13418          goto decode_success_vfp;
   13419       }
   13420    }
   13421 
   13422    /* --------------------- f{ld,st}d --------------------- */
   13423    // FLDD, FSTD
   13424    if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
   13425        && BITS4(1,0,1,1) == INSN(11,8)) {
   13426       UInt dD     = INSN(15,12) | (INSN(22,22) << 4);
   13427       UInt rN     = INSN(19,16);
   13428       UInt offset = (insn28 & 0xFF) << 2;
   13429       UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
   13430       UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
   13431       /* make unconditional */
   13432       if (condT != IRTemp_INVALID) {
   13433          if (isT)
   13434             mk_skip_over_T32_if_cond_is_false( condT );
   13435          else
   13436             mk_skip_over_A32_if_cond_is_false( condT );
   13437          condT = IRTemp_INVALID;
   13438       }
   13439       IRTemp ea = newTemp(Ity_I32);
   13440       assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
   13441                        align4if(isT ? getIRegT(rN) : getIRegA(rN),
   13442                                 rN == 15),
   13443                        mkU32(offset)));
   13444       if (bL) {
   13445          putDReg(dD, loadLE(Ity_F64,mkexpr(ea)), IRTemp_INVALID);
   13446       } else {
   13447          storeLE(mkexpr(ea), getDReg(dD));
   13448       }
   13449       DIP("f%sd%s d%u, [r%u, %c#%u]\n",
   13450           bL ? "ld" : "st", nCC(conq), dD, rN,
   13451           bU ? '+' : '-', offset);
   13452       goto decode_success_vfp;
   13453    }
   13454 
   13455    /* --------------------- dp insns (D) --------------------- */
   13456    if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
   13457        && BITS4(1,0,1,1) == INSN(11,8)
   13458        && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
   13459       UInt    dM  = INSN(3,0)   | (INSN(5,5) << 4);       /* argR */
   13460       UInt    dD  = INSN(15,12) | (INSN(22,22) << 4);   /* dst/acc */
   13461       UInt    dN  = INSN(19,16) | (INSN(7,7) << 4);     /* argL */
   13462       UInt    bP  = (insn28 >> 23) & 1;
   13463       UInt    bQ  = (insn28 >> 21) & 1;
   13464       UInt    bR  = (insn28 >> 20) & 1;
   13465       UInt    bS  = (insn28 >> 6) & 1;
   13466       UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
   13467       IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   13468       switch (opc) {
   13469          case BITS4(0,0,0,0): /* MAC: d + n * m */
   13470             putDReg(dD, triop(Iop_AddF64, rm,
   13471                               getDReg(dD),
   13472                               triop(Iop_MulF64, rm, getDReg(dN),
   13473                                                     getDReg(dM))),
   13474                         condT);
   13475             DIP("fmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13476             goto decode_success_vfp;
   13477          case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
   13478             putDReg(dD, triop(Iop_AddF64, rm,
   13479                               getDReg(dD),
   13480                               unop(Iop_NegF64,
   13481                                    triop(Iop_MulF64, rm, getDReg(dN),
   13482                                                          getDReg(dM)))),
   13483                         condT);
   13484             DIP("fnmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13485             goto decode_success_vfp;
   13486          case BITS4(0,0,1,0): /* MSC: - d + n * m */
   13487             putDReg(dD, triop(Iop_AddF64, rm,
   13488                               unop(Iop_NegF64, getDReg(dD)),
   13489                               triop(Iop_MulF64, rm, getDReg(dN),
   13490                                                     getDReg(dM))),
   13491                         condT);
   13492             DIP("fmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13493             goto decode_success_vfp;
   13494          case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
   13495             putDReg(dD, triop(Iop_AddF64, rm,
   13496                               unop(Iop_NegF64, getDReg(dD)),
   13497                               unop(Iop_NegF64,
   13498                                    triop(Iop_MulF64, rm, getDReg(dN),
   13499                                                          getDReg(dM)))),
   13500                         condT);
   13501             DIP("fnmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13502             goto decode_success_vfp;
   13503          case BITS4(0,1,0,0): /* MUL: n * m */
   13504             putDReg(dD, triop(Iop_MulF64, rm, getDReg(dN), getDReg(dM)),
   13505                         condT);
   13506             DIP("fmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13507             goto decode_success_vfp;
   13508          case BITS4(0,1,0,1): /* NMUL: - n * m */
   13509             putDReg(dD, unop(Iop_NegF64,
   13510                              triop(Iop_MulF64, rm, getDReg(dN),
   13511                                                    getDReg(dM))),
   13512                     condT);
   13513             DIP("fnmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13514             goto decode_success_vfp;
   13515          case BITS4(0,1,1,0): /* ADD: n + m */
   13516             putDReg(dD, triop(Iop_AddF64, rm, getDReg(dN), getDReg(dM)),
   13517                         condT);
   13518             DIP("faddd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13519             goto decode_success_vfp;
   13520          case BITS4(0,1,1,1): /* SUB: n - m */
   13521             putDReg(dD, triop(Iop_SubF64, rm, getDReg(dN), getDReg(dM)),
   13522                         condT);
   13523             DIP("fsubd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13524             goto decode_success_vfp;
   13525          case BITS4(1,0,0,0): /* DIV: n / m */
   13526             putDReg(dD, triop(Iop_DivF64, rm, getDReg(dN), getDReg(dM)),
   13527                         condT);
   13528             DIP("fdivd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13529             goto decode_success_vfp;
   13530          case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
   13531             /* XXXROUNDINGFIXME look up ARM reference for fused
   13532                multiply-add rounding */
   13533             putDReg(dD, triop(Iop_AddF64, rm,
   13534                               getDReg(dD),
   13535                               triop(Iop_MulF64, rm, getDReg(dN),
   13536                                                     getDReg(dM))),
   13537                         condT);
   13538             DIP("vfmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13539             goto decode_success_vfp;
   13540          case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
   13541             /* XXXROUNDINGFIXME look up ARM reference for fused
   13542                multiply-add rounding */
   13543             putDReg(dD, triop(Iop_AddF64, rm,
   13544                               getDReg(dD),
   13545                               triop(Iop_MulF64, rm,
   13546                                     unop(Iop_NegF64, getDReg(dN)),
   13547                                     getDReg(dM))),
   13548                         condT);
   13549             DIP("vfmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13550             goto decode_success_vfp;
   13551          default:
   13552             break;
   13553       }
   13554    }
   13555 
   13556    /* --------------------- compares (D) --------------------- */
   13557    /*          31   27   23   19   15 11   7    3
   13558                  28   24   20   16 12    8    4    0
   13559       FCMPD    cond 1110 1D11 0100 Dd 1011 0100 Dm
   13560       FCMPED   cond 1110 1D11 0100 Dd 1011 1100 Dm
   13561       FCMPZD   cond 1110 1D11 0101 Dd 1011 0100 0000
   13562       FCMPZED  cond 1110 1D11 0101 Dd 1011 1100 0000
   13563                                  Z         N
   13564 
   13565       Z=0 Compare Dd vs Dm     and set FPSCR 31:28 accordingly
   13566       Z=1 Compare Dd vs zero
   13567 
   13568       N=1 generates Invalid Operation exn if either arg is any kind of NaN
   13569       N=0 generates Invalid Operation exn if either arg is a signalling NaN
   13570       (Not that we pay any attention to N here)
   13571    */
   13572    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13573        && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   13574        && BITS4(1,0,1,1) == INSN(11,8)
   13575        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   13576       UInt bZ = (insn28 >> 16) & 1;
   13577       UInt bN = (insn28 >> 7) & 1;
   13578       UInt dD = INSN(15,12) | (INSN(22,22) << 4);
   13579       UInt dM = INSN(3,0) | (INSN(5,5) << 4);
   13580       if (bZ && INSN(3,0) != 0) {
   13581          /* does not decode; fall through */
   13582       } else {
   13583          IRTemp argL = newTemp(Ity_F64);
   13584          IRTemp argR = newTemp(Ity_F64);
   13585          IRTemp irRes = newTemp(Ity_I32);
   13586          assign(argL, getDReg(dD));
   13587          assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0)) : getDReg(dM));
   13588          assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
   13589 
   13590          IRTemp nzcv     = IRTemp_INVALID;
   13591          IRTemp oldFPSCR = newTemp(Ity_I32);
   13592          IRTemp newFPSCR = newTemp(Ity_I32);
   13593 
   13594          /* This is where the fun starts.  We have to convert 'irRes'
   13595             from an IR-convention return result (IRCmpF64Result) to an
   13596             ARM-encoded (N,Z,C,V) group.  The final result is in the
   13597             bottom 4 bits of 'nzcv'. */
   13598          /* Map compare result from IR to ARM(nzcv) */
   13599          /*
   13600             FP cmp result | IR   | ARM(nzcv)
   13601             --------------------------------
   13602             UN              0x45   0011
   13603             LT              0x01   1000
   13604             GT              0x00   0010
   13605             EQ              0x40   0110
   13606          */
   13607          nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
   13608 
   13609          /* And update FPSCR accordingly */
   13610          assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
   13611          assign(newFPSCR,
   13612                 binop(Iop_Or32,
   13613                       binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
   13614                       binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
   13615 
   13616          putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
   13617 
   13618          if (bZ) {
   13619             DIP("fcmpz%sd%s d%u\n", bN ? "e" : "", nCC(conq), dD);
   13620          } else {
   13621             DIP("fcmp%sd%s d%u, d%u\n", bN ? "e" : "", nCC(conq), dD, dM);
   13622          }
   13623          goto decode_success_vfp;
   13624       }
   13625       /* fall through */
   13626    }
   13627 
   13628    /* --------------------- unary (D) --------------------- */
   13629    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13630        && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   13631        && BITS4(1,0,1,1) == INSN(11,8)
   13632        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   13633       UInt dD  = INSN(15,12) | (INSN(22,22) << 4);
   13634       UInt dM  = INSN(3,0) | (INSN(5,5) << 4);
   13635       UInt b16 = (insn28 >> 16) & 1;
   13636       UInt b7  = (insn28 >> 7) & 1;
   13637       /**/ if (b16 == 0 && b7 == 0) {
   13638          // FCPYD
   13639          putDReg(dD, getDReg(dM), condT);
   13640          DIP("fcpyd%s d%u, d%u\n", nCC(conq), dD, dM);
   13641          goto decode_success_vfp;
   13642       }
   13643       else if (b16 == 0 && b7 == 1) {
   13644          // FABSD
   13645          putDReg(dD, unop(Iop_AbsF64, getDReg(dM)), condT);
   13646          DIP("fabsd%s d%u, d%u\n", nCC(conq), dD, dM);
   13647          goto decode_success_vfp;
   13648       }
   13649       else if (b16 == 1 && b7 == 0) {
   13650          // FNEGD
   13651          putDReg(dD, unop(Iop_NegF64, getDReg(dM)), condT);
   13652          DIP("fnegd%s d%u, d%u\n", nCC(conq), dD, dM);
   13653          goto decode_success_vfp;
   13654       }
   13655       else if (b16 == 1 && b7 == 1) {
   13656          // FSQRTD
   13657          IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   13658          putDReg(dD, binop(Iop_SqrtF64, rm, getDReg(dM)), condT);
   13659          DIP("fsqrtd%s d%u, d%u\n", nCC(conq), dD, dM);
   13660          goto decode_success_vfp;
   13661       }
   13662       else
   13663          vassert(0);
   13664 
   13665       /* fall through */
   13666    }
   13667 
   13668    /* ----------------- I <-> D conversions ----------------- */
   13669 
   13670    // F{S,U}ITOD dD, fM
   13671    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13672        && BITS4(1,0,0,0) == (INSN(19,16) & BITS4(1,1,1,1))
   13673        && BITS4(1,0,1,1) == INSN(11,8)
   13674        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   13675       UInt bM    = (insn28 >> 5) & 1;
   13676       UInt fM    = (INSN(3,0) << 1) | bM;
   13677       UInt dD    = INSN(15,12) | (INSN(22,22) << 4);
   13678       UInt syned = (insn28 >> 7) & 1;
   13679       if (syned) {
   13680          // FSITOD
   13681          putDReg(dD, unop(Iop_I32StoF64,
   13682                           unop(Iop_ReinterpF32asI32, getFReg(fM))),
   13683                  condT);
   13684          DIP("fsitod%s d%u, s%u\n", nCC(conq), dD, fM);
   13685       } else {
   13686          // FUITOD
   13687          putDReg(dD, unop(Iop_I32UtoF64,
   13688                           unop(Iop_ReinterpF32asI32, getFReg(fM))),
   13689                  condT);
   13690          DIP("fuitod%s d%u, s%u\n", nCC(conq), dD, fM);
   13691       }
   13692       goto decode_success_vfp;
   13693    }
   13694 
   13695    // FTO{S,U}ID fD, dM
   13696    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13697        && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   13698        && BITS4(1,0,1,1) == INSN(11,8)
   13699        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   13700       UInt   bD    = (insn28 >> 22) & 1;
   13701       UInt   fD    = (INSN(15,12) << 1) | bD;
   13702       UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
   13703       UInt   bZ    = (insn28 >> 7) & 1;
   13704       UInt   syned = (insn28 >> 16) & 1;
   13705       IRTemp rmode = newTemp(Ity_I32);
   13706       assign(rmode, bZ ? mkU32(Irrm_ZERO)
   13707                        : mkexpr(mk_get_IR_rounding_mode()));
   13708       if (syned) {
   13709          // FTOSID
   13710          putFReg(fD, unop(Iop_ReinterpI32asF32,
   13711                           binop(Iop_F64toI32S, mkexpr(rmode),
   13712                                 getDReg(dM))),
   13713                  condT);
   13714          DIP("ftosi%sd%s s%u, d%u\n", bZ ? "z" : "",
   13715              nCC(conq), fD, dM);
   13716       } else {
   13717          // FTOUID
   13718          putFReg(fD, unop(Iop_ReinterpI32asF32,
   13719                           binop(Iop_F64toI32U, mkexpr(rmode),
   13720                                 getDReg(dM))),
   13721                  condT);
   13722          DIP("ftoui%sd%s s%u, d%u\n", bZ ? "z" : "",
   13723              nCC(conq), fD, dM);
   13724       }
   13725       goto decode_success_vfp;
   13726    }
   13727 
   13728    /* ----------------------------------------------------------- */
   13729    /* -- VFP instructions -- single precision                  -- */
   13730    /* ----------------------------------------------------------- */
   13731 
   13732    /* --------------------- fldms, fstms --------------------- */
   13733    /*
   13734                                  31   27   23   19 15 11   7   0
   13735                                          P UDWL
   13736       C4-98, C5-26   1  FSTMS    cond 1100 1x00 Rn Fd 1010 offset
   13737       C4-98, C5-28   2  FSTMIAS  cond 1100 1x10 Rn Fd 1010 offset
   13738       C4-98, C5-30   3  FSTMDBS  cond 1101 0x10 Rn Fd 1010 offset
   13739 
   13740       C4-40, C5-26   1  FLDMS    cond 1100 1x01 Rn Fd 1010 offset
   13741       C4-40, C5-26   2  FLDMIAS  cond 1100 1x11 Rn Fd 1010 offset
   13742       C4-40, C5-26   3  FLDMDBS  cond 1101 0x11 Rn Fd 1010 offset
   13743 
   13744       Regs transferred: F(Fd:D) .. F(Fd:D + offset - 1)
   13745       offset must be >= 1 and must not imply a reg > 31 (S31)
   13746       IA/DB: Rn is changed by (4 x # regs transferred)
   13747 
   13748       case coding:
   13749          1  at-Rn   (access at Rn)
   13750          2  ia-Rn   (access at Rn, then Rn += 4n)
   13751          3  db-Rn   (Rn -= 4n,     then access at Rn)
   13752    */
   13753    if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
   13754        && INSN(11,8) == BITS4(1,0,1,0)) {
   13755       UInt bP      = (insn28 >> 24) & 1;
   13756       UInt bU      = (insn28 >> 23) & 1;
   13757       UInt bW      = (insn28 >> 21) & 1;
   13758       UInt bL      = (insn28 >> 20) & 1;
   13759       UInt bD      = (insn28 >> 22) & 1;
   13760       UInt offset  = (insn28 >> 0) & 0xFF;
   13761       UInt rN      = INSN(19,16);
   13762       UInt fD      = (INSN(15,12) << 1) | bD;
   13763       UInt nRegs   = offset;
   13764       UInt summary = 0;
   13765       Int  i;
   13766 
   13767       /**/ if (bP == 0 && bU == 1 && bW == 0) {
   13768          summary = 1;
   13769       }
   13770       else if (bP == 0 && bU == 1 && bW == 1) {
   13771          summary = 2;
   13772       }
   13773       else if (bP == 1 && bU == 0 && bW == 1) {
   13774          summary = 3;
   13775       }
   13776       else goto after_vfp_fldms_fstms;
   13777 
   13778       /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
   13779       if (rN == 15 && (summary == 2 || summary == 3 || isT))
   13780          goto after_vfp_fldms_fstms;
   13781 
   13782       /* offset must specify at least one register */
   13783       if (offset < 1)
   13784          goto after_vfp_fldms_fstms;
   13785 
   13786       /* can't transfer regs after S31 */
   13787       if (fD + nRegs - 1 >= 32)
   13788          goto after_vfp_fldms_fstms;
   13789 
   13790       /* Now, we can't do a conditional load or store, since that very
   13791          likely will generate an exception.  So we have to take a side
   13792          exit at this point if the condition is false. */
   13793       if (condT != IRTemp_INVALID) {
   13794          if (isT)
   13795             mk_skip_over_T32_if_cond_is_false( condT );
   13796          else
   13797             mk_skip_over_A32_if_cond_is_false( condT );
   13798          condT = IRTemp_INVALID;
   13799       }
   13800       /* Ok, now we're unconditional.  Do the load or store. */
   13801 
   13802       /* get the old Rn value */
   13803       IRTemp rnT = newTemp(Ity_I32);
   13804       assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
   13805                            rN == 15));
   13806 
   13807       /* make a new value for Rn, post-insn */
   13808       IRTemp rnTnew = IRTemp_INVALID;
   13809       if (summary == 2 || summary == 3) {
   13810          rnTnew = newTemp(Ity_I32);
   13811          assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
   13812                               mkexpr(rnT),
   13813                               mkU32(4 * nRegs)));
   13814       }
   13815 
   13816       /* decide on the base transfer address */
   13817       IRTemp taT = newTemp(Ity_I32);
   13818       assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
   13819 
   13820       /* update Rn if necessary -- in case 3, we're moving it down, so
   13821          update before any memory reference, in order to keep Memcheck
   13822          and V's stack-extending logic (on linux) happy */
   13823       if (summary == 3) {
   13824          if (isT)
   13825             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   13826          else
   13827             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   13828       }
   13829 
   13830       /* generate the transfers */
   13831       for (i = 0; i < nRegs; i++) {
   13832          IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(4*i));
   13833          if (bL) {
   13834             putFReg(fD + i, loadLE(Ity_F32, addr), IRTemp_INVALID);
   13835          } else {
   13836             storeLE(addr, getFReg(fD + i));
   13837          }
   13838       }
   13839 
   13840       /* update Rn if necessary -- in case 2, we're moving it up, so
   13841          update after any memory reference, in order to keep Memcheck
   13842          and V's stack-extending logic (on linux) happy */
   13843       if (summary == 2) {
   13844          if (isT)
   13845             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   13846          else
   13847             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   13848       }
   13849 
   13850       const HChar* nm = bL==1 ? "ld" : "st";
   13851       switch (summary) {
   13852          case 1:  DIP("f%sms%s r%u, {s%u-s%u}\n",
   13853                       nm, nCC(conq), rN, fD, fD + nRegs - 1);
   13854                   break;
   13855          case 2:  DIP("f%smias%s r%u!, {s%u-s%u}\n",
   13856                       nm, nCC(conq), rN, fD, fD + nRegs - 1);
   13857                   break;
   13858          case 3:  DIP("f%smdbs%s r%u!, {s%u-s%u}\n",
   13859                       nm, nCC(conq), rN, fD, fD + nRegs - 1);
   13860                   break;
   13861          default: vassert(0);
   13862       }
   13863 
   13864       goto decode_success_vfp;
   13865       /* FIXME alignment constraints? */
   13866    }
   13867 
   13868   after_vfp_fldms_fstms:
   13869 
   13870    /* --------------------- fmsr, fmrs --------------------- */
   13871    if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
   13872        && BITS4(1,0,1,0) == INSN(11,8)
   13873        && BITS4(0,0,0,0) == INSN(3,0)
   13874        && BITS4(0,0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
   13875       UInt rD  = INSN(15,12);
   13876       UInt b7  = (insn28 >> 7) & 1;
   13877       UInt fN  = (INSN(19,16) << 1) | b7;
   13878       UInt b20 = (insn28 >> 20) & 1;
   13879       if (rD == 15) {
   13880          /* fall through */
   13881          /* Let's assume that no sane person would want to do
   13882             floating-point transfers to or from the program counter,
   13883             and simply decline to decode the instruction.  The ARM ARM
   13884             doesn't seem to explicitly disallow this case, though. */
   13885       } else {
   13886          if (b20) {
   13887             IRExpr* res = unop(Iop_ReinterpF32asI32, getFReg(fN));
   13888             if (isT)
   13889                putIRegT(rD, res, condT);
   13890             else
   13891                putIRegA(rD, res, condT, Ijk_Boring);
   13892             DIP("fmrs%s r%u, s%u\n", nCC(conq), rD, fN);
   13893          } else {
   13894             putFReg(fN, unop(Iop_ReinterpI32asF32,
   13895                              isT ? getIRegT(rD) : getIRegA(rD)),
   13896                         condT);
   13897             DIP("fmsr%s s%u, r%u\n", nCC(conq), fN, rD);
   13898          }
   13899          goto decode_success_vfp;
   13900       }
   13901       /* fall through */
   13902    }
   13903 
   13904    /* --------------------- f{ld,st}s --------------------- */
   13905    // FLDS, FSTS
   13906    if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
   13907        && BITS4(1,0,1,0) == INSN(11,8)) {
   13908       UInt bD     = (insn28 >> 22) & 1;
   13909       UInt fD     = (INSN(15,12) << 1) | bD;
   13910       UInt rN     = INSN(19,16);
   13911       UInt offset = (insn28 & 0xFF) << 2;
   13912       UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
   13913       UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
   13914       /* make unconditional */
   13915       if (condT != IRTemp_INVALID) {
   13916          if (isT)
   13917             mk_skip_over_T32_if_cond_is_false( condT );
   13918          else
   13919             mk_skip_over_A32_if_cond_is_false( condT );
   13920          condT = IRTemp_INVALID;
   13921       }
   13922       IRTemp ea = newTemp(Ity_I32);
   13923       assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
   13924                        align4if(isT ? getIRegT(rN) : getIRegA(rN),
   13925                                 rN == 15),
   13926                        mkU32(offset)));
   13927       if (bL) {
   13928          putFReg(fD, loadLE(Ity_F32,mkexpr(ea)), IRTemp_INVALID);
   13929       } else {
   13930          storeLE(mkexpr(ea), getFReg(fD));
   13931       }
   13932       DIP("f%ss%s s%u, [r%u, %c#%u]\n",
   13933           bL ? "ld" : "st", nCC(conq), fD, rN,
   13934           bU ? '+' : '-', offset);
   13935       goto decode_success_vfp;
   13936    }
   13937 
   13938    /* --------------------- dp insns (F) --------------------- */
   13939    if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
   13940        && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
   13941        && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
   13942       UInt    bM  = (insn28 >> 5) & 1;
   13943       UInt    bD  = (insn28 >> 22) & 1;
   13944       UInt    bN  = (insn28 >> 7) & 1;
   13945       UInt    fM  = (INSN(3,0) << 1) | bM;   /* argR */
   13946       UInt    fD  = (INSN(15,12) << 1) | bD; /* dst/acc */
   13947       UInt    fN  = (INSN(19,16) << 1) | bN; /* argL */
   13948       UInt    bP  = (insn28 >> 23) & 1;
   13949       UInt    bQ  = (insn28 >> 21) & 1;
   13950       UInt    bR  = (insn28 >> 20) & 1;
   13951       UInt    bS  = (insn28 >> 6) & 1;
   13952       UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
   13953       IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   13954       switch (opc) {
   13955          case BITS4(0,0,0,0): /* MAC: d + n * m */
   13956             putFReg(fD, triop(Iop_AddF32, rm,
   13957                               getFReg(fD),
   13958                               triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
   13959                         condT);
   13960             DIP("fmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   13961             goto decode_success_vfp;
   13962          case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
   13963             putFReg(fD, triop(Iop_AddF32, rm,
   13964                               getFReg(fD),
   13965                               unop(Iop_NegF32,
   13966                                    triop(Iop_MulF32, rm, getFReg(fN),
   13967                                                          getFReg(fM)))),
   13968                         condT);
   13969             DIP("fnmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   13970             goto decode_success_vfp;
   13971          case BITS4(0,0,1,0): /* MSC: - d + n * m */
   13972             putFReg(fD, triop(Iop_AddF32, rm,
   13973                               unop(Iop_NegF32, getFReg(fD)),
   13974                               triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
   13975                         condT);
   13976             DIP("fmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   13977             goto decode_success_vfp;
   13978          case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
   13979             putFReg(fD, triop(Iop_AddF32, rm,
   13980                               unop(Iop_NegF32, getFReg(fD)),
   13981                               unop(Iop_NegF32,
   13982                                    triop(Iop_MulF32, rm,
   13983                                                      getFReg(fN),
   13984                                                     getFReg(fM)))),
   13985                         condT);
   13986             DIP("fnmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   13987             goto decode_success_vfp;
   13988          case BITS4(0,1,0,0): /* MUL: n * m */
   13989             putFReg(fD, triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM)),
   13990                         condT);
   13991             DIP("fmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   13992             goto decode_success_vfp;
   13993          case BITS4(0,1,0,1): /* NMUL: - n * m */
   13994             putFReg(fD, unop(Iop_NegF32,
   13995                              triop(Iop_MulF32, rm, getFReg(fN),
   13996                                                    getFReg(fM))),
   13997                     condT);
   13998             DIP("fnmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   13999             goto decode_success_vfp;
   14000          case BITS4(0,1,1,0): /* ADD: n + m */
   14001             putFReg(fD, triop(Iop_AddF32, rm, getFReg(fN), getFReg(fM)),
   14002                         condT);
   14003             DIP("fadds%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14004             goto decode_success_vfp;
   14005          case BITS4(0,1,1,1): /* SUB: n - m */
   14006             putFReg(fD, triop(Iop_SubF32, rm, getFReg(fN), getFReg(fM)),
   14007                         condT);
   14008             DIP("fsubs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14009             goto decode_success_vfp;
   14010          case BITS4(1,0,0,0): /* DIV: n / m */
   14011             putFReg(fD, triop(Iop_DivF32, rm, getFReg(fN), getFReg(fM)),
   14012                         condT);
   14013             DIP("fdivs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14014             goto decode_success_vfp;
   14015          case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
   14016             /* XXXROUNDINGFIXME look up ARM reference for fused
   14017                multiply-add rounding */
   14018             putFReg(fD, triop(Iop_AddF32, rm,
   14019                               getFReg(fD),
   14020                               triop(Iop_MulF32, rm, getFReg(fN),
   14021                                                     getFReg(fM))),
   14022                         condT);
   14023             DIP("vfmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14024             goto decode_success_vfp;
   14025          case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
   14026             /* XXXROUNDINGFIXME look up ARM reference for fused
   14027                multiply-add rounding */
   14028             putFReg(fD, triop(Iop_AddF32, rm,
   14029                               getFReg(fD),
   14030                               triop(Iop_MulF32, rm,
   14031                                     unop(Iop_NegF32, getFReg(fN)),
   14032                                     getFReg(fM))),
   14033                         condT);
   14034             DIP("vfmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14035             goto decode_success_vfp;
   14036          default:
   14037             break;
   14038       }
   14039    }
   14040 
   14041    /* --------------------- compares (S) --------------------- */
   14042    /*          31   27   23   19   15 11   7    3
   14043                  28   24   20   16 12    8    4    0
   14044       FCMPS    cond 1110 1D11 0100 Fd 1010 01M0 Fm
   14045       FCMPES   cond 1110 1D11 0100 Fd 1010 11M0 Fm
   14046       FCMPZS   cond 1110 1D11 0101 Fd 1010 0100 0000
   14047       FCMPZES  cond 1110 1D11 0101 Fd 1010 1100 0000
   14048                                  Z         N
   14049 
   14050       Z=0 Compare Fd:D vs Fm:M     and set FPSCR 31:28 accordingly
   14051       Z=1 Compare Fd:D vs zero
   14052 
   14053       N=1 generates Invalid Operation exn if either arg is any kind of NaN
   14054       N=0 generates Invalid Operation exn if either arg is a signalling NaN
   14055       (Not that we pay any attention to N here)
   14056    */
   14057    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14058        && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   14059        && BITS4(1,0,1,0) == INSN(11,8)
   14060        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   14061       UInt bZ = (insn28 >> 16) & 1;
   14062       UInt bN = (insn28 >> 7) & 1;
   14063       UInt bD = (insn28 >> 22) & 1;
   14064       UInt bM = (insn28 >> 5) & 1;
   14065       UInt fD = (INSN(15,12) << 1) | bD;
   14066       UInt fM = (INSN(3,0) << 1) | bM;
   14067       if (bZ && (INSN(3,0) != 0 || (INSN(7,4) & 3) != 0)) {
   14068          /* does not decode; fall through */
   14069       } else {
   14070          IRTemp argL = newTemp(Ity_F64);
   14071          IRTemp argR = newTemp(Ity_F64);
   14072          IRTemp irRes = newTemp(Ity_I32);
   14073 
   14074          assign(argL, unop(Iop_F32toF64, getFReg(fD)));
   14075          assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0))
   14076                          : unop(Iop_F32toF64, getFReg(fM)));
   14077          assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
   14078 
   14079          IRTemp nzcv     = IRTemp_INVALID;
   14080          IRTemp oldFPSCR = newTemp(Ity_I32);
   14081          IRTemp newFPSCR = newTemp(Ity_I32);
   14082 
   14083          /* This is where the fun starts.  We have to convert 'irRes'
   14084             from an IR-convention return result (IRCmpF64Result) to an
   14085             ARM-encoded (N,Z,C,V) group.  The final result is in the
   14086             bottom 4 bits of 'nzcv'. */
   14087          /* Map compare result from IR to ARM(nzcv) */
   14088          /*
   14089             FP cmp result | IR   | ARM(nzcv)
   14090             --------------------------------
   14091             UN              0x45   0011
   14092             LT              0x01   1000
   14093             GT              0x00   0010
   14094             EQ              0x40   0110
   14095          */
   14096          nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
   14097 
   14098          /* And update FPSCR accordingly */
   14099          assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
   14100          assign(newFPSCR,
   14101                 binop(Iop_Or32,
   14102                       binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
   14103                       binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
   14104 
   14105          putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
   14106 
   14107          if (bZ) {
   14108             DIP("fcmpz%ss%s s%u\n", bN ? "e" : "", nCC(conq), fD);
   14109          } else {
   14110             DIP("fcmp%ss%s s%u, s%u\n", bN ? "e" : "",
   14111                 nCC(conq), fD, fM);
   14112          }
   14113          goto decode_success_vfp;
   14114       }
   14115       /* fall through */
   14116    }
   14117 
   14118    /* --------------------- unary (S) --------------------- */
   14119    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14120        && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   14121        && BITS4(1,0,1,0) == INSN(11,8)
   14122        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   14123       UInt bD = (insn28 >> 22) & 1;
   14124       UInt bM = (insn28 >> 5) & 1;
   14125       UInt fD  = (INSN(15,12) << 1) | bD;
   14126       UInt fM  = (INSN(3,0) << 1) | bM;
   14127       UInt b16 = (insn28 >> 16) & 1;
   14128       UInt b7  = (insn28 >> 7) & 1;
   14129       /**/ if (b16 == 0 && b7 == 0) {
   14130          // FCPYS
   14131          putFReg(fD, getFReg(fM), condT);
   14132          DIP("fcpys%s s%u, s%u\n", nCC(conq), fD, fM);
   14133          goto decode_success_vfp;
   14134       }
   14135       else if (b16 == 0 && b7 == 1) {
   14136          // FABSS
   14137          putFReg(fD, unop(Iop_AbsF32, getFReg(fM)), condT);
   14138          DIP("fabss%s s%u, s%u\n", nCC(conq), fD, fM);
   14139          goto decode_success_vfp;
   14140       }
   14141       else if (b16 == 1 && b7 == 0) {
   14142          // FNEGS
   14143          putFReg(fD, unop(Iop_NegF32, getFReg(fM)), condT);
   14144          DIP("fnegs%s s%u, s%u\n", nCC(conq), fD, fM);
   14145          goto decode_success_vfp;
   14146       }
   14147       else if (b16 == 1 && b7 == 1) {
   14148          // FSQRTS
   14149          IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   14150          putFReg(fD, binop(Iop_SqrtF32, rm, getFReg(fM)), condT);
   14151          DIP("fsqrts%s s%u, s%u\n", nCC(conq), fD, fM);
   14152          goto decode_success_vfp;
   14153       }
   14154       else
   14155          vassert(0);
   14156 
   14157       /* fall through */
   14158    }
   14159 
   14160    /* ----------------- I <-> S conversions ----------------- */
   14161 
   14162    // F{S,U}ITOS fD, fM
   14163    /* These are more complex than FSITOD/FUITOD.  In the D cases, a 32
   14164       bit int will always fit within the 53 bit mantissa, so there's
   14165       no possibility of a loss of precision, but that's obviously not
   14166       the case here.  Hence this case possibly requires rounding, and
   14167       so it drags in the current rounding mode. */
   14168    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14169        && BITS4(1,0,0,0) == INSN(19,16)
   14170        && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
   14171        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   14172       UInt bM    = (insn28 >> 5) & 1;
   14173       UInt bD    = (insn28 >> 22) & 1;
   14174       UInt fM    = (INSN(3,0) << 1) | bM;
   14175       UInt fD    = (INSN(15,12) << 1) | bD;
   14176       UInt syned = (insn28 >> 7) & 1;
   14177       IRTemp rmode = newTemp(Ity_I32);
   14178       assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
   14179       if (syned) {
   14180          // FSITOS
   14181          putFReg(fD, binop(Iop_F64toF32,
   14182                            mkexpr(rmode),
   14183                            unop(Iop_I32StoF64,
   14184                                 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
   14185                  condT);
   14186          DIP("fsitos%s s%u, s%u\n", nCC(conq), fD, fM);
   14187       } else {
   14188          // FUITOS
   14189          putFReg(fD, binop(Iop_F64toF32,
   14190                            mkexpr(rmode),
   14191                            unop(Iop_I32UtoF64,
   14192                                 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
   14193                  condT);
   14194          DIP("fuitos%s s%u, s%u\n", nCC(conq), fD, fM);
   14195       }
   14196       goto decode_success_vfp;
   14197    }
   14198 
   14199    // FTO{S,U}IS fD, fM
   14200    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14201        && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   14202        && BITS4(1,0,1,0) == INSN(11,8)
   14203        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   14204       UInt   bM    = (insn28 >> 5) & 1;
   14205       UInt   bD    = (insn28 >> 22) & 1;
   14206       UInt   fD    = (INSN(15,12) << 1) | bD;
   14207       UInt   fM    = (INSN(3,0) << 1) | bM;
   14208       UInt   bZ    = (insn28 >> 7) & 1;
   14209       UInt   syned = (insn28 >> 16) & 1;
   14210       IRTemp rmode = newTemp(Ity_I32);
   14211       assign(rmode, bZ ? mkU32(Irrm_ZERO)
   14212                        : mkexpr(mk_get_IR_rounding_mode()));
   14213       if (syned) {
   14214          // FTOSIS
   14215          putFReg(fD, unop(Iop_ReinterpI32asF32,
   14216                           binop(Iop_F64toI32S, mkexpr(rmode),
   14217                                 unop(Iop_F32toF64, getFReg(fM)))),
   14218                  condT);
   14219          DIP("ftosi%ss%s s%u, d%u\n", bZ ? "z" : "",
   14220              nCC(conq), fD, fM);
   14221          goto decode_success_vfp;
   14222       } else {
   14223          // FTOUIS
   14224          putFReg(fD, unop(Iop_ReinterpI32asF32,
   14225                           binop(Iop_F64toI32U, mkexpr(rmode),
   14226                                 unop(Iop_F32toF64, getFReg(fM)))),
   14227                  condT);
   14228          DIP("ftoui%ss%s s%u, d%u\n", bZ ? "z" : "",
   14229              nCC(conq), fD, fM);
   14230          goto decode_success_vfp;
   14231       }
   14232    }
   14233 
   14234    /* ----------------- S <-> D conversions ----------------- */
   14235 
   14236    // FCVTDS
   14237    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14238        && BITS4(0,1,1,1) == INSN(19,16)
   14239        && BITS4(1,0,1,0) == INSN(11,8)
   14240        && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
   14241       UInt dD = INSN(15,12) | (INSN(22,22) << 4);
   14242       UInt bM = (insn28 >> 5) & 1;
   14243       UInt fM = (INSN(3,0) << 1) | bM;
   14244       putDReg(dD, unop(Iop_F32toF64, getFReg(fM)), condT);
   14245       DIP("fcvtds%s d%u, s%u\n", nCC(conq), dD, fM);
   14246       goto decode_success_vfp;
   14247    }
   14248 
   14249    // FCVTSD
   14250    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14251        && BITS4(0,1,1,1) == INSN(19,16)
   14252        && BITS4(1,0,1,1) == INSN(11,8)
   14253        && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
   14254       UInt   bD    = (insn28 >> 22) & 1;
   14255       UInt   fD    = (INSN(15,12) << 1) | bD;
   14256       UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
   14257       IRTemp rmode = newTemp(Ity_I32);
   14258       assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
   14259       putFReg(fD, binop(Iop_F64toF32, mkexpr(rmode), getDReg(dM)),
   14260                   condT);
   14261       DIP("fcvtsd%s s%u, d%u\n", nCC(conq), fD, dM);
   14262       goto decode_success_vfp;
   14263    }
   14264 
   14265    /* --------------- VCVT fixed<->floating, VFP --------------- */
   14266    /*          31   27   23   19   15 11   7    3
   14267                  28   24   20   16 12    8    4    0
   14268 
   14269                cond 1110 1D11 1p1U Vd 101f x1i0 imm4
   14270 
   14271       VCVT<c>.<Td>.F64 <Dd>, <Dd>, #fbits
   14272       VCVT<c>.<Td>.F32 <Sd>, <Sd>, #fbits
   14273       VCVT<c>.F64.<Td> <Dd>, <Dd>, #fbits
   14274       VCVT<c>.F32.<Td> <Sd>, <Sd>, #fbits
   14275       are of this form.  We only handle a subset of the cases though.
   14276    */
   14277    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14278        && BITS4(1,0,1,0) == (INSN(19,16) & BITS4(1,0,1,0))
   14279        && BITS3(1,0,1) == INSN(11,9)
   14280        && BITS3(1,0,0) == (INSN(6,4) & BITS3(1,0,1))) {
   14281       UInt bD        = INSN(22,22);
   14282       UInt bOP       = INSN(18,18);
   14283       UInt bU        = INSN(16,16);
   14284       UInt Vd        = INSN(15,12);
   14285       UInt bSF       = INSN(8,8);
   14286       UInt bSX       = INSN(7,7);
   14287       UInt bI        = INSN(5,5);
   14288       UInt imm4      = INSN(3,0);
   14289       Bool to_fixed  = bOP == 1;
   14290       Bool dp_op     = bSF == 1;
   14291       Bool unsyned   = bU == 1;
   14292       UInt size      = bSX == 0 ? 16 : 32;
   14293       Int  frac_bits = size - ((imm4 << 1) | bI);
   14294       UInt d         = dp_op  ? ((bD << 4) | Vd)  : ((Vd << 1) | bD);
   14295       if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && !dp_op
   14296                                             && size == 32) {
   14297          /* VCVT.F32.{S,U}32 S[d], S[d], #frac_bits */
   14298          /* This generates really horrible code.  We could potentially
   14299             do much better. */
   14300          IRTemp rmode = newTemp(Ity_I32);
   14301          assign(rmode, mkU32(Irrm_NEAREST)); // per the spec
   14302          IRTemp src32 = newTemp(Ity_I32);
   14303          assign(src32,  unop(Iop_ReinterpF32asI32, getFReg(d)));
   14304          IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
   14305                                 mkexpr(src32 ) );
   14306          IRTemp scale = newTemp(Ity_F64);
   14307          assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
   14308          IRExpr* rm     = mkU32(Irrm_NEAREST);
   14309          IRExpr* resF64 = triop(Iop_DivF64,
   14310                                 rm, as_F64,
   14311                                 triop(Iop_AddF64, rm, mkexpr(scale),
   14312                                                       mkexpr(scale)));
   14313          IRExpr* resF32 = binop(Iop_F64toF32, mkexpr(rmode), resF64);
   14314          putFReg(d, resF32, condT);
   14315          DIP("vcvt.f32.%c32, s%u, s%u, #%d\n",
   14316              unsyned ? 'u' : 's', d, d, frac_bits);
   14317          goto decode_success_vfp;
   14318       }
   14319       if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && dp_op
   14320                                             && size == 32) {
   14321          /* VCVT.F64.{S,U}32 D[d], D[d], #frac_bits */
   14322          /* This generates really horrible code.  We could potentially
   14323             do much better. */
   14324          IRTemp src32 = newTemp(Ity_I32);
   14325          assign(src32, unop(Iop_64to32, getDRegI64(d)));
   14326          IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
   14327                                 mkexpr(src32 ) );
   14328          IRTemp scale = newTemp(Ity_F64);
   14329          assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
   14330          IRExpr* rm     = mkU32(Irrm_NEAREST);
   14331          IRExpr* resF64 = triop(Iop_DivF64,
   14332                                 rm, as_F64,
   14333                                 triop(Iop_AddF64, rm, mkexpr(scale),
   14334                                                       mkexpr(scale)));
   14335          putDReg(d, resF64, condT);
   14336          DIP("vcvt.f64.%c32, d%u, d%u, #%d\n",
   14337              unsyned ? 'u' : 's', d, d, frac_bits);
   14338          goto decode_success_vfp;
   14339       }
   14340       if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && dp_op
   14341                                             && size == 32) {
   14342          /* VCVT.{S,U}32.F64 D[d], D[d], #frac_bits */
   14343          IRTemp srcF64 = newTemp(Ity_F64);
   14344          assign(srcF64, getDReg(d));
   14345          IRTemp scale = newTemp(Ity_F64);
   14346          assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
   14347          IRTemp scaledF64 = newTemp(Ity_F64);
   14348          IRExpr* rm = mkU32(Irrm_NEAREST);
   14349          assign(scaledF64, triop(Iop_MulF64,
   14350                                  rm, mkexpr(srcF64),
   14351                                  triop(Iop_AddF64, rm, mkexpr(scale),
   14352                                                        mkexpr(scale))));
   14353          IRTemp rmode = newTemp(Ity_I32);
   14354          assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
   14355          IRTemp asI32 = newTemp(Ity_I32);
   14356          assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
   14357                              mkexpr(rmode), mkexpr(scaledF64)));
   14358          putDRegI64(d, unop(unsyned ? Iop_32Uto64 : Iop_32Sto64,
   14359                             mkexpr(asI32)), condT);
   14360          goto decode_success_vfp;
   14361       }
   14362       /* fall through */
   14363    }
   14364 
   14365    /* FAILURE */
   14366    return False;
   14367 
   14368   decode_success_vfp:
   14369    /* Check that any accepted insn really is a CP10 or CP11 insn, iow,
   14370       assert that we aren't accepting, in this fn, insns that actually
   14371       should be handled somewhere else. */
   14372    vassert(INSN(11,9) == BITS3(1,0,1)); // 11:8 = 1010 or 1011
   14373    return True;
   14374 
   14375 #  undef INSN
   14376 }
   14377 
   14378 
   14379 /*------------------------------------------------------------*/
   14380 /*--- Instructions in NV (never) space                     ---*/
   14381 /*------------------------------------------------------------*/
   14382 
   14383 /* ARM only */
   14384 /* Translate a NV space instruction.  If successful, returns True and
   14385    *dres may or may not be updated.  If failure, returns False and
   14386    doesn't change *dres nor create any IR.
   14387 
   14388    Note that all NEON instructions (in ARM mode) are handled through
   14389    here, since they are all in NV space.
   14390 */
   14391 static Bool decode_NV_instruction ( /*MOD*/DisResult* dres,
   14392                                     VexArchInfo* archinfo,
   14393                                     UInt insn )
   14394 {
   14395 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   14396 #  define INSN_COND          SLICE_UInt(insn, 31, 28)
   14397 
   14398    HChar dis_buf[128];
   14399 
   14400    // Should only be called for NV instructions
   14401    vassert(BITS4(1,1,1,1) == INSN_COND);
   14402 
   14403    /* ------------------------ pld ------------------------ */
   14404    if (BITS8(0,1,0,1, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
   14405        && BITS4(1,1,1,1) == INSN(15,12)) {
   14406       UInt rN    = INSN(19,16);
   14407       UInt imm12 = INSN(11,0);
   14408       UInt bU    = INSN(23,23);
   14409       DIP("pld [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
   14410       return True;
   14411    }
   14412 
   14413    if (BITS8(0,1,1,1, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
   14414        && BITS4(1,1,1,1) == INSN(15,12)
   14415        && 0 == INSN(4,4)) {
   14416       UInt rN   = INSN(19,16);
   14417       UInt rM   = INSN(3,0);
   14418       UInt imm5 = INSN(11,7);
   14419       UInt sh2  = INSN(6,5);
   14420       UInt bU   = INSN(23,23);
   14421       if (rM != 15) {
   14422          IRExpr* eaE = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   14423                                                        sh2, imm5, dis_buf);
   14424          IRTemp eaT = newTemp(Ity_I32);
   14425          /* Bind eaE to a temp merely for debugging-vex purposes, so we
   14426             can check it's a plausible decoding.  It will get removed
   14427             by iropt a little later on. */
   14428          vassert(eaE);
   14429          assign(eaT, eaE);
   14430          DIP("pld %s\n", dis_buf);
   14431          return True;
   14432       }
   14433       /* fall through */
   14434    }
   14435 
   14436    /* ------------------------ pli ------------------------ */
   14437    if (BITS8(0,1,0,0, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
   14438        && BITS4(1,1,1,1) == INSN(15,12)) {
   14439       UInt rN    = INSN(19,16);
   14440       UInt imm12 = INSN(11,0);
   14441       UInt bU    = INSN(23,23);
   14442       DIP("pli [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
   14443       return True;
   14444    }
   14445 
   14446    /* --------------------- Interworking branches --------------------- */
   14447 
   14448    // BLX (1), viz, unconditional branch and link to R15+simm24
   14449    // and set CPSR.T = 1, that is, switch to Thumb mode
   14450    if (INSN(31,25) == BITS7(1,1,1,1,1,0,1)) {
   14451       UInt bitH   = INSN(24,24);
   14452       Int  uimm24 = INSN(23,0);
   14453       Int  simm24 = (((uimm24 << 8) >> 8) << 2) + (bitH << 1);
   14454       /* Now this is a bit tricky.  Since we're decoding an ARM insn,
   14455          it is implied that CPSR.T == 0.  Hence the current insn's
   14456          address is guaranteed to be of the form X--(30)--X00.  So, no
   14457          need to mask any bits off it.  But need to set the lowest bit
   14458          to 1 to denote we're in Thumb mode after this, since
   14459          guest_R15T has CPSR.T as the lowest bit.  And we can't chase
   14460          into the call, so end the block at this point. */
   14461       UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
   14462       putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
   14463                     IRTemp_INVALID/*because AL*/, Ijk_Boring );
   14464       llPutIReg(15, mkU32(dst));
   14465       dres->jk_StopHere = Ijk_Call;
   14466       dres->whatNext    = Dis_StopHere;
   14467       DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1);
   14468       return True;
   14469    }
   14470 
   14471    /* ------------------- v7 barrier insns ------------------- */
   14472    switch (insn) {
   14473       case 0xF57FF06F: /* ISB */
   14474          stmt( IRStmt_MBE(Imbe_Fence) );
   14475          DIP("ISB\n");
   14476          return True;
   14477       case 0xF57FF04F: /* DSB sy */
   14478       case 0xF57FF04E: /* DSB st */
   14479       case 0xF57FF04B: /* DSB ish */
   14480       case 0xF57FF04A: /* DSB ishst */
   14481       case 0xF57FF047: /* DSB nsh */
   14482       case 0xF57FF046: /* DSB nshst */
   14483       case 0xF57FF043: /* DSB osh */
   14484       case 0xF57FF042: /* DSB oshst */
   14485          stmt( IRStmt_MBE(Imbe_Fence) );
   14486          DIP("DSB\n");
   14487          return True;
   14488       case 0xF57FF05F: /* DMB sy */
   14489       case 0xF57FF05E: /* DMB st */
   14490       case 0xF57FF05B: /* DMB ish */
   14491       case 0xF57FF05A: /* DMB ishst */
   14492       case 0xF57FF057: /* DMB nsh */
   14493       case 0xF57FF056: /* DMB nshst */
   14494       case 0xF57FF053: /* DMB osh */
   14495       case 0xF57FF052: /* DMB oshst */
   14496          stmt( IRStmt_MBE(Imbe_Fence) );
   14497          DIP("DMB\n");
   14498          return True;
   14499       default:
   14500          break;
   14501    }
   14502 
   14503    /* ------------------- CLREX ------------------ */
   14504    if (insn == 0xF57FF01F) {
   14505       /* AFAICS, this simply cancels a (all?) reservations made by a
   14506          (any?) preceding LDREX(es).  Arrange to hand it through to
   14507          the back end. */
   14508       stmt( IRStmt_MBE(Imbe_CancelReservation) );
   14509       DIP("clrex\n");
   14510       return True;
   14511    }
   14512 
   14513    /* ------------------- NEON ------------------- */
   14514    if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
   14515       Bool ok_neon = decode_NEON_instruction(
   14516                         dres, insn, IRTemp_INVALID/*unconditional*/,
   14517                         False/*!isT*/
   14518                      );
   14519       if (ok_neon)
   14520          return True;
   14521    }
   14522 
   14523    // unrecognised
   14524    return False;
   14525 
   14526 #  undef INSN_COND
   14527 #  undef INSN
   14528 }
   14529 
   14530 
   14531 /*------------------------------------------------------------*/
   14532 /*--- Disassemble a single ARM instruction                 ---*/
   14533 /*------------------------------------------------------------*/
   14534 
   14535 /* Disassemble a single ARM instruction into IR.  The instruction is
   14536    located in host memory at guest_instr, and has (decoded) guest IP
   14537    of guest_R15_curr_instr_notENC, which will have been set before the
   14538    call here. */
   14539 
   14540 static
   14541 DisResult disInstr_ARM_WRK (
   14542              Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
   14543              Bool         resteerCisOk,
   14544              void*        callback_opaque,
   14545              UChar*       guest_instr,
   14546              VexArchInfo* archinfo,
   14547              VexAbiInfo*  abiinfo,
   14548              Bool         sigill_diag
   14549           )
   14550 {
   14551    // A macro to fish bits out of 'insn'.
   14552 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   14553 #  define INSN_COND          SLICE_UInt(insn, 31, 28)
   14554 
   14555    DisResult dres;
   14556    UInt      insn;
   14557    //Bool      allow_VFP = False;
   14558    //UInt      hwcaps = archinfo->hwcaps;
   14559    IRTemp    condT; /* :: Ity_I32 */
   14560    UInt      summary;
   14561    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
   14562 
   14563    /* What insn variants are we supporting today? */
   14564    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
   14565    // etc etc
   14566 
   14567    /* Set result defaults. */
   14568    dres.whatNext    = Dis_Continue;
   14569    dres.len         = 4;
   14570    dres.continueAt  = 0;
   14571    dres.jk_StopHere = Ijk_INVALID;
   14572 
   14573    /* Set default actions for post-insn handling of writes to r15, if
   14574       required. */
   14575    r15written = False;
   14576    r15guard   = IRTemp_INVALID; /* unconditional */
   14577    r15kind    = Ijk_Boring;
   14578 
   14579    /* At least this is simple on ARM: insns are all 4 bytes long, and
   14580       4-aligned.  So just fish the whole thing out of memory right now
   14581       and have done. */
   14582    insn = getUIntLittleEndianly( guest_instr );
   14583 
   14584    if (0) vex_printf("insn: 0x%x\n", insn);
   14585 
   14586    DIP("\t(arm) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
   14587 
   14588    vassert(0 == (guest_R15_curr_instr_notENC & 3));
   14589 
   14590    /* ----------------------------------------------------------- */
   14591 
   14592    /* Spot "Special" instructions (see comment at top of file). */
   14593    {
   14594       UChar* code = (UChar*)guest_instr;
   14595       /* Spot the 16-byte preamble:
   14596 
   14597          e1a0c1ec  mov r12, r12, ROR #3
   14598          e1a0c6ec  mov r12, r12, ROR #13
   14599          e1a0ceec  mov r12, r12, ROR #29
   14600          e1a0c9ec  mov r12, r12, ROR #19
   14601       */
   14602       UInt word1 = 0xE1A0C1EC;
   14603       UInt word2 = 0xE1A0C6EC;
   14604       UInt word3 = 0xE1A0CEEC;
   14605       UInt word4 = 0xE1A0C9EC;
   14606       if (getUIntLittleEndianly(code+ 0) == word1 &&
   14607           getUIntLittleEndianly(code+ 4) == word2 &&
   14608           getUIntLittleEndianly(code+ 8) == word3 &&
   14609           getUIntLittleEndianly(code+12) == word4) {
   14610          /* Got a "Special" instruction preamble.  Which one is it? */
   14611          if (getUIntLittleEndianly(code+16) == 0xE18AA00A
   14612                                                /* orr r10,r10,r10 */) {
   14613             /* R3 = client_request ( R4 ) */
   14614             DIP("r3 = client_request ( %%r4 )\n");
   14615             llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
   14616             dres.jk_StopHere = Ijk_ClientReq;
   14617             dres.whatNext    = Dis_StopHere;
   14618             goto decode_success;
   14619          }
   14620          else
   14621          if (getUIntLittleEndianly(code+16) == 0xE18BB00B
   14622                                                /* orr r11,r11,r11 */) {
   14623             /* R3 = guest_NRADDR */
   14624             DIP("r3 = guest_NRADDR\n");
   14625             dres.len = 20;
   14626             llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
   14627             goto decode_success;
   14628          }
   14629          else
   14630          if (getUIntLittleEndianly(code+16) == 0xE18CC00C
   14631                                                /* orr r12,r12,r12 */) {
   14632             /*  branch-and-link-to-noredir R4 */
   14633             DIP("branch-and-link-to-noredir r4\n");
   14634             llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
   14635             llPutIReg(15, llGetIReg(4));
   14636             dres.jk_StopHere = Ijk_NoRedir;
   14637             dres.whatNext    = Dis_StopHere;
   14638             goto decode_success;
   14639          }
   14640          else
   14641          if (getUIntLittleEndianly(code+16) == 0xE1899009
   14642                                                /* orr r9,r9,r9 */) {
   14643             /* IR injection */
   14644             DIP("IR injection\n");
   14645             vex_inject_ir(irsb, Iend_LE);
   14646             // Invalidate the current insn. The reason is that the IRop we're
   14647             // injecting here can change. In which case the translation has to
   14648             // be redone. For ease of handling, we simply invalidate all the
   14649             // time.
   14650             stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
   14651             stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
   14652             llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
   14653             dres.whatNext    = Dis_StopHere;
   14654             dres.jk_StopHere = Ijk_InvalICache;
   14655             goto decode_success;
   14656          }
   14657          /* We don't know what it is.  Set insn so decode_failure
   14658             can print the insn following the Special-insn preamble. */
   14659          insn = getUIntLittleEndianly(code+16);
   14660          goto decode_failure;
   14661          /*NOTREACHED*/
   14662       }
   14663 
   14664    }
   14665 
   14666    /* ----------------------------------------------------------- */
   14667 
   14668    /* Main ARM instruction decoder starts here. */
   14669 
   14670    /* Deal with the condition.  Strategy is to merely generate a
   14671       condition temporary at this point (or IRTemp_INVALID, meaning
   14672       unconditional).  We leave it to lower-level instruction decoders
   14673       to decide whether they can generate straight-line code, or
   14674       whether they must generate a side exit before the instruction.
   14675       condT :: Ity_I32 and is always either zero or one. */
   14676    condT = IRTemp_INVALID;
   14677    switch ( (ARMCondcode)INSN_COND ) {
   14678       case ARMCondNV: {
   14679          // Illegal instruction prior to v5 (see ARM ARM A3-5), but
   14680          // some cases are acceptable
   14681          Bool ok = decode_NV_instruction(&dres, archinfo, insn);
   14682          if (ok)
   14683             goto decode_success;
   14684          else
   14685             goto decode_failure;
   14686       }
   14687       case ARMCondAL: // Always executed
   14688          break;
   14689       case ARMCondEQ: case ARMCondNE: case ARMCondHS: case ARMCondLO:
   14690       case ARMCondMI: case ARMCondPL: case ARMCondVS: case ARMCondVC:
   14691       case ARMCondHI: case ARMCondLS: case ARMCondGE: case ARMCondLT:
   14692       case ARMCondGT: case ARMCondLE:
   14693          condT = newTemp(Ity_I32);
   14694          assign( condT, mk_armg_calculate_condition( INSN_COND ));
   14695          break;
   14696    }
   14697 
   14698    /* ----------------------------------------------------------- */
   14699    /* -- ARMv5 integer instructions                            -- */
   14700    /* ----------------------------------------------------------- */
   14701 
   14702    /* ---------------- Data processing ops ------------------- */
   14703 
   14704    if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0))
   14705        && !(INSN(25,25) == 0 && INSN(7,7) == 1 && INSN(4,4) == 1)) {
   14706       IRTemp  shop = IRTemp_INVALID; /* shifter operand */
   14707       IRTemp  shco = IRTemp_INVALID; /* shifter carry out */
   14708       UInt    rD   = (insn >> 12) & 0xF; /* 15:12 */
   14709       UInt    rN   = (insn >> 16) & 0xF; /* 19:16 */
   14710       UInt    bitS = (insn >> 20) & 1; /* 20:20 */
   14711       IRTemp  rNt  = IRTemp_INVALID;
   14712       IRTemp  res  = IRTemp_INVALID;
   14713       IRTemp  oldV = IRTemp_INVALID;
   14714       IRTemp  oldC = IRTemp_INVALID;
   14715       const HChar*  name = NULL;
   14716       IROp    op   = Iop_INVALID;
   14717       Bool    ok;
   14718 
   14719       switch (INSN(24,21)) {
   14720 
   14721          /* --------- ADD, SUB, AND, OR --------- */
   14722          case BITS4(0,1,0,0): /* ADD:  Rd = Rn + shifter_operand */
   14723             name = "add"; op = Iop_Add32; goto rd_eq_rn_op_SO;
   14724          case BITS4(0,0,1,0): /* SUB:  Rd = Rn - shifter_operand */
   14725             name = "sub"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
   14726          case BITS4(0,0,1,1): /* RSB:  Rd = shifter_operand - Rn */
   14727             name = "rsb"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
   14728          case BITS4(0,0,0,0): /* AND:  Rd = Rn & shifter_operand */
   14729             name = "and"; op = Iop_And32; goto rd_eq_rn_op_SO;
   14730          case BITS4(1,1,0,0): /* OR:   Rd = Rn | shifter_operand */
   14731             name = "orr"; op = Iop_Or32; goto rd_eq_rn_op_SO;
   14732          case BITS4(0,0,0,1): /* EOR:  Rd = Rn ^ shifter_operand */
   14733             name = "eor"; op = Iop_Xor32; goto rd_eq_rn_op_SO;
   14734          case BITS4(1,1,1,0): /* BIC:  Rd = Rn & ~shifter_operand */
   14735             name = "bic"; op = Iop_And32; goto rd_eq_rn_op_SO;
   14736          rd_eq_rn_op_SO: {
   14737             Bool isRSB = False;
   14738             Bool isBIC = False;
   14739             switch (INSN(24,21)) {
   14740                case BITS4(0,0,1,1):
   14741                   vassert(op == Iop_Sub32); isRSB = True; break;
   14742                case BITS4(1,1,1,0):
   14743                   vassert(op == Iop_And32); isBIC = True; break;
   14744                default:
   14745                   break;
   14746             }
   14747             rNt = newTemp(Ity_I32);
   14748             assign(rNt, getIRegA(rN));
   14749             ok = mk_shifter_operand(
   14750                     INSN(25,25), INSN(11,0),
   14751                     &shop, bitS ? &shco : NULL, dis_buf
   14752                  );
   14753             if (!ok)
   14754                break;
   14755             res = newTemp(Ity_I32);
   14756             // compute the main result
   14757             if (isRSB) {
   14758                // reverse-subtract: shifter_operand - Rn
   14759                vassert(op == Iop_Sub32);
   14760                assign(res, binop(op, mkexpr(shop), mkexpr(rNt)) );
   14761             } else if (isBIC) {
   14762                // andn: Rn & ~shifter_operand
   14763                vassert(op == Iop_And32);
   14764                assign(res, binop(op, mkexpr(rNt),
   14765                                      unop(Iop_Not32, mkexpr(shop))) );
   14766             } else {
   14767                // normal: Rn op shifter_operand
   14768                assign(res, binop(op, mkexpr(rNt), mkexpr(shop)) );
   14769             }
   14770             // but don't commit it until after we've finished
   14771             // all necessary reads from the guest state
   14772             if (bitS
   14773                 && (op == Iop_And32 || op == Iop_Or32 || op == Iop_Xor32)) {
   14774                oldV = newTemp(Ity_I32);
   14775                assign( oldV, mk_armg_calculate_flag_v() );
   14776             }
   14777             // can't safely read guest state after here
   14778             // now safe to put the main result
   14779             putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   14780             // XXXX!! not safe to read any guest state after
   14781             // this point (I think the code below doesn't do that).
   14782             if (!bitS)
   14783                vassert(shco == IRTemp_INVALID);
   14784             /* Update the flags thunk if necessary */
   14785             if (bitS) {
   14786                vassert(shco != IRTemp_INVALID);
   14787                switch (op) {
   14788                   case Iop_Add32:
   14789                      setFlags_D1_D2( ARMG_CC_OP_ADD, rNt, shop, condT );
   14790                      break;
   14791                   case Iop_Sub32:
   14792                      if (isRSB) {
   14793                         setFlags_D1_D2( ARMG_CC_OP_SUB, shop, rNt, condT );
   14794                      } else {
   14795                         setFlags_D1_D2( ARMG_CC_OP_SUB, rNt, shop, condT );
   14796                      }
   14797                      break;
   14798                   case Iop_And32: /* BIC and AND set the flags the same */
   14799                   case Iop_Or32:
   14800                   case Iop_Xor32:
   14801                      // oldV has been read just above
   14802                      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
   14803                                         res, shco, oldV, condT );
   14804                      break;
   14805                   default:
   14806                      vassert(0);
   14807                }
   14808             }
   14809             DIP("%s%s%s r%u, r%u, %s\n",
   14810                 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
   14811             goto decode_success;
   14812          }
   14813 
   14814          /* --------- MOV, MVN --------- */
   14815          case BITS4(1,1,0,1):   /* MOV: Rd = shifter_operand */
   14816          case BITS4(1,1,1,1): { /* MVN: Rd = not(shifter_operand) */
   14817             Bool isMVN = INSN(24,21) == BITS4(1,1,1,1);
   14818             IRTemp jk = Ijk_Boring;
   14819             if (rN != 0)
   14820                break; /* rN must be zero */
   14821             ok = mk_shifter_operand(
   14822                     INSN(25,25), INSN(11,0),
   14823                     &shop, bitS ? &shco : NULL, dis_buf
   14824                  );
   14825             if (!ok)
   14826                break;
   14827             res = newTemp(Ity_I32);
   14828             assign( res, isMVN ? unop(Iop_Not32, mkexpr(shop))
   14829                                : mkexpr(shop) );
   14830             if (bitS) {
   14831                vassert(shco != IRTemp_INVALID);
   14832                oldV = newTemp(Ity_I32);
   14833                assign( oldV, mk_armg_calculate_flag_v() );
   14834             } else {
   14835                vassert(shco == IRTemp_INVALID);
   14836             }
   14837             /* According to the Cortex A8 TRM Sec. 5.2.1, MOV PC, r14 is a
   14838                 return for purposes of branch prediction. */
   14839             if (!isMVN && INSN(11,0) == 14) {
   14840               jk = Ijk_Ret;
   14841             }
   14842             // can't safely read guest state after here
   14843             putIRegA( rD, mkexpr(res), condT, jk );
   14844             /* Update the flags thunk if necessary */
   14845             if (bitS) {
   14846                setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
   14847                                   res, shco, oldV, condT );
   14848             }
   14849             DIP("%s%s%s r%u, %s\n",
   14850                 isMVN ? "mvn" : "mov",
   14851                 nCC(INSN_COND), bitS ? "s" : "", rD, dis_buf );
   14852             goto decode_success;
   14853          }
   14854 
   14855          /* --------- CMP --------- */
   14856          case BITS4(1,0,1,0):   /* CMP:  (void) Rn - shifter_operand */
   14857          case BITS4(1,0,1,1): { /* CMN:  (void) Rn + shifter_operand */
   14858             Bool isCMN = INSN(24,21) == BITS4(1,0,1,1);
   14859             if (rD != 0)
   14860                break; /* rD must be zero */
   14861             if (bitS == 0)
   14862                break; /* if S (bit 20) is not set, it's not CMP/CMN */
   14863             rNt = newTemp(Ity_I32);
   14864             assign(rNt, getIRegA(rN));
   14865             ok = mk_shifter_operand(
   14866                     INSN(25,25), INSN(11,0),
   14867                     &shop, NULL, dis_buf
   14868                  );
   14869             if (!ok)
   14870                break;
   14871             // can't safely read guest state after here
   14872             /* Update the flags thunk. */
   14873             setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   14874                             rNt, shop, condT );
   14875             DIP("%s%s r%u, %s\n",
   14876                 isCMN ? "cmn" : "cmp",
   14877                 nCC(INSN_COND), rN, dis_buf );
   14878             goto decode_success;
   14879          }
   14880 
   14881          /* --------- TST --------- */
   14882          case BITS4(1,0,0,0):   /* TST:  (void) Rn & shifter_operand */
   14883          case BITS4(1,0,0,1): { /* TEQ:  (void) Rn ^ shifter_operand */
   14884             Bool isTEQ = INSN(24,21) == BITS4(1,0,0,1);
   14885             if (rD != 0)
   14886                break; /* rD must be zero */
   14887             if (bitS == 0)
   14888                break; /* if S (bit 20) is not set, it's not TST/TEQ */
   14889             rNt = newTemp(Ity_I32);
   14890             assign(rNt, getIRegA(rN));
   14891             ok = mk_shifter_operand(
   14892                     INSN(25,25), INSN(11,0),
   14893                     &shop, &shco, dis_buf
   14894                  );
   14895             if (!ok)
   14896                break;
   14897             /* Update the flags thunk. */
   14898             res = newTemp(Ity_I32);
   14899             assign( res, binop(isTEQ ? Iop_Xor32 : Iop_And32,
   14900                                mkexpr(rNt), mkexpr(shop)) );
   14901             oldV = newTemp(Ity_I32);
   14902             assign( oldV, mk_armg_calculate_flag_v() );
   14903             // can't safely read guest state after here
   14904             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
   14905                                res, shco, oldV, condT );
   14906             DIP("%s%s r%u, %s\n",
   14907                 isTEQ ? "teq" : "tst",
   14908                 nCC(INSN_COND), rN, dis_buf );
   14909             goto decode_success;
   14910          }
   14911 
   14912          /* --------- ADC, SBC, RSC --------- */
   14913          case BITS4(0,1,0,1): /* ADC:  Rd = Rn + shifter_operand + oldC */
   14914             name = "adc"; goto rd_eq_rn_op_SO_op_oldC;
   14915          case BITS4(0,1,1,0): /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
   14916             name = "sbc"; goto rd_eq_rn_op_SO_op_oldC;
   14917          case BITS4(0,1,1,1): /* RSC:  Rd = shifter_operand - Rn - (oldC ^ 1) */
   14918             name = "rsc"; goto rd_eq_rn_op_SO_op_oldC;
   14919          rd_eq_rn_op_SO_op_oldC: {
   14920             // FIXME: shco isn't used for anything.  Get rid of it.
   14921             rNt = newTemp(Ity_I32);
   14922             assign(rNt, getIRegA(rN));
   14923             ok = mk_shifter_operand(
   14924                     INSN(25,25), INSN(11,0),
   14925                     &shop, bitS ? &shco : NULL, dis_buf
   14926                  );
   14927             if (!ok)
   14928                break;
   14929             oldC = newTemp(Ity_I32);
   14930             assign( oldC, mk_armg_calculate_flag_c() );
   14931             res = newTemp(Ity_I32);
   14932             // compute the main result
   14933             switch (INSN(24,21)) {
   14934                case BITS4(0,1,0,1): /* ADC */
   14935                   assign(res,
   14936                          binop(Iop_Add32,
   14937                                binop(Iop_Add32, mkexpr(rNt), mkexpr(shop)),
   14938                                mkexpr(oldC) ));
   14939                   break;
   14940                case BITS4(0,1,1,0): /* SBC */
   14941                   assign(res,
   14942                          binop(Iop_Sub32,
   14943                                binop(Iop_Sub32, mkexpr(rNt), mkexpr(shop)),
   14944                                binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   14945                   break;
   14946                case BITS4(0,1,1,1): /* RSC */
   14947                   assign(res,
   14948                          binop(Iop_Sub32,
   14949                                binop(Iop_Sub32, mkexpr(shop), mkexpr(rNt)),
   14950                                binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   14951                   break;
   14952                default:
   14953                   vassert(0);
   14954             }
   14955             // but don't commit it until after we've finished
   14956             // all necessary reads from the guest state
   14957             // now safe to put the main result
   14958             putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   14959             // XXXX!! not safe to read any guest state after
   14960             // this point (I think the code below doesn't do that).
   14961             if (!bitS)
   14962                vassert(shco == IRTemp_INVALID);
   14963             /* Update the flags thunk if necessary */
   14964             if (bitS) {
   14965                vassert(shco != IRTemp_INVALID);
   14966                switch (INSN(24,21)) {
   14967                   case BITS4(0,1,0,1): /* ADC */
   14968                      setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
   14969                                         rNt, shop, oldC, condT );
   14970                      break;
   14971                   case BITS4(0,1,1,0): /* SBC */
   14972                      setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   14973                                         rNt, shop, oldC, condT );
   14974                      break;
   14975                   case BITS4(0,1,1,1): /* RSC */
   14976                      setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   14977                                         shop, rNt, oldC, condT );
   14978                      break;
   14979                   default:
   14980                      vassert(0);
   14981                }
   14982             }
   14983             DIP("%s%s%s r%u, r%u, %s\n",
   14984                 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
   14985             goto decode_success;
   14986          }
   14987 
   14988          default:
   14989             vassert(0);
   14990       }
   14991    } /* if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0)) */
   14992 
   14993    /* --------------------- Load/store (ubyte & word) -------- */
   14994    // LDR STR LDRB STRB
   14995    /*                 31   27   23   19 15 11    6   4 3  # highest bit
   14996                         28   24   20 16 12
   14997       A5-20   1 | 16  cond 0101 UB0L Rn Rd imm12
   14998       A5-22   1 | 32  cond 0111 UB0L Rn Rd imm5  sh2 0 Rm
   14999       A5-24   2 | 16  cond 0101 UB1L Rn Rd imm12
   15000       A5-26   2 | 32  cond 0111 UB1L Rn Rd imm5  sh2 0 Rm
   15001       A5-28   3 | 16  cond 0100 UB0L Rn Rd imm12
   15002       A5-32   3 | 32  cond 0110 UB0L Rn Rd imm5  sh2 0 Rm
   15003    */
   15004    /* case coding:
   15005              1   at-ea               (access at ea)
   15006              2   at-ea-then-upd      (access at ea, then Rn = ea)
   15007              3   at-Rn-then-upd      (access at Rn, then Rn = ea)
   15008       ea coding
   15009              16  Rn +/- imm12
   15010              32  Rn +/- Rm sh2 imm5
   15011    */
   15012    /* Quickly skip over all of this for hopefully most instructions */
   15013    if ((INSN(27,24) & BITS4(1,1,0,0)) != BITS4(0,1,0,0))
   15014       goto after_load_store_ubyte_or_word;
   15015 
   15016    summary = 0;
   15017 
   15018    /**/ if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 0) {
   15019       summary = 1 | 16;
   15020    }
   15021    else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 0
   15022                                           && INSN(4,4) == 0) {
   15023       summary = 1 | 32;
   15024    }
   15025    else if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 1) {
   15026       summary = 2 | 16;
   15027    }
   15028    else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 1
   15029                                           && INSN(4,4) == 0) {
   15030       summary = 2 | 32;
   15031    }
   15032    else if (INSN(27,24) == BITS4(0,1,0,0) && INSN(21,21) == 0) {
   15033       summary = 3 | 16;
   15034    }
   15035    else if (INSN(27,24) == BITS4(0,1,1,0) && INSN(21,21) == 0
   15036                                           && INSN(4,4) == 0) {
   15037       summary = 3 | 32;
   15038    }
   15039    else goto after_load_store_ubyte_or_word;
   15040 
   15041    { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
   15042      UInt rD = (insn >> 12) & 0xF; /* 15:12 */
   15043      UInt rM = (insn >> 0)  & 0xF; /*  3:0  */
   15044      UInt bU = (insn >> 23) & 1;      /* 23 */
   15045      UInt bB = (insn >> 22) & 1;      /* 22 */
   15046      UInt bL = (insn >> 20) & 1;      /* 20 */
   15047      UInt imm12 = (insn >> 0) & 0xFFF; /* 11:0 */
   15048      UInt imm5  = (insn >> 7) & 0x1F;  /* 11:7 */
   15049      UInt sh2   = (insn >> 5) & 3;     /* 6:5 */
   15050 
   15051      /* Skip some invalid cases, which would lead to two competing
   15052         updates to the same register, or which are otherwise
   15053         disallowed by the spec. */
   15054      switch (summary) {
   15055         case 1 | 16:
   15056            break;
   15057         case 1 | 32:
   15058            if (rM == 15) goto after_load_store_ubyte_or_word;
   15059            break;
   15060         case 2 | 16: case 3 | 16:
   15061            if (rN == 15) goto after_load_store_ubyte_or_word;
   15062            if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
   15063            break;
   15064         case 2 | 32: case 3 | 32:
   15065            if (rM == 15) goto after_load_store_ubyte_or_word;
   15066            if (rN == 15) goto after_load_store_ubyte_or_word;
   15067            if (rN == rM) goto after_load_store_ubyte_or_word;
   15068            if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
   15069            break;
   15070         default:
   15071            vassert(0);
   15072      }
   15073 
   15074      /* compute the effective address.  Bind it to a tmp since we
   15075         may need to use it twice. */
   15076      IRExpr* eaE = NULL;
   15077      switch (summary & 0xF0) {
   15078         case 16:
   15079            eaE = mk_EA_reg_plusminus_imm12( rN, bU, imm12, dis_buf );
   15080            break;
   15081         case 32:
   15082            eaE = mk_EA_reg_plusminus_shifted_reg( rN, bU, rM, sh2, imm5,
   15083                                                   dis_buf );
   15084            break;
   15085      }
   15086      vassert(eaE);
   15087      IRTemp eaT = newTemp(Ity_I32);
   15088      assign(eaT, eaE);
   15089 
   15090      /* get the old Rn value */
   15091      IRTemp rnT = newTemp(Ity_I32);
   15092      assign(rnT, getIRegA(rN));
   15093 
   15094      /* decide on the transfer address */
   15095      IRTemp taT = IRTemp_INVALID;
   15096      switch (summary & 0x0F) {
   15097         case 1: case 2: taT = eaT; break;
   15098         case 3:         taT = rnT; break;
   15099      }
   15100      vassert(taT != IRTemp_INVALID);
   15101 
   15102      if (bL == 0) {
   15103        /* Store.  If necessary, update the base register before the
   15104           store itself, so that the common idiom of "str rX, [sp,
   15105           #-4]!" (store rX at sp-4, then do new sp = sp-4, a.k.a "push
   15106           rX") doesn't cause Memcheck to complain that the access is
   15107           below the stack pointer.  Also, not updating sp before the
   15108           store confuses Valgrind's dynamic stack-extending logic.  So
   15109           do it before the store.  Hence we need to snarf the store
   15110           data before doing the basereg update. */
   15111 
   15112         /* get hold of the data to be stored */
   15113         IRTemp rDt = newTemp(Ity_I32);
   15114         assign(rDt, getIRegA(rD));
   15115 
   15116         /* Update Rn if necessary. */
   15117         switch (summary & 0x0F) {
   15118            case 2: case 3:
   15119               putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   15120               break;
   15121         }
   15122 
   15123         /* generate the transfer */
   15124         if (bB == 0) { // word store
   15125            storeGuardedLE( mkexpr(taT), mkexpr(rDt), condT );
   15126         } else { // byte store
   15127            vassert(bB == 1);
   15128            storeGuardedLE( mkexpr(taT), unop(Iop_32to8, mkexpr(rDt)), condT );
   15129         }
   15130 
   15131      } else {
   15132         /* Load */
   15133         vassert(bL == 1);
   15134 
   15135         /* generate the transfer */
   15136         if (bB == 0) { // word load
   15137            IRTemp jk = Ijk_Boring;
   15138            /* According to the Cortex A8 TRM Sec. 5.2.1, LDR(1) with r13 as the
   15139                base register and PC as the destination register is a return for
   15140                purposes of branch prediction.
   15141               The ARM ARM Sec. C9.10.1 further specifies that it must use a
   15142                post-increment by immediate addressing mode to be counted in
   15143                event 0x0E (Procedure return).*/
   15144            if (rN == 13 && summary == (3 | 16) && bB == 0) {
   15145               jk = Ijk_Ret;
   15146            }
   15147            IRTemp tD = newTemp(Ity_I32);
   15148            loadGuardedLE( tD, ILGop_Ident32,
   15149                           mkexpr(taT), llGetIReg(rD), condT );
   15150            /* "rD == 15 ? condT : IRTemp_INVALID": simply
   15151               IRTemp_INVALID would be correct in all cases here, and
   15152               for the non-r15 case it generates better code, by
   15153               avoiding two tests of the cond (since it is already
   15154               tested by loadGuardedLE).  However, the logic at the end
   15155               of this function, that deals with writes to r15, has an
   15156               optimisation which depends on seeing whether or not the
   15157               write is conditional.  Hence in this particular case we
   15158               let it "see" the guard condition. */
   15159            putIRegA( rD, mkexpr(tD),
   15160                      rD == 15 ? condT : IRTemp_INVALID, jk );
   15161         } else { // byte load
   15162            vassert(bB == 1);
   15163            IRTemp tD = newTemp(Ity_I32);
   15164            loadGuardedLE( tD, ILGop_8Uto32, mkexpr(taT), llGetIReg(rD), condT );
   15165            /* No point in similar 3rd arg complexity here, since we
   15166               can't sanely write anything to r15 like this. */
   15167            putIRegA( rD, mkexpr(tD), IRTemp_INVALID, Ijk_Boring );
   15168         }
   15169 
   15170         /* Update Rn if necessary. */
   15171         switch (summary & 0x0F) {
   15172            case 2: case 3:
   15173               // should be assured by logic above:
   15174               if (bL == 1)
   15175                  vassert(rD != rN); /* since we just wrote rD */
   15176               putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   15177               break;
   15178         }
   15179      }
   15180 
   15181      switch (summary & 0x0F) {
   15182         case 1:  DIP("%sr%s%s r%u, %s\n",
   15183                      bL == 0 ? "st" : "ld",
   15184                      bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
   15185                  break;
   15186         case 2:  DIP("%sr%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
   15187                      bL == 0 ? "st" : "ld",
   15188                      bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
   15189                  break;
   15190         case 3:  DIP("%sr%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
   15191                      bL == 0 ? "st" : "ld",
   15192                      bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
   15193                  break;
   15194         default: vassert(0);
   15195      }
   15196 
   15197      /* XXX deal with alignment constraints */
   15198 
   15199      goto decode_success;
   15200 
   15201      /* Complications:
   15202 
   15203         For all loads: if the Amode specifies base register
   15204         writeback, and the same register is specified for Rd and Rn,
   15205         the results are UNPREDICTABLE.
   15206 
   15207         For all loads and stores: if R15 is written, branch to
   15208         that address afterwards.
   15209 
   15210         STRB: straightforward
   15211         LDRB: loaded data is zero extended
   15212         STR:  lowest 2 bits of address are ignored
   15213         LDR:  if the lowest 2 bits of the address are nonzero
   15214               then the loaded value is rotated right by 8 * the lowest 2 bits
   15215      */
   15216    }
   15217 
   15218   after_load_store_ubyte_or_word:
   15219 
   15220    /* --------------------- Load/store (sbyte & hword) -------- */
   15221    // LDRH LDRSH STRH LDRSB (halfword store; unsigned/signed halfword load; signed byte load)
   15222    /*                 31   27   23   19 15 11   7    3     # highest bit
   15223                         28   24   20 16 12    8    4    0
   15224       A5-36   1 | 16  cond 0001 U10L Rn Rd im4h 1SH1 im4l
   15225       A5-38   1 | 32  cond 0001 U00L Rn Rd 0000 1SH1 Rm
   15226       A5-40   2 | 16  cond 0001 U11L Rn Rd im4h 1SH1 im4l
   15227       A5-42   2 | 32  cond 0001 U01L Rn Rd 0000 1SH1 Rm
   15228       A5-44   3 | 16  cond 0000 U10L Rn Rd im4h 1SH1 im4l
   15229       A5-46   3 | 32  cond 0000 U00L Rn Rd 0000 1SH1 Rm
   15230    */
   15231    /* case coding:
   15232              1   at-ea               (access at ea)
   15233              2   at-ea-then-upd      (access at ea, then Rn = ea)
   15234              3   at-Rn-then-upd      (access at Rn, then Rn = ea)
   15235       ea coding
   15236              16  Rn +/- imm8
   15237              32  Rn +/- Rm
   15238    */
   15239    /* Quickly skip over all of this for hopefully most instructions */
   15240    if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
   15241       goto after_load_store_sbyte_or_hword;
   15242 
   15243    /* Check the "1SH1" thing. */
   15244    if ((INSN(7,4) & BITS4(1,0,0,1)) != BITS4(1,0,0,1))
   15245       goto after_load_store_sbyte_or_hword;
   15246 
   15247    summary = 0; /* low 4 bits: case coding (1/2/3); bits 4 and 5: ea coding (16/32) */
   15248 
   15249    /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,0)) {
   15250       summary = 1 | 16;
   15251    }
   15252    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,0)) {
   15253       summary = 1 | 32;
   15254    }
   15255    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,1)) {
   15256       summary = 2 | 16;
   15257    }
   15258    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,1)) {
   15259       summary = 2 | 32;
   15260    }
   15261    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(1,0)) {
   15262       summary = 3 | 16;
   15263    }
   15264    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(0,0)) {
   15265       summary = 3 | 32;
   15266    }
   15267    else goto after_load_store_sbyte_or_hword;
   15268 
   15269    { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
   15270      UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
   15271      UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
   15272      UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
   15273      UInt bL   = (insn >> 20) & 1;   /* 20 L=1 load, L=0 store */
   15274      UInt bH   = (insn >> 5) & 1;    /* H=1 halfword, H=0 byte */
   15275      UInt bS   = (insn >> 6) & 1;    /* S=1 signed, S=0 unsigned */
   15276      UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
   15277 
   15278      /* Skip combinations that are either meaningless or already
   15279         handled by main word-or-unsigned-byte load-store
   15280         instructions. */
   15281      if (bS == 0 && bH == 0) /* "unsigned byte" */
   15282         goto after_load_store_sbyte_or_hword;
   15283      if (bS == 1 && bL == 0) /* "signed store" */
   15284         goto after_load_store_sbyte_or_hword;
   15285 
   15286      /* Require 11:8 == 0 for Rn +/- Rm cases (imm8's top nibble holds bits 11:8) */
   15287      if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
   15288         goto after_load_store_sbyte_or_hword;
   15289 
   15290      /* Skip some invalid cases, which would lead to two competing
   15291         updates to the same register, or which are otherwise
   15292         disallowed by the spec. */
   15293      switch (summary) {
   15294         case 1 | 16:
   15295            break;
   15296         case 1 | 32:
   15297            if (rM == 15) goto after_load_store_sbyte_or_hword;
   15298            break;
   15299         case 2 | 16: case 3 | 16:
   15300            if (rN == 15) goto after_load_store_sbyte_or_hword;
   15301            if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
   15302            break;
   15303         case 2 | 32: case 3 | 32:
   15304            if (rM == 15) goto after_load_store_sbyte_or_hword;
   15305            if (rN == 15) goto after_load_store_sbyte_or_hword;
   15306            if (rN == rM) goto after_load_store_sbyte_or_hword;
   15307            if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
   15308            break;
   15309         default:
   15310            vassert(0);
   15311      }
   15312 
   15313      /* If this is a branch, make it unconditional at this point.
   15314         Doing conditional branches in-line is too complex (for now).
   15315         Note that you'd have to be insane to use any of these loads to
   15316         do a branch, since they only load 16 bits at most, but we
   15317         handle it just in case. */
   15318      if (bL == 1 && rD == 15 && condT != IRTemp_INVALID) {
   15319         // go uncond
   15320         mk_skip_over_A32_if_cond_is_false( condT );
   15321         condT = IRTemp_INVALID;
   15322         // now uncond
   15323      }
   15324 
   15325      /* compute the effective address.  Bind it to a tmp since we
   15326         may need to use it twice. */
   15327      IRExpr* eaE = NULL;
   15328      switch (summary & 0xF0) {
   15329         case 16:
   15330            eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
   15331            break;
   15332         case 32:
   15333            eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
   15334            break;
   15335      }
   15336      vassert(eaE);
   15337      IRTemp eaT = newTemp(Ity_I32);
   15338      assign(eaT, eaE);
   15339 
   15340      /* get the old Rn value */
   15341      IRTemp rnT = newTemp(Ity_I32);
   15342      assign(rnT, getIRegA(rN));
   15343 
   15344      /* decide on the transfer address */
   15345      IRTemp taT = IRTemp_INVALID;
   15346      switch (summary & 0x0F) {
   15347         case 1: case 2: taT = eaT; break;
   15348         case 3:         taT = rnT; break;
   15349      }
   15350      vassert(taT != IRTemp_INVALID);
   15351 
   15352      /* ll previous value of rD, for dealing with conditional loads; passed as the 4th argument of loadGuardedLE below */
   15353      IRTemp llOldRd = newTemp(Ity_I32);
   15354      assign(llOldRd, llGetIReg(rD));
   15355 
   15356      /* halfword store  H 1  L 0  S 0
   15357         uhalf load      H 1  L 1  S 0
   15358         shalf load      H 1  L 1  S 1
   15359         sbyte load      H 0  L 1  S 1
   15360      */
   15361      const HChar* name = NULL;
   15362      /* generate the transfer */
   15363      /**/ if (bH == 1 && bL == 0 && bS == 0) { // halfword store (rD is read before the Rn writeback below)
   15364         storeGuardedLE( mkexpr(taT),
   15365                         unop(Iop_32to16, getIRegA(rD)), condT );
   15366         name = "strh";
   15367      }
   15368      else if (bH == 1 && bL == 1 && bS == 0) { // uhalf load
   15369         IRTemp newRd = newTemp(Ity_I32);
   15370         loadGuardedLE( newRd, ILGop_16Uto32,
   15371                        mkexpr(taT), mkexpr(llOldRd), condT );
   15372         putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
   15373         name = "ldrh";
   15374      }
   15375      else if (bH == 1 && bL == 1 && bS == 1) { // shalf load
   15376         IRTemp newRd = newTemp(Ity_I32);
   15377         loadGuardedLE( newRd, ILGop_16Sto32,
   15378                        mkexpr(taT), mkexpr(llOldRd), condT );
   15379         putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
   15380         name = "ldrsh";
   15381      }
   15382      else if (bH == 0 && bL == 1 && bS == 1) { // sbyte load
   15383         IRTemp newRd = newTemp(Ity_I32);
   15384         loadGuardedLE( newRd, ILGop_8Sto32,
   15385                        mkexpr(taT), mkexpr(llOldRd), condT );
   15386         putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
   15387         name = "ldrsb";
   15388      }
   15389      else
   15390         vassert(0); // should be assured by logic above
   15391 
   15392      /* Update Rn if necessary (cases 2 and 3 write the effective address back to Rn). */
   15393      switch (summary & 0x0F) {
   15394         case 2: case 3:
   15395            // should be assured by logic above:
   15396            if (bL == 1)
   15397               vassert(rD != rN); /* since we just wrote rD */
   15398            putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   15399            break;
   15400      }
   15401 
   15402      switch (summary & 0x0F) {
   15403         case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
   15404                  break;
   15405         case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
   15406                      name, nCC(INSN_COND), rD, dis_buf);
   15407                  break;
   15408         case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
   15409                      name, nCC(INSN_COND), rD, dis_buf);
   15410                  break;
   15411         default: vassert(0);
   15412      }
   15413 
   15414      /* XXX deal with alignment constraints */
   15415 
   15416      goto decode_success;
   15417 
   15418      /* Complications:
   15419 
   15420         For all loads: if the Amode specifies base register
   15421         writeback, and the same register is specified for Rd and Rn,
   15422         the results are UNPREDICTABLE.
   15423 
   15424         For all loads and stores: if R15 is written, branch to
   15425         that address afterwards.
   15426 
   15427         Misaligned halfword stores => Unpredictable
   15428         Misaligned halfword loads  => Unpredictable
   15429      */
   15430    }
   15431 
   15432   after_load_store_sbyte_or_hword:
   15433 
   15434    /* --------------------- Load/store multiple -------------- */
   15435    // LD/STMIA LD/STMIB LD/STMDA LD/STMDB
   15436    // Remarkably complex and difficult to get right
   15437    // match 27:20 as 100XX0WL
   15438    if (BITS8(1,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,1,0,0))) {
   15439       // A5-50 LD/STMIA  cond 1000 10WL Rn RegList
   15440       // A5-51 LD/STMIB  cond 1001 10WL Rn RegList
   15441       // A5-53 LD/STMDA  cond 1000 00WL Rn RegList
   15442       // A5-53 LD/STMDB  cond 1001 00WL Rn RegList
   15443       //                   28   24   20 16       0
   15444 
   15445       UInt bINC    = (insn >> 23) & 1;  /* 23: 1 = increment ('i'), 0 = decrement ('d') */
   15446       UInt bBEFORE = (insn >> 24) & 1;  /* 24: 1 = before ('b'), 0 = after ('a') */
   15447 
   15448       UInt bL      = (insn >> 20) & 1;  /* load=1, store=0 */
   15449       UInt bW      = (insn >> 21) & 1;  /* Rn wback=1, no wback=0 */
   15450       UInt rN      = (insn >> 16) & 0xF;  /* 19:16 */
   15451       UInt regList = insn & 0xFFFF;  /* 15:0, one bit per register (bit rN is tested below) */
   15452       /* Skip some invalid cases, which would lead to two competing
   15453          updates to the same register, or which are otherwise
   15454          disallowed by the spec.  Note the test above has required
   15455          that S == 0, since that looks like a kernel-mode only thing.
   15456          Done by forcing the real pattern, viz 100XXSWL to actually be
   15457          100XX0WL. */
   15458       if (rN == 15) goto after_load_store_multiple;
   15459       // reglist can't be empty
   15460       if (regList == 0) goto after_load_store_multiple;
   15461       // if requested to writeback Rn, and this is a load instruction,
   15462       // then Rn can't appear in RegList, since we'd have two competing
   15463       // new values for Rn.  We do however accept this case for store
   15464       // instructions.
   15465       if (bW == 1 && bL == 1 && ((1 << rN) & regList) > 0)
   15466          goto after_load_store_multiple;
   15467 
   15468       /* Now, we can't do a conditional load or store, since that very
   15469          likely will generate an exception.  So we have to take a side
   15470          exit at this point if the condition is false. */
   15471       if (condT != IRTemp_INVALID) {
   15472          mk_skip_over_A32_if_cond_is_false( condT );
   15473          condT = IRTemp_INVALID;
   15474       }
   15475 
   15476       /* Ok, now we're unconditional.  Generate the IR. */
   15477       mk_ldm_stm( True/*arm*/, rN, bINC, bBEFORE, bW, bL, regList );
   15478 
   15479       DIP("%sm%c%c%s r%u%s, {0x%04x}\n",
   15480           bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
   15481           nCC(INSN_COND),
   15482           rN, bW ? "!" : "", regList);
   15483 
   15484       goto decode_success;
   15485    }
   15486 
   15487   after_load_store_multiple:
   15488 
   15489    /* --------------------- Control flow --------------------- */
   15490    // B, BL (Branch, or Branch-and-Link, to immediate offset)
   15491    //   dst = address of this insn + 8 + (sign-extended imm24 << 2)
   15492    if (BITS8(1,0,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))) {
   15493       UInt link   = (insn >> 24) & 1;   /* 24: 1 = BL, 0 = B */
   15494       UInt uimm24 = insn & ((1<<24)-1);
   15495       /* Sign-extend imm24 and scale by 4 in unsigned arithmetic only; left-shifting a signed value into the sign bit is undefined in C. */
   15496       UInt soff26 = ((uimm24 ^ 0x800000u) - 0x800000u) << 2;
   15497       UInt dst    = guest_R15_curr_instr_notENC + 8 + soff26;
   15498       IRJumpKind jk = link ? Ijk_Call : Ijk_Boring;
   15499       if (link) {
   15500          putIRegA(14, mkU32(guest_R15_curr_instr_notENC + 4),
   15501                       condT, Ijk_Boring);
   15502       }
   15503       if (condT == IRTemp_INVALID) {
   15504          /* unconditional transfer to 'dst'.  See if we can simply
   15505             continue tracing at the destination. */
   15506          if (resteerOkFn( callback_opaque, (Addr64)dst )) {
   15507             /* yes */
   15508             dres.whatNext   = Dis_ResteerU;
   15509             dres.continueAt = (Addr64)dst;
   15510          } else {
   15511             /* no; terminate the SB at this point. */
   15512             llPutIReg(15, mkU32(dst));
   15513             dres.jk_StopHere = jk;
   15514             dres.whatNext    = Dis_StopHere;
   15515          }
   15516          DIP("b%s 0x%x\n", link ? "l" : "", dst);
   15517       } else {
   15518          /* conditional transfer to 'dst' */
   15519          const HChar* comment = "";
   15520 
   15521          /* First see if we can do some speculative chasing into one
   15522             arm or the other.  Be conservative and only chase if
   15523             !link, that is, this is a normal conditional branch to a
   15524             known destination. */
   15525          if (!link
   15526              && resteerCisOk
   15527              && vex_control.guest_chase_cond
   15528              && dst < guest_R15_curr_instr_notENC
   15529              && resteerOkFn( callback_opaque, (Addr64)(Addr32)dst) ) {
   15530             /* Speculation: assume this backward branch is taken.  So
   15531                we need to emit a side-exit to the insn following this
   15532                one, on the negation of the condition, and continue at
   15533                the branch target address (dst). */
   15534             stmt( IRStmt_Exit( unop(Iop_Not1,
   15535                                     unop(Iop_32to1, mkexpr(condT))),
   15536                                Ijk_Boring,
   15537                                IRConst_U32(guest_R15_curr_instr_notENC+4),
   15538                                OFFB_R15T ));
   15539             dres.whatNext   = Dis_ResteerC;
   15540             dres.continueAt = (Addr64)(Addr32)dst;
   15541             comment = "(assumed taken)";
   15542          }
   15543          else
   15544          if (!link
   15545              && resteerCisOk
   15546              && vex_control.guest_chase_cond
   15547              && dst >= guest_R15_curr_instr_notENC
   15548              && resteerOkFn( callback_opaque,
   15549                              (Addr64)(Addr32)
   15550                                      (guest_R15_curr_instr_notENC+4)) ) {
   15551             /* Speculation: assume this forward branch is not taken.
   15552                So we need to emit a side-exit to dst (the dest) and
   15553                continue disassembling at the insn immediately
   15554                following this one. */
   15555             stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
   15556                                Ijk_Boring,
   15557                                IRConst_U32(dst),
   15558                                OFFB_R15T ));
   15559             dres.whatNext   = Dis_ResteerC;
   15560             dres.continueAt = (Addr64)(Addr32)
   15561                                       (guest_R15_curr_instr_notENC+4);
   15562             comment = "(assumed not taken)";
   15563          }
   15564          else {
   15565             /* Conservative default translation - end the block at
   15566                this point. */
   15567             stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
   15568                                jk, IRConst_U32(dst), OFFB_R15T ));
   15569             llPutIReg(15, mkU32(guest_R15_curr_instr_notENC + 4));
   15570             dres.jk_StopHere = Ijk_Boring;
   15571             dres.whatNext    = Dis_StopHere;
   15572          }
   15573          DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND),
   15574              dst, comment);
   15575       }
   15576       goto decode_success;
   15577    }
   15578 
   15579    // BX, BLX (Branch, or Branch-and-Link, to a register; printed below as "bx" / "blx")
   15580    // NB: interworking branch
   15581    if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
   15582        && INSN(19,12) == BITS8(1,1,1,1,1,1,1,1)
   15583        && (INSN(11,4) == BITS8(1,1,1,1,0,0,1,1)
   15584            || INSN(11,4) == BITS8(1,1,1,1,0,0,0,1))) {
   15585       IRTemp  dst = newTemp(Ity_I32);
   15586       UInt    link = (INSN(11,4) >> 1) & 1; /* insn bit 5: 1 for the ...0011 (link) form, 0 for ...0001 */
   15587       UInt    rM   = INSN(3,0);
   15588       // we don't decode the case (link && rM == 15), as that's
   15589       // Unpredictable.
   15590       if (!(link && rM == 15)) {
   15591          if (condT != IRTemp_INVALID) {
   15592             mk_skip_over_A32_if_cond_is_false( condT );
   15593          } /* condT itself is not reset here; the writes below are unguarded (IRTemp_INVALID / llPutIReg) */
   15594          // rM contains an interworking address exactly as we require
   15595          // (with continuation CPSR.T in bit 0), so we can use it
   15596          // as-is, with no masking.
   15597          assign( dst, getIRegA(rM) );
   15598          if (link) {
   15599             putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
   15600                       IRTemp_INVALID/*because AL*/, Ijk_Boring );
   15601          }
   15602          llPutIReg(15, mkexpr(dst));
   15603          dres.jk_StopHere = link ? Ijk_Call
   15604                                  : (rM == 14 ? Ijk_Ret : Ijk_Boring);
   15605          dres.whatNext    = Dis_StopHere;
   15606          if (condT == IRTemp_INVALID) {
   15607             DIP("b%sx r%u\n", link ? "l" : "", rM);
   15608          } else {
   15609             DIP("b%sx%s r%u\n", link ? "l" : "", nCC(INSN_COND), rM);
   15610          }
   15611          goto decode_success;
   15612       }
   15613       /* else: (link && rM == 15): just fall through */
   15614    }
   15615 
   15616    /* --- NB: ARM interworking branches are in NV space, hence
   15617       are handled elsewhere by decode_NV_instruction.
   15618       ---
   15619    */
   15620 
   15621    /* --------------------- Clz --------------------- */
   15622    // CLZ rD, rM: result is 32 when rM == 0, otherwise Iop_Clz32(rM)
   15623    if (INSN(27,20) == BITS8(0,0,0,1,0,1,1,0)
   15624        && INSN(19,16) == BITS4(1,1,1,1)
   15625        && INSN(11,4) == BITS8(1,1,1,1,0,0,0,1)) {
   15626       UInt rD = INSN(15,12);
   15627       UInt rM = INSN(3,0);
   15628       IRTemp arg = newTemp(Ity_I32);
   15629       IRTemp res = newTemp(Ity_I32);
   15630       assign(arg, getIRegA(rM));
   15631       assign(res, IRExpr_ITE(
   15632                      binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
   15633                      mkU32(32),
   15634                      unop(Iop_Clz32, mkexpr(arg))
   15635             ));
   15636       putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   15637       DIP("clz%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
   15638       goto decode_success;
   15639    }
   15640 
   15641    /* --------------------- Mul etc --------------------- */
   15642    // MUL{S} rD, rM, rS: rD = rM * rS (Iop_Mul32); with S set, flags go through ARMG_CC_OP_MUL
   15643    if (BITS8(0,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
   15644        && INSN(15,12) == BITS4(0,0,0,0)
   15645        && INSN(7,4) == BITS4(1,0,0,1)) {
   15646       UInt bitS = (insn >> 20) & 1; /* 20:20 */
   15647       UInt rD = INSN(19,16);
   15648       UInt rS = INSN(11,8);
   15649       UInt rM = INSN(3,0);
   15650       if (rD == 15 || rM == 15 || rS == 15) {
   15651          /* Unpredictable; don't decode; fall through */
   15652       } else {
   15653          IRTemp argL = newTemp(Ity_I32);
   15654          IRTemp argR = newTemp(Ity_I32);
   15655          IRTemp res  = newTemp(Ity_I32);
   15656          IRTemp oldC = IRTemp_INVALID;
   15657          IRTemp oldV = IRTemp_INVALID;
   15658          assign( argL, getIRegA(rM));
   15659          assign( argR, getIRegA(rS));
   15660          assign( res, binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) );
   15661          if (bitS) { /* capture the current C and V flag values before the guest state is updated */
   15662             oldC = newTemp(Ity_I32);
   15663             assign(oldC, mk_armg_calculate_flag_c());
   15664             oldV = newTemp(Ity_I32);
   15665             assign(oldV, mk_armg_calculate_flag_v());
   15666          }
   15667          // now update guest state
   15668          putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   15669          if (bitS) {
   15670             IRTemp pair = newTemp(Ity_I32); /* (oldC << 1) | oldV */
   15671             assign( pair, binop(Iop_Or32,
   15672                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   15673                                 mkexpr(oldV)) );
   15674             setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
   15675          }
   15676          DIP("mul%c%s r%u, r%u, r%u\n",
   15677              bitS ? 's' : ' ', nCC(INSN_COND), rD, rM, rS);
   15678          goto decode_success;
   15679       }
   15680       /* fall through */
   15681    }
   15682 
   15683    /* --------------------- Integer Divides --------------------- */
   15684    // SDIV rD, rN, rM: rD = rN / rM, signed (Iop_DivS32)
   15685    if (BITS8(0,1,1,1,0,0,0,1) == INSN(27,20)
   15686        && INSN(15,12) == BITS4(1,1,1,1)
   15687        && INSN(7,4) == BITS4(0,0,0,1)) {
   15688       UInt rD = INSN(19,16);
   15689       UInt rM = INSN(11,8);
   15690       UInt rN = INSN(3,0);
   15691       if (rD == 15 || rM == 15 || rN == 15) {
   15692          /* Unpredictable; don't decode; fall through */
   15693       } else {
   15694          IRTemp res  = newTemp(Ity_I32);
   15695          IRTemp argL = newTemp(Ity_I32);
   15696          IRTemp argR = newTemp(Ity_I32);
   15697          assign(argL, getIRegA(rN));
   15698          assign(argR, getIRegA(rM));
   15699          assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
   15700          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   15701          DIP("sdiv%s r%u, r%u, r%u\n", nCC(INSN_COND), rD, rN, rM); /* the write is guarded by condT, so show the condition */
   15702          goto decode_success;
   15703       }
   15704    }
   15705 
   15706    // UDIV rD, rN, rM: rD = rN / rM, unsigned (Iop_DivU32)
   15707    if (BITS8(0,1,1,1,0,0,1,1) == INSN(27,20)
   15708        && INSN(15,12) == BITS4(1,1,1,1)
   15709        && INSN(7,4) == BITS4(0,0,0,1)) {
   15710       UInt rD = INSN(19,16);
   15711       UInt rM = INSN(11,8);
   15712       UInt rN = INSN(3,0);
   15713       if (rD == 15 || rM == 15 || rN == 15) {
   15714          /* Unpredictable; don't decode; fall through */
   15715       } else {
   15716          IRTemp res  = newTemp(Ity_I32);
   15717          IRTemp argL = newTemp(Ity_I32);
   15718          IRTemp argR = newTemp(Ity_I32);
   15719          assign(argL, getIRegA(rN));
   15720          assign(argR, getIRegA(rM));
   15721          assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
   15722          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   15723          DIP("udiv%s r%u, r%u, r%u\n", nCC(INSN_COND), rD, rN, rM); /* the write is guarded by condT, so show the condition */
   15724          goto decode_success;
   15725       }
   15726    }
   15727 
   15728    // MLA{S}: rD = rN + rM * rS;  MLS: rD = rN - rM * rS  (bit 22 selects MLS)
   15729    if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   15730        && INSN(7,4) == BITS4(1,0,0,1)) {
   15731       UInt bitS  = (insn >> 20) & 1; /* 20:20 */
   15732       UInt isMLS = (insn >> 22) & 1; /* 22:22 */
   15733       UInt rD = INSN(19,16);
   15734       UInt rN = INSN(15,12);
   15735       UInt rS = INSN(11,8);
   15736       UInt rM = INSN(3,0);
   15737       if (bitS == 1 && isMLS == 1) {
   15738          /* This isn't allowed (MLS that sets flags).  don't decode;
   15739             fall through */
   15740       }
   15741       else
   15742       if (rD == 15 || rM == 15 || rS == 15 || rN == 15) {
   15743          /* Unpredictable; don't decode; fall through */
   15744       } else {
   15745          IRTemp argL = newTemp(Ity_I32);
   15746          IRTemp argR = newTemp(Ity_I32);
   15747          IRTemp argP = newTemp(Ity_I32);
   15748          IRTemp res  = newTemp(Ity_I32);
   15749          IRTemp oldC = IRTemp_INVALID;
   15750          IRTemp oldV = IRTemp_INVALID;
   15751          assign( argL, getIRegA(rM));
   15752          assign( argR, getIRegA(rS));
   15753          assign( argP, getIRegA(rN)); /* the accumulate (or subtract-from) operand */
   15754          assign( res, binop(isMLS ? Iop_Sub32 : Iop_Add32,
   15755                             mkexpr(argP),
   15756                             binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) ));
   15757          if (bitS) { /* capture the current C and V flag values before the guest state is updated */
   15758             vassert(!isMLS); // guaranteed above
   15759             oldC = newTemp(Ity_I32);
   15760             assign(oldC, mk_armg_calculate_flag_c());
   15761             oldV = newTemp(Ity_I32);
   15762             assign(oldV, mk_armg_calculate_flag_v());
   15763          }
   15764          // now update guest state
   15765          putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   15766          if (bitS) {
   15767             IRTemp pair = newTemp(Ity_I32); /* (oldC << 1) | oldV */
   15768             assign( pair, binop(Iop_Or32,
   15769                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   15770                                 mkexpr(oldV)) );
   15771             setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
   15772          }
   15773          DIP("ml%c%c%s r%u, r%u, r%u, r%u\n",
   15774              isMLS ? 's' : 'a', bitS ? 's' : ' ',
   15775              nCC(INSN_COND), rD, rM, rS, rN);
   15776          goto decode_success;
   15777       }
   15778       /* fall through */
   15779    }
   15780 
   15781    // SMULL, UMULL: rDhi:rDlo = rM * rS as a 64-bit product (Iop_MullS32 / Iop_MullU32, chosen by bit 22)
   15782    if (BITS8(0,0,0,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   15783        && INSN(7,4) == BITS4(1,0,0,1)) {
   15784       UInt bitS = (insn >> 20) & 1; /* 20:20 */
   15785       UInt rDhi = INSN(19,16);
   15786       UInt rDlo = INSN(15,12);
   15787       UInt rS   = INSN(11,8);
   15788       UInt rM   = INSN(3,0);
   15789       UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
   15790       if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
   15791          /* Unpredictable; don't decode; fall through */
   15792       } else {
   15793          IRTemp argL  = newTemp(Ity_I32);
   15794          IRTemp argR  = newTemp(Ity_I32);
   15795          IRTemp res   = newTemp(Ity_I64);
   15796          IRTemp resHi = newTemp(Ity_I32);
   15797          IRTemp resLo = newTemp(Ity_I32);
   15798          IRTemp oldC  = IRTemp_INVALID;
   15799          IRTemp oldV  = IRTemp_INVALID;
   15800          IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
   15801          assign( argL, getIRegA(rM));
   15802          assign( argR, getIRegA(rS));
   15803          assign( res, binop(mulOp, mkexpr(argL), mkexpr(argR)) );
   15804          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   15805          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   15806          if (bitS) { /* capture the current C and V flag values before the guest state is updated */
   15807             oldC = newTemp(Ity_I32);
   15808             assign(oldC, mk_armg_calculate_flag_c());
   15809             oldV = newTemp(Ity_I32);
   15810             assign(oldV, mk_armg_calculate_flag_v());
   15811          }
   15812          // now update guest state
   15813          putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   15814          putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   15815          if (bitS) {
   15816             IRTemp pair = newTemp(Ity_I32); /* (oldC << 1) | oldV */
   15817             assign( pair, binop(Iop_Or32,
   15818                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   15819                                 mkexpr(oldV)) );
   15820             setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
   15821          }
   15822          DIP("%cmull%c%s r%u, r%u, r%u, r%u\n",
   15823              isS ? 's' : 'u', bitS ? 's' : ' ',
   15824              nCC(INSN_COND), rDlo, rDhi, rM, rS);
   15825          goto decode_success;
   15826       }
   15827       /* fall through */
   15828    }
   15829 
   15830    // SMLAL, UMLAL
   15831    if (BITS8(0,0,0,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   15832        && INSN(7,4) == BITS4(1,0,0,1)) {
   15833       UInt bitS = (insn >> 20) & 1; /* 20:20 */
   15834       UInt rDhi = INSN(19,16);
   15835       UInt rDlo = INSN(15,12);
   15836       UInt rS   = INSN(11,8);
   15837       UInt rM   = INSN(3,0);
   15838       UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
   15839       if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
   15840          /* Unpredictable; don't decode; fall through */
   15841       } else {
   15842          IRTemp argL  = newTemp(Ity_I32);
   15843          IRTemp argR  = newTemp(Ity_I32);
   15844          IRTemp old   = newTemp(Ity_I64);
   15845          IRTemp res   = newTemp(Ity_I64);
   15846          IRTemp resHi = newTemp(Ity_I32);
   15847          IRTemp resLo = newTemp(Ity_I32);
   15848          IRTemp oldC  = IRTemp_INVALID;
   15849          IRTemp oldV  = IRTemp_INVALID;
   15850          IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
   15851          assign( argL, getIRegA(rM));
   15852          assign( argR, getIRegA(rS));
   15853          assign( old, binop(Iop_32HLto64, getIRegA(rDhi), getIRegA(rDlo)) );
   15854          assign( res, binop(Iop_Add64,
   15855                             mkexpr(old),
   15856                             binop(mulOp, mkexpr(argL), mkexpr(argR))) );
   15857          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   15858          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   15859          if (bitS) {
   15860             oldC = newTemp(Ity_I32);
   15861             assign(oldC, mk_armg_calculate_flag_c());
   15862             oldV = newTemp(Ity_I32);
   15863             assign(oldV, mk_armg_calculate_flag_v());
   15864          }
   15865          // now update guest state
   15866          putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   15867          putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   15868          if (bitS) {
   15869             IRTemp pair = newTemp(Ity_I32);
   15870             assign( pair, binop(Iop_Or32,
   15871                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   15872                                 mkexpr(oldV)) );
   15873             setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
   15874          }
   15875          DIP("%cmlal%c%s r%u, r%u, r%u, r%u\n",
   15876              isS ? 's' : 'u', bitS ? 's' : ' ', nCC(INSN_COND),
   15877              rDlo, rDhi, rM, rS);
   15878          goto decode_success;
   15879       }
   15880       /* fall through */
   15881    }
   15882 
   15883    // UMAAL: rDhi:rDlo = (rN * rM, unsigned 32x32->64) + rDhi + rDlo
   15884    if (BITS8(0,0,0,0,0,1,0,0) == INSN(27,20) && INSN(7,4) == BITS4(1,0,0,1)) {
   15885       UInt rDhi = INSN(19,16);
   15886       UInt rDlo = INSN(15,12);
   15887       UInt rM   = INSN(11,8);
   15888       UInt rN   = INSN(3,0);
   15889       if (rDlo == 15 || rDhi == 15 || rN == 15 || rM == 15 || rDhi == rDlo)  {
   15890          /* Unpredictable; don't decode; fall through */
   15891       } else {
   15892          IRTemp argN   = newTemp(Ity_I32);
   15893          IRTemp argM   = newTemp(Ity_I32);
   15894          IRTemp argDhi = newTemp(Ity_I32);
   15895          IRTemp argDlo = newTemp(Ity_I32);
   15896          IRTemp res    = newTemp(Ity_I64);
   15897          IRTemp resHi  = newTemp(Ity_I32);
   15898          IRTemp resLo  = newTemp(Ity_I32);
   15899          assign( argN,   getIRegA(rN) );
   15900          assign( argM,   getIRegA(rM) );
   15901          assign( argDhi, getIRegA(rDhi) );  /* old values, read before the writes below */
   15902          assign( argDlo, getIRegA(rDlo) );
   15903          assign( res,                       /* both accumulands are zero-extended to 64 bits */
   15904                  binop(Iop_Add64,
   15905                        binop(Iop_Add64,
   15906                              binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
   15907                              unop(Iop_32Uto64, mkexpr(argDhi))),
   15908                        unop(Iop_32Uto64, mkexpr(argDlo))) );
   15909          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   15910          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   15911          // now update guest state
   15912          putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   15913          putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   15914          DIP("umaal%s r%u, r%u, r%u, r%u\n",  /* cond suffix abuts the mnemonic */
   15915              nCC(INSN_COND), rDlo, rDhi, rN, rM);
   15916          goto decode_success;
   15917       }
   15918       /* fall through */
   15919    }
   15920 
   15921    /* --------------------- Msr etc --------------------- */
   15922 
   15923    // MSR apsr, #imm -- operand is bits 7:0 rotated right by 2 * bits 11:8
   15924    if (INSN(27,20) == BITS8(0,0,1,1,0,0,1,0)
   15925        && INSN(17,12) == BITS6(0,0,1,1,1,1)) {
   15926       UInt write_ge    = INSN(18,18);   /* bit 18; shown as "g" in the disassembly */
   15927       UInt write_nzcvq = INSN(19,19);   /* bit 19; shown as "f" in the disassembly */
   15928       if (write_nzcvq || write_ge) {    /* neither bit set: not decoded here */
   15929          UInt   imm = (INSN(11,0) >> 0) & 0xFF;
   15930          UInt   rot = 2 * ((INSN(11,0) >> 8) & 0xF);
   15931          IRTemp immT = newTemp(Ity_I32);
   15932          vassert(rot <= 30);            /* 2 * (4-bit field) is at most 30 */
   15933          imm = ROR32(imm, rot);
   15934          assign(immT, mkU32(imm));
   15935          desynthesise_APSR( write_nzcvq, write_ge, immT, condT );
   15936          DIP("msr%s cpsr_%s%s, #0x%08x\n", nCC(INSN_COND), /* same field syntax as "MSR apsr, reg" */
   15937              write_nzcvq ? "f" : "", write_ge ? "g" : "", imm);
   15938          goto decode_success;
   15939       }
   15940       /* fall through */
   15941    }
   15942 
   15943    // MSR apsr, reg
   15944    if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
   15945        && INSN(17,12) == BITS6(0,0,1,1,1,1)
   15946        && INSN(11,4) == BITS8(0,0,0,0,0,0,0,0)) {
   15947       UInt rN          = INSN(3,0);
   15948       UInt write_ge    = INSN(18,18);
   15949       UInt write_nzcvq = INSN(19,19);
   15950       if (rN != 15 && (write_nzcvq || write_ge)) {
   15951          IRTemp rNt = newTemp(Ity_I32);
   15952          assign(rNt, getIRegA(rN));
   15953          desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
   15954          DIP("msr%s cpsr_%s%s, r%u\n", nCC(INSN_COND),
   15955              write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
   15956          goto decode_success;
   15957       }
   15958       /* fall through */
   15959    }
   15960 
   15961    // MRS rD, cpsr
   15962    if ((insn & 0x0FFF0FFF) == 0x010F0000) {
   15963       UInt rD   = INSN(15,12);
   15964       if (rD != 15) {
   15965          IRTemp apsr = synthesise_APSR();
   15966          putIRegA( rD, mkexpr(apsr), condT, Ijk_Boring );
   15967          DIP("mrs%s r%u, cpsr\n", nCC(INSN_COND), rD);
   15968          goto decode_success;
   15969       }
   15970       /* fall through */
   15971    }
   15972 
   15973    /* --------------------- Svc --------------------- */
   15974    if (BITS8(1,1,1,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))) {
   15975       UInt imm24 = (insn >> 0) & 0xFFFFFF;
   15976       if (imm24 == 0) {
   15977          /* A syscall.  We can't do this conditionally, hence: */
   15978          if (condT != IRTemp_INVALID) {
   15979             mk_skip_over_A32_if_cond_is_false( condT );
   15980          }
   15981          // AL after here
   15982          llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 4 ));
   15983          dres.jk_StopHere = Ijk_Sys_syscall;
   15984          dres.whatNext    = Dis_StopHere;
   15985          DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
   15986          goto decode_success;
   15987       }
   15988       /* fall through */
   15989    }
   15990 
   15991    /* ------------------------ swp ------------------------ */
   15992 
   15993    // SWP, SWPB
   15994    if (BITS8(0,0,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   15995        && BITS4(0,0,0,0) == INSN(11,8)
   15996        && BITS4(1,0,0,1) == INSN(7,4)) {
   15997       UInt   rN   = INSN(19,16);
   15998       UInt   rD   = INSN(15,12);
   15999       UInt   rM   = INSN(3,0);
   16000       IRTemp tRn  = newTemp(Ity_I32);
   16001       IRTemp tNew = newTemp(Ity_I32);
   16002       IRTemp tOld = IRTemp_INVALID;
   16003       IRTemp tSC1 = newTemp(Ity_I1);
   16004       UInt   isB  = (insn >> 22) & 1;
   16005 
   16006       if (rD == 15 || rN == 15 || rM == 15 || rN == rM || rN == rD) {
   16007          /* undecodable; fall through */
   16008       } else {
   16009          /* make unconditional */
   16010          if (condT != IRTemp_INVALID) {
   16011             mk_skip_over_A32_if_cond_is_false( condT );
   16012             condT = IRTemp_INVALID;
   16013          }
   16014          /* Ok, now we're unconditional.  Generate a LL-SC loop. */
   16015          assign(tRn, getIRegA(rN));
   16016          assign(tNew, getIRegA(rM));
   16017          if (isB) {
   16018             /* swpb */
   16019             tOld = newTemp(Ity_I8);
   16020             stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
   16021                               NULL/*=>isLL*/) );
   16022             stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
   16023                               unop(Iop_32to8, mkexpr(tNew))) );
   16024          } else {
   16025             /* swp */
   16026             tOld = newTemp(Ity_I32);
   16027             stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
   16028                               NULL/*=>isLL*/) );
   16029             stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
   16030                               mkexpr(tNew)) );
   16031          }
   16032          stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
   16033                            /*Ijk_NoRedir*/Ijk_Boring,
   16034                            IRConst_U32(guest_R15_curr_instr_notENC),
   16035                            OFFB_R15T ));
   16036          putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
   16037                       IRTemp_INVALID, Ijk_Boring);
   16038          DIP("swp%s%s r%u, r%u, [r%u]\n",
   16039              isB ? "b" : "", nCC(INSN_COND), rD, rM, rN);
   16040          goto decode_success;
   16041       }
   16042       /* fall through */
   16043    }
   16044 
   16045    /* ----------------------------------------------------------- */
   16046    /* -- ARMv6 instructions                                    -- */
   16047    /* ----------------------------------------------------------- */
   16048 
   16049    /* ------------------- {ldr,str}ex{,b,h,d} ------------------- */
   16050 
   16051    // LDREXD, LDREX, LDREXH, LDREXB
   16052    if (0x01900F9F == (insn & 0x0F900FFF)) {
   16053       UInt   rT    = INSN(15,12);
   16054       UInt   rN    = INSN(19,16);
   16055       IRType ty    = Ity_INVALID;
   16056       IROp   widen = Iop_INVALID;
   16057       const HChar* nm = NULL;
   16058       Bool   valid = True;
   16059       switch (INSN(22,21)) {
   16060          case 0: nm = "";  ty = Ity_I32; break;
   16061          case 1: nm = "d"; ty = Ity_I64; break;
   16062          case 2: nm = "b"; ty = Ity_I8;  widen = Iop_8Uto32; break;
   16063          case 3: nm = "h"; ty = Ity_I16; widen = Iop_16Uto32; break;
   16064          default: vassert(0);
   16065       }
   16066       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
   16067          if (rT == 15 || rN == 15)
   16068             valid = False;
   16069       } else {
   16070          vassert(ty == Ity_I64);
   16071          if ((rT & 1) == 1 || rT == 14 || rN == 15)
   16072             valid = False;
   16073       }
   16074       if (valid) {
   16075          IRTemp res;
   16076          /* make unconditional */
   16077          if (condT != IRTemp_INVALID) {
   16078            mk_skip_over_A32_if_cond_is_false( condT );
   16079            condT = IRTemp_INVALID;
   16080          }
   16081          /* Ok, now we're unconditional.  Do the load. */
   16082          res = newTemp(ty);
   16083          // FIXME: assumes little-endian guest
   16084          stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
   16085                            NULL/*this is a load*/) );
   16086          if (ty == Ity_I64) {
   16087             // FIXME: assumes little-endian guest
   16088             putIRegA(rT+0, unop(Iop_64to32, mkexpr(res)),
   16089                            IRTemp_INVALID, Ijk_Boring);
   16090             putIRegA(rT+1, unop(Iop_64HIto32, mkexpr(res)),
   16091                            IRTemp_INVALID, Ijk_Boring);
   16092             DIP("ldrex%s%s r%u, r%u, [r%u]\n",
   16093                 nm, nCC(INSN_COND), rT+0, rT+1, rN);
   16094          } else {
   16095             putIRegA(rT, widen == Iop_INVALID
   16096                             ? mkexpr(res) : unop(widen, mkexpr(res)),
   16097                      IRTemp_INVALID, Ijk_Boring);
   16098             DIP("ldrex%s%s r%u, [r%u]\n", nm, nCC(INSN_COND), rT, rN);
   16099          }
   16100          goto decode_success;
   16101       }
   16102       /* undecodable; fall through */
   16103    }
   16104 
   16105    // STREXD, STREX, STREXH, STREXB
   16106    if (0x01800F90 == (insn & 0x0F900FF0)) {
   16107       UInt   rT     = INSN(3,0);
   16108       UInt   rN     = INSN(19,16);
   16109       UInt   rD     = INSN(15,12);
   16110       IRType ty     = Ity_INVALID;
   16111       IROp   narrow = Iop_INVALID;
   16112       const HChar* nm = NULL;
   16113       Bool   valid  = True;
   16114       switch (INSN(22,21)) {
   16115          case 0: nm = "";  ty = Ity_I32; break;
   16116          case 1: nm = "d"; ty = Ity_I64; break;
   16117          case 2: nm = "b"; ty = Ity_I8;  narrow = Iop_32to8; break;
   16118          case 3: nm = "h"; ty = Ity_I16; narrow = Iop_32to16; break;
   16119          default: vassert(0);
   16120       }
   16121       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
   16122          if (rD == 15 || rN == 15 || rT == 15
   16123              || rD == rN || rD == rT)
   16124             valid = False;
   16125       } else {
   16126          vassert(ty == Ity_I64);
   16127          if (rD == 15 || (rT & 1) == 1 || rT == 14 || rN == 15
   16128              || rD == rN || rD == rT || rD == rT+1)
   16129             valid = False;
   16130       }
   16131       if (valid) {
   16132          IRTemp resSC1, resSC32, data;
   16133          /* make unconditional */
   16134          if (condT != IRTemp_INVALID) {
   16135             mk_skip_over_A32_if_cond_is_false( condT );
   16136             condT = IRTemp_INVALID;
   16137          }
   16138          /* Ok, now we're unconditional.  Do the store. */
   16139          data = newTemp(ty);
   16140          assign(data,
   16141                 ty == Ity_I64
   16142                    // FIXME: assumes little-endian guest
   16143                    ? binop(Iop_32HLto64, getIRegA(rT+1), getIRegA(rT+0))
   16144                    : narrow == Iop_INVALID
   16145                       ? getIRegA(rT)
   16146                       : unop(narrow, getIRegA(rT)));
   16147          resSC1 = newTemp(Ity_I1);
   16148          // FIXME: assumes little-endian guest
   16149          stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
   16150 
   16151          /* Set rD to 1 on failure, 0 on success.  Currently we have
   16152             resSC1 == 0 on failure, 1 on success. */
   16153          resSC32 = newTemp(Ity_I32);
   16154          assign(resSC32,
   16155                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   16156 
   16157          putIRegA(rD, mkexpr(resSC32),
   16158                       IRTemp_INVALID, Ijk_Boring);
   16159          if (ty == Ity_I64) {
   16160             DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
   16161                 nm, nCC(INSN_COND), rD, rT, rT+1, rN);
   16162          } else {
   16163             DIP("strex%s%s r%u, r%u, [r%u]\n",
   16164                 nm, nCC(INSN_COND), rD, rT, rN);
   16165          }
   16166          goto decode_success;
   16167       }
   16168       /* fall through */
   16169    }
   16170 
   16171    /* --------------------- movw, movt --------------------- */
   16172    if (0x03000000 == (insn & 0x0FF00000)
   16173        || 0x03400000 == (insn & 0x0FF00000)) /* pray for CSE */ {
   16174       UInt rD    = INSN(15,12);
   16175       UInt imm16 = (insn & 0xFFF) | ((insn >> 4) & 0x0000F000);
   16176       UInt isT   = (insn >> 22) & 1;
   16177       if (rD == 15) {
   16178          /* forget it */
   16179       } else {
   16180          if (isT) {
   16181             putIRegA(rD,
   16182                      binop(Iop_Or32,
   16183                            binop(Iop_And32, getIRegA(rD), mkU32(0xFFFF)),
   16184                            mkU32(imm16 << 16)),
   16185                      condT, Ijk_Boring);
   16186             DIP("movt%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
   16187             goto decode_success;
   16188          } else {
   16189             putIRegA(rD, mkU32(imm16), condT, Ijk_Boring);
   16190             DIP("movw%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
   16191             goto decode_success;
   16192          }
   16193       }
   16194       /* fall through */
   16195    }
   16196 
   16197    /* ----------- uxtb, sxtb, uxth, sxth, uxtb16, sxtb16 ----------- */
   16198    /* FIXME: this is an exact duplicate of the Thumb version.  They
   16199       should be commoned up. */
   16200    if (BITS8(0,1,1,0,1, 0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,0))
   16201        && BITS4(1,1,1,1) == INSN(19,16)
   16202        && BITS4(0,1,1,1) == INSN(7,4)
   16203        && BITS4(0,0, 0,0) == (INSN(11,8) & BITS4(0,0,1,1))) {
   16204       UInt subopc = INSN(27,20) & BITS8(0,0,0,0,0, 1,1,1);
   16205       if (subopc != BITS4(0,0,0,1) && subopc != BITS4(0,1,0,1)) {
   16206          Int    rot  = (INSN(11,8) >> 2) & 3;
   16207          UInt   rM   = INSN(3,0);
   16208          UInt   rD   = INSN(15,12);
   16209          IRTemp srcT = newTemp(Ity_I32);
   16210          IRTemp rotT = newTemp(Ity_I32);
   16211          IRTemp dstT = newTemp(Ity_I32);
   16212          const HChar* nm = "???";
   16213          assign(srcT, getIRegA(rM));
   16214          assign(rotT, genROR32(srcT, 8 * rot)); /* 0, 8, 16 or 24 only */
   16215          switch (subopc) {
   16216             case BITS4(0,1,1,0): // UXTB
   16217                assign(dstT, unop(Iop_8Uto32, unop(Iop_32to8, mkexpr(rotT))));
   16218                nm = "uxtb";
   16219                break;
   16220             case BITS4(0,0,1,0): // SXTB
   16221                assign(dstT, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rotT))));
   16222                nm = "sxtb";
   16223                break;
   16224             case BITS4(0,1,1,1): // UXTH
   16225                assign(dstT, unop(Iop_16Uto32, unop(Iop_32to16, mkexpr(rotT))));
   16226                nm = "uxth";
   16227                break;
   16228             case BITS4(0,0,1,1): // SXTH
   16229                assign(dstT, unop(Iop_16Sto32, unop(Iop_32to16, mkexpr(rotT))));
   16230                nm = "sxth";
   16231                break;
   16232             case BITS4(0,1,0,0): // UXTB16
   16233                assign(dstT, binop(Iop_And32, mkexpr(rotT), mkU32(0x00FF00FF)));
   16234                nm = "uxtb16";
   16235                break;
   16236             case BITS4(0,0,0,0): { // SXTB16
   16237                IRTemp lo32 = newTemp(Ity_I32);
   16238                IRTemp hi32 = newTemp(Ity_I32);
   16239                assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
   16240                assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
   16241                assign(
   16242                   dstT,
   16243                   binop(Iop_Or32,
   16244                         binop(Iop_And32,
   16245                               unop(Iop_8Sto32,
   16246                                    unop(Iop_32to8, mkexpr(lo32))),
   16247                               mkU32(0xFFFF)),
   16248                         binop(Iop_Shl32,
   16249                               unop(Iop_8Sto32,
   16250                                    unop(Iop_32to8, mkexpr(hi32))),
   16251                               mkU8(16))
   16252                ));
   16253                nm = "sxtb16";
   16254                break;
   16255             }
   16256             default:
   16257                vassert(0); // guarded by "if" above
   16258          }
   16259          putIRegA(rD, mkexpr(dstT), condT, Ijk_Boring);
   16260          DIP("%s%s r%u, r%u, ROR #%u\n", nm, nCC(INSN_COND), rD, rM, rot);
   16261          goto decode_success;
   16262       }
   16263       /* fall through */
   16264    }
   16265 
   16266    /* ------------------- bfi, bfc ------------------- */
   16267    if (BITS8(0,1,1,1,1,1,0, 0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
   16268        && BITS4(0, 0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
   16269       UInt rD  = INSN(15,12);
   16270       UInt rN  = INSN(3,0);           /* rN == 15 is the bfc form (source is zero) */
   16271       UInt msb = (insn >> 16) & 0x1F; /* 20:16 */
   16272       UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
   16273       if (rD == 15 || msb < lsb) {
   16274          /* undecodable; fall through */
   16275       } else {
   16276          IRTemp src    = newTemp(Ity_I32);
   16277          IRTemp olddst = newTemp(Ity_I32);
   16278          IRTemp newdst = newTemp(Ity_I32);
   16279          UInt   mask = 1U << (msb - lsb); /* unsigned shift: msb - lsb can be 31 */
   16280          mask = (mask - 1) + mask;        /* low (msb - lsb + 1) bits set */
   16281          vassert(mask != 0); // guaranteed by "msb < lsb" check above
   16282          mask <<= lsb;                    /* mask now covers bits lsb .. msb */
   16283 
   16284          assign(src, rN == 15 ? mkU32(0) : getIRegA(rN));
   16285          assign(olddst, getIRegA(rD));
   16286          assign(newdst,                   /* (src << lsb) inside the mask, old rD outside it */
   16287                 binop(Iop_Or32,
   16288                    binop(Iop_And32,
   16289                          binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
   16290                          mkU32(mask)),
   16291                    binop(Iop_And32,
   16292                          mkexpr(olddst),
   16293                          mkU32(~mask)))
   16294                );
   16295 
   16296          putIRegA(rD, mkexpr(newdst), condT, Ijk_Boring);
   16297 
   16298          if (rN == 15) {
   16299             DIP("bfc%s r%u, #%u, #%u\n",
   16300                 nCC(INSN_COND), rD, lsb, msb-lsb+1);
   16301          } else {
   16302             DIP("bfi%s r%u, r%u, #%u, #%u\n",
   16303                 nCC(INSN_COND), rD, rN, lsb, msb-lsb+1);
   16304          }
   16305          goto decode_success;
   16306       }
   16307       /* fall through */
   16308    }
   16309 
   16310    /* ------------------- {u,s}bfx ------------------- */
   16311    if (BITS8(0,1,1,1,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   16312        && BITS4(0,1,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
   16313       UInt rD  = INSN(15,12);
   16314       UInt rN  = INSN(3,0);
   16315       UInt wm1 = (insn >> 16) & 0x1F; /* 20:16 */
   16316       UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
   16317       UInt msb = lsb + wm1;           /* highest source bit extracted */
   16318       UInt isU = (insn >> 22) & 1;    /* 22:22 */
   16319       if (rD == 15 || rN == 15 || msb >= 32) {
   16320          /* undecodable; fall through */
   16321       } else {
   16322          IRTemp src  = newTemp(Ity_I32);
   16323          IRTemp tmp  = newTemp(Ity_I32);
   16324          IRTemp res  = newTemp(Ity_I32);
   16325          UInt   mask = ((1U << wm1) - 1) + (1U << wm1); /* low (wm1 + 1) bits; unsigned so wm1 == 31 is well defined */
   16326          vassert(msb <= 31);         /* from the "msb >= 32" rejection above */
   16327          vassert(mask != 0); // bit wm1 is always set, and wm1 <= msb <= 31
   16328 
   16329          assign(src, getIRegA(rN));
   16330          assign(tmp, binop(Iop_And32,      /* field moved down to bit 0 and masked */
   16331                            binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
   16332                            mkU32(mask)));
   16333          assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32, /* shift up then back: zero- or sign-extends from bit wm1 */
   16334                            binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
   16335                            mkU8(31-wm1)));
   16336 
   16337          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   16338 
   16339          DIP("%s%s r%u, r%u, #%u, #%u\n",
   16340              isU ? "ubfx" : "sbfx",
   16341              nCC(INSN_COND), rD, rN, lsb, wm1 + 1);
   16342          goto decode_success;
   16343       }
   16344       /* fall through */
   16345    }
   16346 
   16347    /* --------------------- Load/store doubleword ------------- */
   16348    // LDRD STRD
   16349    /*                 31   27   23   19 15 11   7    3     # highest bit
   16350                         28   24   20 16 12    8    4    0
   16351       A5-36   1 | 16  cond 0001 U100 Rn Rd im4h 11S1 im4l
   16352       A5-38   1 | 32  cond 0001 U000 Rn Rd 0000 11S1 Rm
   16353       A5-40   2 | 16  cond 0001 U110 Rn Rd im4h 11S1 im4l
   16354       A5-42   2 | 32  cond 0001 U010 Rn Rd 0000 11S1 Rm
   16355       A5-44   3 | 16  cond 0000 U100 Rn Rd im4h 11S1 im4l
   16356       A5-46   3 | 32  cond 0000 U000 Rn Rd 0000 11S1 Rm
   16357    */
   16358    /* case coding:
   16359              1   at-ea               (access at ea)
   16360              2   at-ea-then-upd      (access at ea, then Rn = ea)
   16361              3   at-Rn-then-upd      (access at Rn, then Rn = ea)
   16362       ea coding
   16363              16  Rn +/- imm8
   16364              32  Rn +/- Rm
   16365    */
   16366    /* Quickly skip over all of this for hopefully most instructions */
   16367    if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
   16368       goto after_load_store_doubleword;
   16369 
   16370    /* Check the "11S1" thing. */
   16371    if ((INSN(7,4) & BITS4(1,1,0,1)) != BITS4(1,1,0,1))
   16372       goto after_load_store_doubleword;
   16373 
   16374    summary = 0;
   16375 
   16376    /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,0,0)) {
   16377       summary = 1 | 16;
   16378    }
   16379    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,0,0)) {
   16380       summary = 1 | 32;
   16381    }
   16382    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,1,0)) {
   16383       summary = 2 | 16;
   16384    }
   16385    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,1,0)) {
   16386       summary = 2 | 32;
   16387    }
   16388    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(1,0,0)) {
   16389       summary = 3 | 16;
   16390    }
   16391    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(0,0,0)) {
   16392       summary = 3 | 32;
   16393    }
   16394    else goto after_load_store_doubleword;
   16395 
   16396    { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
   16397      UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
   16398      UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
   16399      UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
   16400      UInt bS   = (insn >> 5) & 1;    /* S=1 store, S=0 load */
   16401      UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
   16402 
   16403      /* Require rD to be an even numbered register */
   16404      if ((rD & 1) != 0)
   16405         goto after_load_store_doubleword;
   16406 
   16407      /* Require 11:8 == 0 for Rn +/- Rm cases */
   16408      if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
   16409         goto after_load_store_doubleword;
   16410 
   16411      /* Skip some invalid cases, which would lead to two competing
   16412         updates to the same register, or which are otherwise
   16413         disallowed by the spec. */
   16414      switch (summary) {
   16415         case 1 | 16:
   16416            break;
   16417         case 1 | 32:
   16418            if (rM == 15) goto after_load_store_doubleword;
   16419            break;
   16420         case 2 | 16: case 3 | 16:
   16421            if (rN == 15) goto after_load_store_doubleword;
   16422            if (bS == 0 && (rN == rD || rN == rD+1))
   16423               goto after_load_store_doubleword;
   16424            break;
   16425         case 2 | 32: case 3 | 32:
   16426            if (rM == 15) goto after_load_store_doubleword;
   16427            if (rN == 15) goto after_load_store_doubleword;
   16428            if (rN == rM) goto after_load_store_doubleword;
   16429            if (bS == 0 && (rN == rD || rN == rD+1))
   16430               goto after_load_store_doubleword;
   16431            break;
   16432         default:
   16433            vassert(0);
   16434      }
   16435 
   16436      /* If this is a branch, make it unconditional at this point.
   16437         Doing conditional branches in-line is too complex (for
   16438         now). */
   16439      vassert((rD & 1) == 0); /* from tests above */
   16440      if (bS == 0 && rD+1 == 15 && condT != IRTemp_INVALID) {
   16441         // go uncond
   16442         mk_skip_over_A32_if_cond_is_false( condT );
   16443         condT = IRTemp_INVALID;
   16444         // now uncond
   16445      }
   16446 
   16447      /* compute the effective address.  Bind it to a tmp since we
   16448         may need to use it twice. */
   16449      IRExpr* eaE = NULL;
   16450      switch (summary & 0xF0) {
   16451         case 16:
   16452            eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
   16453            break;
   16454         case 32:
   16455            eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
   16456            break;
   16457      }
   16458      vassert(eaE);
   16459      IRTemp eaT = newTemp(Ity_I32);
   16460      assign(eaT, eaE);
   16461 
   16462      /* get the old Rn value */
   16463      IRTemp rnT = newTemp(Ity_I32);
   16464      assign(rnT, getIRegA(rN));
   16465 
   16466      /* decide on the transfer address */
   16467      IRTemp taT = IRTemp_INVALID;
   16468      switch (summary & 0x0F) {
   16469         case 1: case 2: taT = eaT; break;
   16470         case 3:         taT = rnT; break;
   16471      }
   16472      vassert(taT != IRTemp_INVALID);
   16473 
   16474      /* XXX deal with alignment constraints */
   16475      /* XXX: but the A8 doesn't seem to trap for misaligned loads, so,
   16476         ignore alignment issues for the time being. */
   16477 
   16478      /* For almost all cases, we do the writeback after the transfers.
   16479         However, that leaves the stack "uncovered" in this case:
   16480            strd    rD, [sp, #-8]!
   16481         In which case, do the writeback to SP now, instead of later.
   16482         This is bad in that it makes the insn non-restartable if the
   16483         accesses fault, but at least keeps Memcheck happy. */
   16484      Bool writeback_already_done = False;
   16485      if (bS == 1 /*store*/ && summary == (2 | 16)
   16486          && rN == 13 && rN != rD && rN != rD+1
   16487          && bU == 0/*minus*/ && imm8 == 8) {
   16488         putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   16489         writeback_already_done = True;
   16490      }
   16491 
   16492      /* doubleword store  S 1
   16493         doubleword load   S 0
   16494      */
   16495      const HChar* name = NULL;
   16496      /* generate the transfers */
   16497      if (bS == 1) { // doubleword store
   16498         storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(0)),
   16499                         getIRegA(rD+0), condT );
   16500         storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(4)),
   16501                         getIRegA(rD+1), condT );
   16502         name = "strd";
   16503      } else { // doubleword load
   16504         IRTemp oldRd0 = newTemp(Ity_I32);
   16505         IRTemp oldRd1 = newTemp(Ity_I32);
   16506         assign(oldRd0, llGetIReg(rD+0));
   16507         assign(oldRd1, llGetIReg(rD+1));
   16508         IRTemp newRd0 = newTemp(Ity_I32);
   16509         IRTemp newRd1 = newTemp(Ity_I32);
   16510         loadGuardedLE( newRd0, ILGop_Ident32,
   16511                        binop(Iop_Add32, mkexpr(taT), mkU32(0)),
   16512                        mkexpr(oldRd0), condT );
   16513         putIRegA( rD+0, mkexpr(newRd0), IRTemp_INVALID, Ijk_Boring );
   16514         loadGuardedLE( newRd1, ILGop_Ident32,
   16515                        binop(Iop_Add32, mkexpr(taT), mkU32(4)),
   16516                        mkexpr(oldRd1), condT );
   16517         putIRegA( rD+1, mkexpr(newRd1), IRTemp_INVALID, Ijk_Boring );
   16518         name = "ldrd";
   16519      }
   16520 
   16521      /* Update Rn if necessary. */
   16522      switch (summary & 0x0F) {
   16523         case 2: case 3:
   16524            // should be assured by logic above:
   16525            vassert(rN != 15); /* from checks above */
   16526            if (bS == 0) {
   16527               vassert(rD+0 != rN); /* since we just wrote rD+0 */
   16528               vassert(rD+1 != rN); /* since we just wrote rD+1 */
   16529            }
   16530            if (!writeback_already_done)
   16531               putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   16532            break;
   16533      }
   16534 
   16535      switch (summary & 0x0F) {
   16536         case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
   16537                  break;
   16538         case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
   16539                      name, nCC(INSN_COND), rD, dis_buf);
   16540                  break;
   16541         case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
   16542                      name, nCC(INSN_COND), rD, dis_buf);
   16543                  break;
   16544         default: vassert(0);
   16545      }
   16546 
   16547      goto decode_success;
   16548    }
   16549 
   16550   after_load_store_doubleword:
   16551 
   16552    /* ------------------- {s,u}xtab ------------- */
   16553    if (BITS8(0,1,1,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   16554        && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
   16555        && BITS4(0,1,1,1) == INSN(7,4)) {
   16556       UInt rN  = INSN(19,16);
   16557       UInt rD  = INSN(15,12);
   16558       UInt rM  = INSN(3,0);
   16559       UInt rot = (insn >> 10) & 3;
   16560       UInt isU = INSN(22,22);
   16561       if (rN == 15/*it's {S,U}XTB*/ || rD == 15 || rM == 15) {
   16562          /* undecodable; fall through */
   16563       } else {
   16564          IRTemp srcL = newTemp(Ity_I32);
   16565          IRTemp srcR = newTemp(Ity_I32);
   16566          IRTemp res  = newTemp(Ity_I32);
   16567          assign(srcR, getIRegA(rM));
   16568          assign(srcL, getIRegA(rN));
   16569          assign(res,  binop(Iop_Add32,
   16570                             mkexpr(srcL),
   16571                             unop(isU ? Iop_8Uto32 : Iop_8Sto32,
   16572                                  unop(Iop_32to8,
   16573                                       genROR32(srcR, 8 * rot)))));
   16574          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   16575          DIP("%cxtab%s r%u, r%u, r%u, ror #%u\n",
   16576              isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
   16577          goto decode_success;
   16578       }
   16579       /* fall through */
   16580    }
   16581 
   16582    /* ------------------- {s,u}xtah ------------- */
   16583    if (BITS8(0,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   16584        && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
   16585        && BITS4(0,1,1,1) == INSN(7,4)) {
   16586       UInt rN  = INSN(19,16);
   16587       UInt rD  = INSN(15,12);
   16588       UInt rM  = INSN(3,0);
   16589       UInt rot = (insn >> 10) & 3;
   16590       UInt isU = INSN(22,22);
   16591       if (rN == 15/*it's {S,U}XTH*/ || rD == 15 || rM == 15) {
   16592          /* undecodable; fall through */
   16593       } else {
   16594          IRTemp srcL = newTemp(Ity_I32);
   16595          IRTemp srcR = newTemp(Ity_I32);
   16596          IRTemp res  = newTemp(Ity_I32);
   16597          assign(srcR, getIRegA(rM));
   16598          assign(srcL, getIRegA(rN));
   16599          assign(res,  binop(Iop_Add32,
   16600                             mkexpr(srcL),
   16601                             unop(isU ? Iop_16Uto32 : Iop_16Sto32,
   16602                                  unop(Iop_32to16,
   16603                                       genROR32(srcR, 8 * rot)))));
   16604          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   16605 
   16606          DIP("%cxtah%s r%u, r%u, r%u, ror #%u\n",
   16607              isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
   16608          goto decode_success;
   16609       }
   16610       /* fall through */
   16611    }
   16612 
   16613    /* ------------------- rev16, rev ------------------ */
   16614    if (INSN(27,16) == 0x6BF
   16615        && (INSN(11,4) == 0xFB/*rev16*/ || INSN(11,4) == 0xF3/*rev*/)) {
   16616       Bool isREV = INSN(11,4) == 0xF3;
   16617       UInt rM    = INSN(3,0);
   16618       UInt rD    = INSN(15,12);
   16619       if (rM != 15 && rD != 15) {
   16620          IRTemp rMt = newTemp(Ity_I32);
   16621          assign(rMt, getIRegA(rM));
   16622          IRTemp res = isREV ? gen_REV(rMt) : gen_REV16(rMt);
   16623          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   16624          DIP("rev%s%s r%u, r%u\n", isREV ? "" : "16",
   16625              nCC(INSN_COND), rD, rM);
   16626          goto decode_success;
   16627       }
   16628    }
   16629 
   16630    /* ------------------- revsh ----------------------- */
   16631    if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xFB) {
   16632       UInt rM = INSN(3,0);
   16633       UInt rD = INSN(15,12);
   16634       if (rM != 15 && rD != 15) {
   16635          IRTemp irt_rM  = newTemp(Ity_I32);
   16636          IRTemp irt_hi  = newTemp(Ity_I32);
   16637          IRTemp irt_low = newTemp(Ity_I32);
   16638          IRTemp irt_res = newTemp(Ity_I32);
   16639          assign(irt_rM, getIRegA(rM));
   16640          assign(irt_hi,
   16641                 binop(Iop_Sar32,
   16642                       binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
   16643                       mkU8(16)
   16644                 )
   16645          );
   16646          assign(irt_low,
   16647                 binop(Iop_And32,
   16648                       binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
   16649                       mkU32(0xFF)
   16650                 )
   16651          );
   16652          assign(irt_res,
   16653                 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
   16654          );
   16655          putIRegA(rD, mkexpr(irt_res), condT, Ijk_Boring);
   16656          DIP("revsh%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
   16657          goto decode_success;
   16658       }
   16659    }
   16660 
   16661    /* ------------------- rbit ------------------ */
   16662    if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xF3) {
   16663       UInt rD = INSN(15,12);
   16664       UInt rM = INSN(3,0);
   16665       if (rD != 15 && rM != 15) {
   16666          IRTemp arg = newTemp(Ity_I32);
   16667          assign(arg, getIRegA(rM));
   16668          IRTemp res = gen_BITREV(arg);
   16669          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   16670          DIP("rbit r%u, r%u\n", rD, rM);
   16671          goto decode_success;
   16672       }
   16673    }
   16674 
   16675    /* ------------------- smmul ------------------ */
   16676    if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
   16677        && INSN(15,12) == BITS4(1,1,1,1)
   16678        && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
   16679       UInt bitR = INSN(5,5);
   16680       UInt rD = INSN(19,16);
   16681       UInt rM = INSN(11,8);
   16682       UInt rN = INSN(3,0);
   16683       if (rD != 15 && rM != 15 && rN != 15) {
   16684          IRExpr* res
   16685          = unop(Iop_64HIto32,
   16686                 binop(Iop_Add64,
   16687                       binop(Iop_MullS32, getIRegA(rN), getIRegA(rM)),
   16688                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
   16689          putIRegA(rD, res, condT, Ijk_Boring);
   16690          DIP("smmul%s%s r%u, r%u, r%u\n",
   16691              nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM);
   16692          goto decode_success;
   16693       }
   16694    }
   16695 
   16696    /* ------------------- smmla ------------------ */
   16697    if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
   16698        && INSN(15,12) != BITS4(1,1,1,1)
   16699        && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
   16700       UInt bitR = INSN(5,5);
   16701       UInt rD = INSN(19,16);
   16702       UInt rA = INSN(15,12);
   16703       UInt rM = INSN(11,8);
   16704       UInt rN = INSN(3,0);
   16705       if (rD != 15 && rM != 15 && rN != 15) {
   16706          IRExpr* res
   16707          = unop(Iop_64HIto32,
   16708                 binop(Iop_Add64,
   16709                       binop(Iop_Add64,
   16710                             binop(Iop_32HLto64, getIRegA(rA), mkU32(0)),
   16711                             binop(Iop_MullS32, getIRegA(rN), getIRegA(rM))),
   16712                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
   16713          putIRegA(rD, res, condT, Ijk_Boring);
   16714          DIP("smmla%s%s r%u, r%u, r%u, r%u\n",
   16715              nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM, rA);
   16716          goto decode_success;
   16717       }
   16718    }
   16719 
   16720    /* ------------------- NOP ------------------ */
   16721    if (0x0320F000 == (insn & 0x0FFFFFFF)) {
   16722       DIP("nop%s\n", nCC(INSN_COND));
   16723       goto decode_success;
   16724    }
   16725 
   16726    /* -------------- (A1) LDRT reg+/-#imm12 -------------- */
   16727    /* Load Register Unprivileged:
   16728       ldrt<c> Rt, [Rn] {, #+/-imm12}
   16729    */
   16730    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,1) ) {
   16731       UInt rT     = INSN(15,12);
   16732       UInt rN     = INSN(19,16);
   16733       UInt imm12  = INSN(11,0);
   16734       UInt bU     = INSN(23,23);
   16735       Bool valid  = True;
   16736       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   16737       if (valid) {
   16738          IRTemp newRt = newTemp(Ity_I32);
   16739          loadGuardedLE( newRt,
   16740                         ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
   16741          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16742          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16743                              getIRegA(rN), mkU32(imm12));
   16744          putIRegA(rN, erN, condT, Ijk_Boring);
   16745          DIP("ldrt%s r%u, [r%u], #%c%u\n",
   16746              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   16747          goto decode_success;
   16748       }
   16749    }
   16750 
   16751    /* -------------- (A2) LDRT reg+/-reg with shift -------------- */
   16752    /* Load Register Unprivileged:
   16753       ldrt<c> Rt, [Rn], +/-Rm{, shift}
   16754    */
   16755    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,1)
   16756         && INSN(4,4) == 0 ) {
   16757       UInt rT     = INSN(15,12);
   16758       UInt rN     = INSN(19,16);
   16759       UInt rM     = INSN(3,0);
   16760       UInt imm5   = INSN(11,7);
   16761       UInt bU     = INSN(23,23);
   16762       UInt type   = INSN(6,5);
   16763       Bool valid  = True;
   16764       if (rT == 15 || rN == 15 || rN == rT || rM == 15
   16765           /* || (ArchVersion() < 6 && rM == rN) */)
   16766          valid = False;
   16767       if (valid) {
   16768          IRTemp newRt = newTemp(Ity_I32);
   16769          loadGuardedLE( newRt,
   16770                         ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
   16771          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16772          // dis_buf generated is slightly bogus, in fact.
   16773          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   16774                                                        type, imm5, dis_buf);
   16775          putIRegA(rN, erN, condT, Ijk_Boring);
   16776          DIP("ldrt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   16777          goto decode_success;
   16778       }
   16779    }
   16780 
   16781    /* -------------- (A1) LDRBT reg+/-#imm12 -------------- */
   16782    /* Load Register Byte Unprivileged:
   16783       ldrbt<c> Rt, [Rn], #+/-imm12
   16784    */
   16785    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,1) ) {
   16786       UInt rT     = INSN(15,12);
   16787       UInt rN     = INSN(19,16);
   16788       UInt imm12  = INSN(11,0);
   16789       UInt bU     = INSN(23,23);
   16790       Bool valid  = True;
   16791       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   16792       if (valid) {
   16793          IRTemp newRt = newTemp(Ity_I32);
   16794          loadGuardedLE( newRt,
   16795                         ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
   16796          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16797          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16798                              getIRegA(rN), mkU32(imm12));
   16799          putIRegA(rN, erN, condT, Ijk_Boring);
   16800          DIP("ldrbt%s r%u, [r%u], #%c%u\n",
   16801              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   16802          goto decode_success;
   16803       }
   16804    }
   16805 
   16806    /* -------------- (A2) LDRBT reg+/-reg with shift -------------- */
   16807    /* Load Register Byte Unprivileged:
   16808       ldrbt<c> Rt, [Rn], +/-Rm{, shift}
   16809    */
   16810    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,1)
   16811         && INSN(4,4) == 0 ) {
   16812       UInt rT     = INSN(15,12);
   16813       UInt rN     = INSN(19,16);
   16814       UInt rM     = INSN(3,0);
   16815       UInt imm5   = INSN(11,7);
   16816       UInt bU     = INSN(23,23);
   16817       UInt type   = INSN(6,5);
   16818       Bool valid  = True;
   16819       if (rT == 15 || rN == 15 || rN == rT || rM == 15
   16820           /* || (ArchVersion() < 6 && rM == rN) */)
   16821          valid = False;
   16822       if (valid) {
   16823          IRTemp newRt = newTemp(Ity_I32);
   16824          loadGuardedLE( newRt,
   16825                         ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
   16826          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16827          // dis_buf generated is slightly bogus, in fact.
   16828          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   16829                                                        type, imm5, dis_buf);
   16830          putIRegA(rN, erN, condT, Ijk_Boring);
   16831          DIP("ldrbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   16832          goto decode_success;
   16833       }
   16834    }
   16835 
   16836    /* -------------- (A1) LDRHT reg+#imm8 -------------- */
   16837    /* Load Register Halfword Unprivileged:
   16838       ldrht<c> Rt, [Rn] {, #+/-imm8}
   16839    */
   16840    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
   16841        && INSN(7,4) == BITS4(1,0,1,1) ) {
   16842       UInt rT    = INSN(15,12);
   16843       UInt rN    = INSN(19,16);
   16844       UInt bU    = INSN(23,23);
   16845       UInt imm4H = INSN(11,8);
   16846       UInt imm4L = INSN(3,0);
   16847       UInt imm8  = (imm4H << 4) | imm4L;
   16848       Bool valid = True;
   16849       if (rT == 15 || rN == 15 || rN == rT)
   16850          valid = False;
   16851       if (valid) {
   16852          IRTemp newRt = newTemp(Ity_I32);
   16853          loadGuardedLE( newRt,
   16854                         ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
   16855          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16856          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16857                              getIRegA(rN), mkU32(imm8));
   16858          putIRegA(rN, erN, condT, Ijk_Boring);
   16859          DIP("ldrht%s r%u, [r%u], #%c%u\n",
   16860              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   16861          goto decode_success;
   16862       }
   16863    }
   16864 
   16865    /* -------------- (A2) LDRHT reg+/-reg -------------- */
   16866    /* Load Register Halfword Unprivileged:
   16867       ldrht<c> Rt, [Rn], +/-Rm
   16868    */
   16869    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
   16870        && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
   16871       UInt rT    = INSN(15,12);
   16872       UInt rN    = INSN(19,16);
   16873       UInt rM    = INSN(3,0);
   16874       UInt bU    = INSN(23,23);
   16875       Bool valid = True;
   16876       if (rT == 15 || rN == 15 || rN == rT || rM == 15)
   16877          valid = False;
   16878       if (valid) {
   16879          IRTemp newRt = newTemp(Ity_I32);
   16880          loadGuardedLE( newRt,
   16881                         ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
   16882          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16883          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16884                              getIRegA(rN), getIRegA(rM));
   16885          putIRegA(rN, erN, condT, Ijk_Boring);
   16886          DIP("ldrht%s r%u, [r%u], %cr%u\n",
   16887              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   16888          goto decode_success;
   16889       }
   16890    }
   16891 
   16892    /* -------------- (A1) LDRSHT reg+#imm8 -------------- */
   16893    /* Load Register Signed Halfword Unprivileged:
   16894       ldrsht<c> Rt, [Rn] {, #+/-imm8}
   16895    */
   16896    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
   16897        && INSN(7,4) == BITS4(1,1,1,1)) {
   16898       UInt rT    = INSN(15,12);
   16899       UInt rN    = INSN(19,16);
   16900       UInt bU    = INSN(23,23);
   16901       UInt imm4H = INSN(11,8);
   16902       UInt imm4L = INSN(3,0);
   16903       UInt imm8  = (imm4H << 4) | imm4L;
   16904       Bool valid = True;
   16905       if (rN == 15 || rT == 15 || rN == rT)
   16906          valid = False;
   16907       if (valid) {
   16908          IRTemp newRt = newTemp(Ity_I32);
   16909          loadGuardedLE( newRt,
   16910                         ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
   16911          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16912          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16913                              getIRegA(rN), mkU32(imm8));
   16914          putIRegA(rN, erN, condT, Ijk_Boring);
   16915          DIP("ldrsht%s r%u, [r%u], #%c%u\n",
   16916              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   16917          goto decode_success;
   16918       }
   16919    }
   16920 
   16921    /* -------------- (A2) LDRSHT reg+/-reg -------------- */
   16922    /* Load Register Signed Halfword Unprivileged:
   16923       ldrsht<c> Rt, [Rn], +/-Rm
   16924    */
   16925    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
   16926        && INSN(11,4) == BITS8(0,0,0,0,1,1,1,1)) {
   16927       UInt rT    = INSN(15,12);
   16928       UInt rN    = INSN(19,16);
   16929       UInt rM    = INSN(3,0);
   16930       UInt bU    = INSN(23,23);
   16931       Bool valid = True;
   16932       if (rN == 15 || rT == 15 || rN == rT || rM == 15)
   16933          valid = False;
   16934       if (valid) {
   16935          IRTemp newRt = newTemp(Ity_I32);
   16936          loadGuardedLE( newRt,
   16937                         ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
   16938          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16939          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16940                              getIRegA(rN), getIRegA(rM));
   16941          putIRegA(rN, erN, condT, Ijk_Boring);
   16942          DIP("ldrsht%s r%u, [r%u], %cr%u\n",
   16943              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   16944          goto decode_success;
   16945       }
   16946    }
   16947 
   16948    /* -------------- (A1) LDRSBT reg+#imm8 -------------- */
   16949    /* Load Register Signed Byte Unprivileged:
   16950       ldrsbt<c> Rt, [Rn] {, #+/-imm8}
   16951    */
   16952    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
   16953        && INSN(7,4) == BITS4(1,1,0,1)) {
   16954       UInt rT    = INSN(15,12);
   16955       UInt rN    = INSN(19,16);
   16956       UInt bU    = INSN(23,23);
   16957       UInt imm4H = INSN(11,8);
   16958       UInt imm4L = INSN(3,0);
   16959       UInt imm8  = (imm4H << 4) | imm4L;
   16960       Bool valid = True;
   16961       if (rT == 15 || rN == 15 || rN == rT)
   16962          valid = False;
   16963       if (valid) {
   16964          IRTemp newRt = newTemp(Ity_I32);
   16965          loadGuardedLE( newRt,
   16966                         ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
   16967          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16968          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16969                              getIRegA(rN), mkU32(imm8));
   16970          putIRegA(rN, erN, condT, Ijk_Boring);
   16971          DIP("ldrsbt%s r%u, [r%u], #%c%u\n",
   16972              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   16973          goto decode_success;
   16974       }
   16975    }
   16976 
   16977    /* -------------- (A2) LDRSBT reg+/-reg -------------- */
   16978    /* Load Register Signed Byte Unprivileged:
   16979       ldrsbt<c> Rt, [Rn], +/-Rm
   16980    */
   16981    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
   16982        && INSN(11,4) == BITS8(0,0,0,0,1,1,0,1)) {
   16983       UInt rT    = INSN(15,12);
   16984       UInt rN    = INSN(19,16);
   16985       UInt bU    = INSN(23,23);
   16986       UInt rM    = INSN(3,0);
   16987       Bool valid = True;
   16988       if (rT == 15 || rN == 15 || rN == rT || rM == 15)
   16989          valid = False;
   16990       if (valid) {
   16991          IRTemp newRt = newTemp(Ity_I32);
   16992          loadGuardedLE( newRt,
   16993                         ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
   16994          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16995          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16996                              getIRegA(rN), getIRegA(rM));
   16997          putIRegA(rN, erN, condT, Ijk_Boring);
   16998          DIP("ldrsbt%s r%u, [r%u], %cr%u\n",
   16999              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   17000          goto decode_success;
   17001       }
   17002    }
   17003 
   17004    /* -------------- (A1) STRBT reg+#imm12 -------------- */
   17005    /* Store Register Byte Unprivileged:
   17006       strbt<c> Rt, [Rn], #+/-imm12
   17007    */
   17008    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,0) ) {
   17009       UInt rT     = INSN(15,12);
   17010       UInt rN     = INSN(19,16);
   17011       UInt imm12  = INSN(11,0);
   17012       UInt bU     = INSN(23,23);
   17013       Bool valid = True;
   17014       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   17015       if (valid) {
   17016          IRExpr* address = getIRegA(rN);
   17017          IRExpr* data = unop(Iop_32to8, getIRegA(rT));
   17018          storeGuardedLE( address, data, condT);
   17019          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   17020                                getIRegA(rN), mkU32(imm12));
   17021          putIRegA(rN, newRn, condT, Ijk_Boring);
   17022          DIP("strbt%s r%u, [r%u], #%c%u\n",
   17023              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   17024          goto decode_success;
   17025       }
   17026    }
   17027 
   17028    /* -------------- (A2) STRBT reg+/-reg -------------- */
   17029    /* Store Register Byte Unprivileged:
   17030       strbt<c> Rt, [Rn], +/-Rm{, shift}
   17031    */
   17032    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,0)
   17033        && INSN(4,4) == 0) {
   17034       UInt rT     = INSN(15,12);
   17035       UInt rN     = INSN(19,16);
   17036       UInt imm5   = INSN(11,7);
   17037       UInt type   = INSN(6,5);
   17038       UInt rM     = INSN(3,0);
   17039       UInt bU     = INSN(23,23);
   17040       Bool valid  = True;
   17041       if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
   17042       if (valid) {
   17043          IRExpr* address = getIRegA(rN);
   17044          IRExpr* data = unop(Iop_32to8, getIRegA(rT));
   17045          storeGuardedLE( address, data, condT);
   17046          // dis_buf generated is slightly bogus, in fact.
   17047          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   17048                                                        type, imm5, dis_buf);
   17049          putIRegA(rN, erN, condT, Ijk_Boring);
   17050          DIP("strbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   17051          goto decode_success;
   17052       }
   17053    }
   17054 
   17055    /* -------------- (A1) STRHT reg+#imm8 -------------- */
   17056    /* Store Register Halfword Unprivileged:
   17057       strht<c> Rt, [Rn], #+/-imm8
   17058    */
   17059    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,0)
   17060        && INSN(7,4) == BITS4(1,0,1,1) ) {
   17061       UInt rT    = INSN(15,12);
   17062       UInt rN    = INSN(19,16);
   17063       UInt imm4H = INSN(11,8);
   17064       UInt imm4L = INSN(3,0);
   17065       UInt imm8  = (imm4H << 4) | imm4L;
   17066       UInt bU    = INSN(23,23);
   17067       Bool valid = True;
   17068       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   17069       if (valid) {
   17070          IRExpr* address = getIRegA(rN);
   17071          IRExpr* data = unop(Iop_32to16, getIRegA(rT));
   17072          storeGuardedLE( address, data, condT);
   17073          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   17074                                getIRegA(rN), mkU32(imm8));
   17075          putIRegA(rN, newRn, condT, Ijk_Boring);
   17076          DIP("strht%s r%u, [r%u], #%c%u\n",
   17077              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   17078          goto decode_success;
   17079       }
   17080    }
   17081 
   17082    /* -------------- (A2) STRHT reg+reg -------------- */
   17083    /* Store Register Halfword Unprivileged:
   17084       strht<c> Rt, [Rn], +/-Rm
   17085    */
   17086    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,0)
   17087        && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
   17088       UInt rT    = INSN(15,12);
   17089       UInt rN    = INSN(19,16);
   17090       UInt rM    = INSN(3,0);
   17091       UInt bU    = INSN(23,23);
   17092       Bool valid = True;
   17093       if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
   17094       if (valid) {
   17095          IRExpr* address = getIRegA(rN);
   17096          IRExpr* data = unop(Iop_32to16, getIRegA(rT));
   17097          storeGuardedLE( address, data, condT);
   17098          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   17099                                getIRegA(rN), getIRegA(rM));
   17100          putIRegA(rN, newRn, condT, Ijk_Boring);
   17101          DIP("strht%s r%u, [r%u], %cr%u\n",
   17102              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   17103          goto decode_success;
   17104       }
   17105    }
   17106 
   17107    /* -------------- (A1) STRT reg+imm12 -------------- */
   17108    /* Store Register Unprivileged:
   17109       strt<c> Rt, [Rn], #+/-imm12
   17110    */
   17111    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,0) ) {
   17112       UInt rT    = INSN(15,12);
   17113       UInt rN    = INSN(19,16);
   17114       UInt imm12 = INSN(11,0);
   17115       UInt bU    = INSN(23,23);
   17116       Bool valid = True;
   17117       if (rN == 15 || rN == rT) valid = False;
   17118       if (valid) {
   17119          IRExpr* address = getIRegA(rN);
   17120          storeGuardedLE( address, getIRegA(rT), condT);
   17121          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   17122                                getIRegA(rN), mkU32(imm12));
   17123          putIRegA(rN, newRn, condT, Ijk_Boring);
   17124          DIP("strt%s r%u, [r%u], %c%u\n",
   17125              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   17126          goto decode_success;
   17127       }
   17128    }
   17129 
   17130    /* -------------- (A2) STRT reg+reg -------------- */
   17131    /* Store Register Unprivileged:
   17132       strt<c> Rt, [Rn], +/-Rm{, shift}
   17133    */
   17134    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,0)
   17135        && INSN(4,4) == 0 ) {
   17136       UInt rT    = INSN(15,12);
   17137       UInt rN    = INSN(19,16);
   17138       UInt rM    = INSN(3,0);
   17139       UInt type  = INSN(6,5);
   17140       UInt imm5  = INSN(11,7);
   17141       UInt bU    = INSN(23,23);
   17142       Bool valid = True;
   17143       if (rN == 15 || rN == rT || rM == 15) valid = False;
   17144       /* FIXME We didn't do:
   17145          if ArchVersion() < 6 && rM == rN then UNPREDICTABLE */
   17146       if (valid) {
   17147          storeGuardedLE( getIRegA(rN), getIRegA(rT), condT);
   17148          // dis_buf generated is slightly bogus, in fact.
   17149          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   17150                                                        type, imm5, dis_buf);
   17151          putIRegA(rN, erN, condT, Ijk_Boring);
   17152          DIP("strt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   17153          goto decode_success;
   17154       }
   17155    }
   17156 
   17157    /* ----------------------------------------------------------- */
   17158    /* -- ARMv7 instructions                                    -- */
   17159    /* ----------------------------------------------------------- */
   17160 
   17161    /* -------------- read CP15 TPIDRURO register ------------- */
   17162    /* mrc     p15, 0, r0, c13, c0, 3  up to
   17163       mrc     p15, 0, r14, c13, c0, 3
   17164    */
   17165    /* I don't know whether this is really v7-only.  But anyway, we
   17166       have to support it since arm-linux uses TPIDRURO as a thread
   17167       state register. */
   17168    if (0x0E1D0F70 == (insn & 0x0FFF0FFF)) {
   17169       UInt rD = INSN(15,12);
   17170       if (rD <= 14) {
   17171          /* skip r15, that's too stupid to handle */
   17172          putIRegA(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32),
   17173                       condT, Ijk_Boring);
   17174          DIP("mrc%s p15,0, r%u, c13, c0, 3\n", nCC(INSN_COND), rD);
   17175          goto decode_success;
   17176       }
   17177       /* fall through */
   17178    }
   17179 
   17180    /* Handle various kinds of barriers.  This is rather indiscriminate
   17181       in the sense that they are all turned into an IR Fence, which
   17182       means we don't know which they are, so the back end has to
   17183       re-emit them all when it comes across an IR Fence.
   17184    */
   17185    /* v6 */ /* mcr 15, 0, rT, c7, c10, 5 */
   17186    if (0xEE070FBA == (insn & 0xFFFF0FFF)) {
   17187       UInt rT = INSN(15,12);
   17188       if (rT <= 14) {
   17189          /* mcr 15, 0, rT, c7, c10, 5 (v6) equiv to DMB (v7).  Data
   17190             Memory Barrier -- ensures ordering of memory accesses. */
   17191          stmt( IRStmt_MBE(Imbe_Fence) );
   17192          DIP("mcr 15, 0, r%u, c7, c10, 5 (data memory barrier)\n", rT);
   17193          goto decode_success;
   17194       }
   17195       /* fall through */
   17196    }
   17197    /* other flavours of barrier */
   17198    switch (insn) {
   17199       case 0xEE070F9A: /* v6 */
   17200          /* mcr 15, 0, r0, c7, c10, 4 (v6) equiv to DSB (v7).  Data
   17201             Synch Barrier -- ensures completion of memory accesses. */
   17202          stmt( IRStmt_MBE(Imbe_Fence) );
   17203          DIP("mcr 15, 0, r0, c7, c10, 4 (data synch barrier)\n");
   17204          goto decode_success;
   17205       case 0xEE070F95: /* v6 */
   17206          /* mcr 15, 0, r0, c7, c5, 4 (v6) equiv to ISB (v7).
   17207             Instruction Synchronisation Barrier (or Flush Prefetch
   17208             Buffer) -- a pipe flush, I think.  I suspect we could
   17209             ignore those, but to be on the safe side emit a fence
   17210             anyway. */
   17211          stmt( IRStmt_MBE(Imbe_Fence) );
   17212          DIP("mcr 15, 0, r0, c7, c5, 4 (insn synch barrier)\n");
   17213          goto decode_success;
   17214       default:
   17215          break;
   17216    }
   17217 
   17218    /* ----------------------------------------------------------- */
   17219    /* -- VFP (CP 10, CP 11) instructions (in ARM mode)         -- */
   17220    /* ----------------------------------------------------------- */
   17221 
   17222    if (INSN_COND != ARMCondNV) {
   17223       Bool ok_vfp = decode_CP10_CP11_instruction (
   17224                        &dres, INSN(27,0), condT, INSN_COND,
   17225                        False/*!isT*/
   17226                     );
   17227       if (ok_vfp)
   17228          goto decode_success;
   17229    }
   17230 
   17231    /* ----------------------------------------------------------- */
   17232    /* -- NEON instructions (in ARM mode)                       -- */
   17233    /* ----------------------------------------------------------- */
   17234 
   17235    /* These are all in NV space, and so are taken care of (far) above,
   17236       by a call from this function to decode_NV_instruction(). */
   17237 
   17238    /* ----------------------------------------------------------- */
   17239    /* -- v6 media instructions (in ARM mode)                   -- */
   17240    /* ----------------------------------------------------------- */
   17241 
   17242    { Bool ok_v6m = decode_V6MEDIA_instruction(
   17243                        &dres, INSN(27,0), condT, INSN_COND,
   17244                        False/*!isT*/
   17245                    );
   17246      if (ok_v6m)
   17247         goto decode_success;
   17248    }
   17249 
   17250    /* ----------------------------------------------------------- */
   17251    /* -- Undecodable                                           -- */
   17252    /* ----------------------------------------------------------- */
   17253 
   17254    goto decode_failure;
   17255    /*NOTREACHED*/
   17256 
   17257   decode_failure:
   17258    /* All decode failures end up here. */
   17259    if (sigill_diag) {
   17260       vex_printf("disInstr(arm): unhandled instruction: "
   17261                  "0x%x\n", insn);
   17262       vex_printf("                 cond=%d(0x%x) 27:20=%u(0x%02x) "
   17263                                    "4:4=%d "
   17264                                    "3:0=%u(0x%x)\n",
   17265                  (Int)INSN_COND, (UInt)INSN_COND,
   17266                  (Int)INSN(27,20), (UInt)INSN(27,20),
   17267                  (Int)INSN(4,4),
   17268                  (Int)INSN(3,0), (UInt)INSN(3,0) );
   17269    }
   17270 
   17271    /* Tell the dispatcher that this insn cannot be decoded, and so has
   17272       not been executed, and (is currently) the next to be executed.
   17273       R15 should be up-to-date since it is made so at the start of each
   17274       insn, but nevertheless be paranoid and update it again right
   17275       now. */
   17276    vassert(0 == (guest_R15_curr_instr_notENC & 3));
   17277    llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
   17278    dres.whatNext    = Dis_StopHere;
   17279    dres.jk_StopHere = Ijk_NoDecode;
   17280    dres.len         = 0;
   17281    return dres;
   17282 
   17283   decode_success:
   17284    /* All decode successes end up here. */
   17285    DIP("\n");
   17286 
   17287    vassert(dres.len == 4 || dres.len == 20);
   17288 
   17289    /* Now then.  Do we have an implicit jump to r15 to deal with? */
   17290    if (r15written) {
   17291       /* If we get a jump to deal with, we assume that there's been no
   17292          other competing branch stuff previously generated for this
   17293          insn.  That's reasonable, in the sense that the ARM insn set
   17294          appears to declare as "Unpredictable" any instruction which
   17295          generates more than one possible new value for r15.  Hence
   17296          just assert.  The decoders themselves should check against
   17297          all such instructions which are thusly Unpredictable, and
   17298          decline to decode them.  Hence we should never get here if we
   17299          have competing new values for r15, and hence it is safe to
   17300          assert here. */
   17301       vassert(dres.whatNext == Dis_Continue);
   17302       vassert(irsb->next == NULL);
   17303       vassert(irsb->jumpkind == Ijk_Boring);
   17304       /* If r15 is unconditionally written, terminate the block by
   17305          jumping to it.  If it's conditionally written, still
   17306          terminate the block (a shame, but we can't do side exits to
   17307          arbitrary destinations), but first jump to the next
   17308          instruction if the condition doesn't hold. */
   17309       /* We can't use getIReg(15) to get the destination, since that
   17310          will produce r15+8, which isn't what we want.  Must use
   17311          llGetIReg(15) instead. */
   17312       if (r15guard == IRTemp_INVALID) {
   17313          /* unconditional */
   17314       } else {
   17315          /* conditional */
   17316          stmt( IRStmt_Exit(
   17317                   unop(Iop_32to1,
   17318                        binop(Iop_Xor32,
   17319                              mkexpr(r15guard), mkU32(1))),
   17320                   r15kind,
   17321                   IRConst_U32(guest_R15_curr_instr_notENC + 4),
   17322                   OFFB_R15T
   17323          ));
   17324       }
   17325       /* This seems crazy, but we're required to finish the insn with
   17326          a write to the guest PC.  As usual we rely on ir_opt to tidy
   17327          up later. */
   17328       llPutIReg(15, llGetIReg(15));
   17329       dres.whatNext    = Dis_StopHere;
   17330       dres.jk_StopHere = r15kind;
   17331    } else {
   17332       /* Set up the end-state in the normal way. */
   17333       switch (dres.whatNext) {
   17334          case Dis_Continue:
   17335             llPutIReg(15, mkU32(dres.len + guest_R15_curr_instr_notENC));
   17336             break;
   17337          case Dis_ResteerU:
   17338          case Dis_ResteerC:
   17339             llPutIReg(15, mkU32(dres.continueAt));
   17340             break;
   17341          case Dis_StopHere:
   17342             break;
   17343          default:
   17344             vassert(0);
   17345       }
   17346    }
   17347 
   17348    return dres;
   17349 
   17350 #  undef INSN_COND
   17351 #  undef INSN
   17352 }
   17353 
   17354 
   17355 /*------------------------------------------------------------*/
   17356 /*--- Disassemble a single Thumb2 instruction              ---*/
   17357 /*------------------------------------------------------------*/
   17358 
   17359 static const UChar it_length_table[256]; /* fwd decl; indexed by (candidate IT insn & 0xFF), gives # of guarded insns, 1 .. 4 */
   17360 
   17361 /* NB: in Thumb mode we do fetches of regs with getIRegT, which
   17362    automagically adds 4 to fetches of r15.  However, writes to regs
   17363    are done with putIRegT, which disallows writes to r15.  Hence any
   17364    r15 writes and associated jumps have to be done "by hand". */
   17365 
   17366 /* Disassemble a single Thumb instruction into IR.  The instruction is
   17367    located in host memory at guest_instr, and has (decoded) guest IP
   17368    of guest_R15_curr_instr_notENC, which will have been set before the
   17369    call here. */
   17370 
   17371 static
   17372 DisResult disInstr_THUMB_WRK (
   17373              Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
   17374              Bool         resteerCisOk,
   17375              void*        callback_opaque,
   17376              UChar*       guest_instr,
   17377              VexArchInfo* archinfo,
   17378              VexAbiInfo*  abiinfo,
   17379              Bool         sigill_diag
   17380           )
   17381 {
   17382    /* A macro to fish bits out of insn0.  There's also INSN1, to fish
   17383       bits out of insn1, but that's defined only after the end of the
   17384       16-bit insn decoder, so as to stop it mistakenly being used
   17385       therein. */
   17386 #  define INSN0(_bMax,_bMin)  SLICE_UInt(((UInt)insn0), (_bMax), (_bMin))
   17387 
   17388    DisResult dres;
   17389    UShort    insn0; /*  first 16 bits of the insn */
   17390    UShort    insn1; /* second 16 bits of the insn */
   17391    //Bool      allow_VFP = False;
   17392    //UInt      hwcaps = archinfo->hwcaps;
   17393    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
   17394 
   17395    /* Summary result of the ITxxx backwards analysis: False == safe
   17396       but suboptimal. */
   17397    Bool guaranteedUnconditional = False;
   17398 
   17399    /* What insn variants are we supporting today? */
   17400    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
   17401    // etc etc
   17402 
   17403    /* Set result defaults. */
   17404    dres.whatNext    = Dis_Continue;
   17405    dres.len         = 2;
   17406    dres.continueAt  = 0;
   17407    dres.jk_StopHere = Ijk_INVALID;
   17408 
   17409    /* Set default actions for post-insn handling of writes to r15, if
   17410       required. */
   17411    r15written = False;
   17412    r15guard   = IRTemp_INVALID; /* unconditional */
   17413    r15kind    = Ijk_Boring;
   17414 
   17415    /* Insns could be 2 or 4 bytes long.  Just get the first 16 bits at
   17416       this point.  If we need the second 16, get them later.  We can't
   17417       get them both out immediately because it risks a fault (very
   17418       unlikely, but ..) if the second 16 bits aren't actually
   17419       necessary. */
   17420    insn0 = getUShortLittleEndianly( guest_instr );
   17421    insn1 = 0; /* We'll get it later, once we know we need it. */
   17422 
   17423    /* Similarly, will set this later. */
   17424    IRTemp old_itstate = IRTemp_INVALID;
   17425 
   17426    if (0) vex_printf("insn: 0x%x\n", insn0);
   17427 
   17428    DIP("\t(thumb) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
   17429 
   17430    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   17431 
   17432    /* ----------------------------------------------------------- */
   17433    /* Spot "Special" instructions (see comment at top of file). */
   17434    {
   17435       UChar* code = (UChar*)guest_instr;
   17436       /* Spot the 16-byte preamble:
   17437 
   17438          ea4f 0cfc  mov.w   ip, ip, ror #3
   17439          ea4f 3c7c  mov.w   ip, ip, ror #13
   17440          ea4f 7c7c  mov.w   ip, ip, ror #29
   17441          ea4f 4cfc  mov.w   ip, ip, ror #19
   17442       */
   17443       UInt word1 = 0x0CFCEA4F;
   17444       UInt word2 = 0x3C7CEA4F;
   17445       UInt word3 = 0x7C7CEA4F;
   17446       UInt word4 = 0x4CFCEA4F;
   17447       if (getUIntLittleEndianly(code+ 0) == word1 &&
   17448           getUIntLittleEndianly(code+ 4) == word2 &&
   17449           getUIntLittleEndianly(code+ 8) == word3 &&
   17450           getUIntLittleEndianly(code+12) == word4) {
   17451          /* Got a "Special" instruction preamble.  Which one is it? */
   17452          // 0x 0A 0A EA 4A
   17453          if (getUIntLittleEndianly(code+16) == 0x0A0AEA4A
   17454                                                /* orr.w r10,r10,r10 */) {
   17455             /* R3 = client_request ( R4 ) */
   17456             DIP("r3 = client_request ( %%r4 )\n");
   17457             llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
   17458             dres.jk_StopHere = Ijk_ClientReq;
   17459             dres.whatNext    = Dis_StopHere;
   17460             goto decode_success;
   17461          }
   17462          else
   17463          // 0x 0B 0B EA 4B
   17464          if (getUIntLittleEndianly(code+16) == 0x0B0BEA4B
   17465                                                /* orr r11,r11,r11 */) {
   17466             /* R3 = guest_NRADDR */
   17467             DIP("r3 = guest_NRADDR\n");
   17468             dres.len = 20;
   17469             llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
   17470             goto decode_success;
   17471          }
   17472          else
   17473          // 0x 0C 0C EA 4C
   17474          if (getUIntLittleEndianly(code+16) == 0x0C0CEA4C
   17475                                                /* orr r12,r12,r12 */) {
   17476             /*  branch-and-link-to-noredir R4 */
   17477             DIP("branch-and-link-to-noredir r4\n");
   17478             llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
   17479             llPutIReg(15, getIRegT(4));
   17480             dres.jk_StopHere = Ijk_NoRedir;
   17481             dres.whatNext    = Dis_StopHere;
   17482             goto decode_success;
   17483          }
   17484          else
   17485          // 0x 09 09 EA 49
   17486          if (getUIntLittleEndianly(code+16) == 0x0909EA49
   17487                                                /* orr r9,r9,r9 */) {
   17488             /* IR injection */
   17489             DIP("IR injection\n");
   17490             vex_inject_ir(irsb, Iend_LE);
   17491             // Invalidate the current insn. The reason is that the IRop we're
   17492             // injecting here can change. In which case the translation has to
   17493             // be redone. For ease of handling, we simply invalidate all the
   17494             // time.
   17495             stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
   17496             stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
   17497             llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
   17498             dres.whatNext    = Dis_StopHere;
   17499             dres.jk_StopHere = Ijk_InvalICache;
   17500             goto decode_success;
   17501          }
   17502          /* We don't know what it is.  Set insn0 so decode_failure
   17503             can print the insn following the Special-insn preamble. */
   17504          insn0 = getUShortLittleEndianly(code+16);
   17505          goto decode_failure;
   17506          /*NOTREACHED*/
   17507       }
   17508 
   17509    }
   17510 
   17511    /* ----------------------------------------------------------- */
   17512 
   17513    /* Main Thumb instruction decoder starts here.  It's a series of
   17514       switches which examine ever longer bit sequences at the MSB of
   17515       the instruction word, first for 16-bit insns, then for 32-bit
   17516       insns. */
   17517 
   17518    /* --- BEGIN ITxxx optimisation analysis --- */
   17519    /* This is a crucial optimisation for the ITState boilerplate that
   17520       follows.  Examine the 9 halfwords preceding this instruction,
   17521       and if we are absolutely sure that none of them constitute an
   17522       'it' instruction, then we can be sure that this instruction is
   17523       not under the control of any 'it' instruction, and so
   17524       guest_ITSTATE must be zero.  So write zero into ITSTATE right
   17525       now, so that iropt can fold out almost all of the resulting
   17526       junk.
   17527 
   17528       If we aren't sure, we can always safely skip this step.  So be a
   17529       bit conservative about it: only poke around in the same page as
   17530       this instruction, lest we get a fault from the previous page
   17531       that would not otherwise have happened.  The saving grace is
   17532       that such skipping is pretty rare -- it only happens,
   17533       statistically, 18/4096ths of the time, so is judged unlikely to
   17534       be a performance problem.
   17535 
   17536       FIXME: do better.  Take into account the number of insns covered
   17537       by any IT insns we find, to rule out cases where an IT clearly
   17538       cannot cover this instruction.  This would improve behaviour for
   17539       branch targets immediately following an IT-guarded group that is
   17540       not of full length.  Eg, (and completely ignoring issues of 16-
   17541       vs 32-bit insn length):
   17542 
   17543              ite cond
   17544              insn1
   17545              insn2
   17546       label: insn3
   17547              insn4
   17548 
   17549       The 'it' only conditionalises insn1 and insn2.  However, the
   17550       current analysis is conservative and considers insn3 and insn4
   17551       also possibly guarded.  Hence if 'label:' is the start of a hot
   17552       loop we will get a big performance hit.
   17553    */
   17554    {
   17555       /* Summary result of this analysis: False == safe but
   17556          suboptimal. */
   17557       vassert(guaranteedUnconditional == False);
   17558 
   17559       UInt pc = guest_R15_curr_instr_notENC;
   17560       vassert(0 == (pc & 1));
   17561 
   17562       UInt pageoff = pc & 0xFFF;
   17563       if (pageoff >= 18) {
   17564          /* It's safe to poke about in the 9 halfwords preceding this
   17565             insn.  So, have a look at them. */
   17566          guaranteedUnconditional = True; /* assume no 'it' insn found,
   17567                                             till we do */
   17568          UShort* hwp = (UShort*)(HWord)pc;
   17569          Int i;
   17570          for (i = -1; i >= -9; i--) {
   17571             /* We're in the same page.  (True, but commented out due
   17572                to expense.) */
   17573             /*
   17574             vassert( ( ((UInt)(&hwp[i])) & 0xFFFFF000 )
   17575                       == ( pc & 0xFFFFF000 ) );
   17576             */
   17577             /* All valid IT instructions must have the form 0xBFxy,
   17578                where x can be anything, but y must be nonzero.  Find
   17579                the number of insns covered by it (1 .. 4) and check to
   17580                see if it can possibly reach up to the instruction in
   17581                question.  Some (x,y) combinations mean UNPREDICTABLE,
   17582                and the table is constructed to be conservative by
   17583                returning 4 for those cases, so the analysis is safe
   17584                even if the code uses unpredictable IT instructions (in
   17585                which case its authors are nuts, but hey.)  */
   17586             UShort hwp_i = hwp[i];
   17587             if (UNLIKELY((hwp_i & 0xFF00) == 0xBF00 && (hwp_i & 0xF) != 0)) {
   17588                /* might be an 'it' insn. */
   17589                /* # guarded insns */
   17590                Int n_guarded = (Int)it_length_table[hwp_i & 0xFF];
   17591                vassert(n_guarded >= 1 && n_guarded <= 4);
   17592                if (n_guarded * 2 /* # guarded HWs, worst case */
   17593                    > (-(i+1)))   /* -(i+1): # remaining HWs after the IT */
   17594                    /* -(i+0) also seems to work, even though I think
   17595                       it's wrong.  I don't understand that. */
   17596                   guaranteedUnconditional = False;
   17597                break;
   17598             }
   17599          }
   17600       }
   17601    }
   17602    /* --- END ITxxx optimisation analysis --- */
   17603 
   17604    /* Generate the guarding condition for this insn, by examining
   17605       ITSTATE.  Assign it to condT.  Also, generate new
   17606       values for ITSTATE ready for stuffing back into the
   17607       guest state, but don't actually do the Put yet, since it will
   17608       need to be stuffed back in only after the instruction gets to a
   17609       point where it is sure to complete.  Mostly we let the code at
   17610       decode_success handle this, but in cases where the insn contains
   17611       a side exit, we have to update them before the exit. */
   17612 
   17613    /* If the ITxxx optimisation analysis above could not prove that
   17614       this instruction is guaranteed unconditional, we insert a
   17615       lengthy IR preamble to compute the guarding condition at
   17616       runtime.  If it can prove it (which obviously we hope is the
   17617       normal case) then we insert a minimal preamble, which is
   17618       equivalent to setting guest_ITSTATE to zero and then folding
   17619       that through the full preamble (which completely disappears). */
   17620 
   17621    IRTemp condT              = IRTemp_INVALID;
   17622    IRTemp cond_AND_notInIT_T = IRTemp_INVALID;
   17623 
   17624    IRTemp new_itstate        = IRTemp_INVALID;
   17625    vassert(old_itstate == IRTemp_INVALID);
   17626 
   17627    if (guaranteedUnconditional) {
   17628       /* BEGIN "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
   17629 
   17630       // ITSTATE = 0 :: I32
   17631       IRTemp z32 = newTemp(Ity_I32);
   17632       assign(z32, mkU32(0));
   17633       put_ITSTATE(z32);
   17634 
   17635       // old_itstate = 0 :: I32
   17636       //
   17637       // old_itstate = get_ITSTATE();
   17638       old_itstate = z32; /* 0 :: I32 */
   17639 
   17640       // new_itstate = old_itstate >> 8
   17641       //             = 0 >> 8
   17642       //             = 0 :: I32
   17643       //
   17644       // new_itstate = newTemp(Ity_I32);
   17645       // assign(new_itstate,
   17646       //        binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   17647       new_itstate = z32;
   17648 
   17649       // ITSTATE = 0 :: I32(again)
   17650       //
   17651       // put_ITSTATE(new_itstate);
   17652 
   17653       // condT1 = calc_cond_dyn( xor(and(old_itstate,0xF0), 0xE0) )
   17654       //        = calc_cond_dyn( xor(0,0xE0) )
   17655       //        = calc_cond_dyn ( 0xE0 )
   17656       //        = 1 :: I32
   17657       // Not that this matters, since the computed value is not used:
   17658       // see condT folding below
   17659       //
   17660       // IRTemp condT1 = newTemp(Ity_I32);
   17661       // assign(condT1,
   17662       //        mk_armg_calculate_condition_dyn(
   17663       //           binop(Iop_Xor32,
   17664       //                 binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
   17665       //                 mkU32(0xE0))
   17666       //       )
   17667       // );
   17668 
   17669       // condT = 32to8(and32(old_itstate,0xF0)) == 0  ? 1  : condT1
   17670       //       = 32to8(and32(0,0xF0)) == 0  ? 1  : condT1
   17671       //       = 32to8(0) == 0  ? 1  : condT1
   17672       //       = 0 == 0  ? 1  : condT1
   17673       //       = 1
   17674       //
   17675       // condT = newTemp(Ity_I32);
   17676       // assign(condT, IRExpr_ITE(
   17677       //                  unop(Iop_32to8, binop(Iop_And32,
   17678       //                                        mkexpr(old_itstate),
   17679       //                                        mkU32(0xF0))),
   17680       //                  mkexpr(condT1),
   17681       //                  mkU32(1))
   17682       //       ));
   17683       condT = newTemp(Ity_I32);
   17684       assign(condT, mkU32(1));
   17685 
   17686       // notInITt = xor32(and32(old_itstate, 1), 1)
   17687       //          = xor32(and32(0, 1), 1)
   17688       //          = xor32(0, 1)
   17689       //          = 1 :: I32
   17690       //
   17691       // IRTemp notInITt = newTemp(Ity_I32);
   17692       // assign(notInITt,
   17693       //        binop(Iop_Xor32,
   17694       //              binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
   17695       //              mkU32(1)));
   17696 
   17697       // cond_AND_notInIT_T = and32(notInITt, condT)
   17698       //                    = and32(1, 1)
   17699       //                    = 1
   17700       //
   17701       // cond_AND_notInIT_T = newTemp(Ity_I32);
   17702       // assign(cond_AND_notInIT_T,
   17703       //        binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
   17704       cond_AND_notInIT_T = condT; /* 1 :: I32 */
   17705 
   17706       /* END "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
   17707    } else {
   17708       /* BEGIN { STANDARD PREAMBLE; } */
   17709 
   17710       old_itstate = get_ITSTATE();
   17711 
   17712       new_itstate = newTemp(Ity_I32);
   17713       assign(new_itstate,
   17714              binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   17715 
   17716       put_ITSTATE(new_itstate);
   17717 
   17718       /* Same strategy as for ARM insns: generate a condition
   17719          temporary at this point (or IRTemp_INVALID, meaning
   17720          unconditional).  We leave it to lower-level instruction
   17721          decoders to decide whether they can generate straight-line
   17722          code, or whether they must generate a side exit before the
   17723          instruction.  condT :: Ity_I32 and is always either zero or
   17724          one. */
   17725       IRTemp condT1 = newTemp(Ity_I32);
   17726       assign(condT1,
   17727              mk_armg_calculate_condition_dyn(
   17728                 binop(Iop_Xor32,
   17729                       binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
   17730                       mkU32(0xE0))
   17731             )
   17732       );
   17733 
   17734       /* This is a bit complex, but needed to make Memcheck understand
   17735          that, if the condition in old_itstate[7:4] denotes AL (that
   17736          is, if this instruction is to be executed unconditionally),
   17737          then condT does not depend on the results of calling the
   17738          helper.
   17739 
   17740          We test explicitly for old_itstate[7:4] == AL ^ 0xE, and in
   17741          that case set condT directly to 1.  Else we use the results
   17742          of the helper.  Since old_itstate is always defined and
   17743          because Memcheck does lazy V-bit propagation through ITE,
   17744          this will cause condT to always be a defined 1 if the
   17745          condition is 'AL'.  From an execution semantics point of view
   17746          this is irrelevant since we're merely duplicating part of the
   17747          behaviour of the helper.  But it makes it clear to Memcheck,
   17748          in this case, that condT does not in fact depend on the
   17749          contents of the condition code thunk.  Without it, we get
   17750          quite a lot of false errors.
   17751 
   17752          So, just to clarify: from a straight semantics point of view,
   17753          we can simply do "assign(condT, mkexpr(condT1))", and the
   17754          simulator still runs fine.  It's just that we get loads of
   17755          false errors from Memcheck. */
   17756       condT = newTemp(Ity_I32);
   17757       assign(condT, IRExpr_ITE(
   17758                        binop(Iop_CmpNE32, binop(Iop_And32,
   17759                                                 mkexpr(old_itstate),
   17760                                                 mkU32(0xF0)),
   17761                                           mkU32(0)),
   17762                        mkexpr(condT1),
   17763                        mkU32(1)
   17764             ));
   17765 
   17766       /* Something we don't have in ARM: generate a 0 or 1 value
   17767          indicating whether or not we are in an IT block (NB: 0 = in
   17768          IT block, 1 = not in IT block).  This is used to gate
   17769          condition code updates in 16-bit Thumb instructions. */
   17770       IRTemp notInITt = newTemp(Ity_I32);
   17771       assign(notInITt,
   17772              binop(Iop_Xor32,
   17773                    binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
   17774                    mkU32(1)));
   17775 
   17776       /* Compute 'condT && notInITt' -- that is, the instruction is
   17777          going to execute, and we're not in an IT block.  This is the
   17778          gating condition for updating condition codes in 16-bit Thumb
   17779          instructions, except for CMP, CMN and TST. */
   17780       cond_AND_notInIT_T = newTemp(Ity_I32);
   17781       assign(cond_AND_notInIT_T,
   17782              binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
   17783       /* END { STANDARD PREAMBLE; } */
   17784    }
   17785 
   17786 
   17787    /* At this point:
   17788       * ITSTATE has been updated
   17789       * condT holds the guarding condition for this instruction (0 or 1),
   17790       * notInITt is 1 if we're in "normal" code, 0 if in an IT block
   17791       * cond_AND_notInIT_T is the AND of the above two.
   17792 
   17793       If the instruction proper can't trap, then there's nothing else
   17794       to do w.r.t. ITSTATE -- just go and generate IR for the
   17795       insn, taking into account the guarding condition.
   17796 
   17797       If, however, the instruction might trap, then we must back up
   17798       ITSTATE to the old value, and re-update it after the potentially
   17799       trapping IR section.  A trap can happen either via a memory
   17800       reference or because we need to throw SIGILL.
   17801 
   17802       If an instruction has a side exit, we need to be sure that any
   17803       ITSTATE backup is re-updated before the side exit.
   17804    */
   17805 
   17806    /* ----------------------------------------------------------- */
   17807    /* --                                                       -- */
   17808    /* -- Thumb 16-bit integer instructions                     -- */
   17809    /* --                                                       -- */
   17810    /* -- IMPORTANT: references to insn1 or INSN1 are           -- */
   17811    /* --            not allowed in this section                -- */
   17812    /* --                                                       -- */
   17813    /* ----------------------------------------------------------- */
   17814 
   17815    /* 16-bit instructions inside an IT block, apart from CMP, CMN and
   17816       TST, do not set the condition codes.  Hence we must dynamically
   17817       test for this case for every condition code update. */
   17818 
   17819    IROp   anOp   = Iop_INVALID;
   17820    const HChar* anOpNm = NULL;
   17821 
   17822    /* ================ 16-bit 15:6 cases ================ */
   17823 
   17824    switch (INSN0(15,6)) {
   17825 
   17826    case 0x10a:   // CMP
   17827    case 0x10b: { // CMN
   17828       /* ---------------- CMP Rn, Rm ---------------- */
   17829       Bool   isCMN = INSN0(15,6) == 0x10b;
   17830       UInt   rN    = INSN0(2,0);
   17831       UInt   rM    = INSN0(5,3);
   17832       IRTemp argL  = newTemp(Ity_I32);
   17833       IRTemp argR  = newTemp(Ity_I32);
   17834       assign( argL, getIRegT(rN) );
   17835       assign( argR, getIRegT(rM) );
   17836       /* Update flags regardless of whether in an IT block or not. */
   17837       setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   17838                       argL, argR, condT );
   17839       DIP("%s r%u, r%u\n", isCMN ? "cmn" : "cmp", rN, rM);
   17840       goto decode_success;
   17841    }
   17842 
   17843    case 0x108: {
   17844       /* ---------------- TST Rn, Rm ---------------- */
   17845       UInt   rN   = INSN0(2,0);
   17846       UInt   rM   = INSN0(5,3);
   17847       IRTemp oldC = newTemp(Ity_I32);
   17848       IRTemp oldV = newTemp(Ity_I32);
   17849       IRTemp res  = newTemp(Ity_I32);
   17850       assign( oldC, mk_armg_calculate_flag_c() );
   17851       assign( oldV, mk_armg_calculate_flag_v() );
   17852       assign( res,  binop(Iop_And32, getIRegT(rN), getIRegT(rM)) );
   17853       /* Update flags regardless of whether in an IT block or not. */
   17854       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
   17855       DIP("tst r%u, r%u\n", rN, rM);
   17856       goto decode_success;
   17857    }
   17858 
   17859    case 0x109: {
   17860       /* ---------------- NEGS Rd, Rm ---------------- */
   17861       /* Rd = -Rm */
   17862       UInt   rM   = INSN0(5,3);
   17863       UInt   rD   = INSN0(2,0);
   17864       IRTemp arg  = newTemp(Ity_I32);
   17865       IRTemp zero = newTemp(Ity_I32);
   17866       assign(arg, getIRegT(rM));
   17867       assign(zero, mkU32(0));
   17868       // rD can never be r15
   17869       putIRegT(rD, binop(Iop_Sub32, mkexpr(zero), mkexpr(arg)), condT);
   17870       setFlags_D1_D2( ARMG_CC_OP_SUB, zero, arg, cond_AND_notInIT_T);
   17871       DIP("negs r%u, r%u\n", rD, rM);
   17872       goto decode_success;
   17873    }
   17874 
   17875    case 0x10F: {
   17876       /* ---------------- MVNS Rd, Rm ---------------- */
   17877       /* Rd = ~Rm */
   17878       UInt   rM   = INSN0(5,3);
   17879       UInt   rD   = INSN0(2,0);
   17880       IRTemp oldV = newTemp(Ity_I32);
   17881       IRTemp oldC = newTemp(Ity_I32);
   17882       IRTemp res  = newTemp(Ity_I32);
   17883       assign( oldV, mk_armg_calculate_flag_v() );
   17884       assign( oldC, mk_armg_calculate_flag_c() );
   17885       assign(res, unop(Iop_Not32, getIRegT(rM)));
   17886       // rD can never be r15
   17887       putIRegT(rD, mkexpr(res), condT);
   17888       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   17889                          cond_AND_notInIT_T );
   17890       DIP("mvns r%u, r%u\n", rD, rM);
   17891       goto decode_success;
   17892    }
   17893 
   17894    case 0x10C:
   17895       /* ---------------- ORRS Rd, Rm ---------------- */
   17896       anOp = Iop_Or32; anOpNm = "orr"; goto and_orr_eor_mul;
   17897    case 0x100:
   17898       /* ---------------- ANDS Rd, Rm ---------------- */
   17899       anOp = Iop_And32; anOpNm = "and"; goto and_orr_eor_mul;
   17900    case 0x101:
   17901       /* ---------------- EORS Rd, Rm ---------------- */
   17902       anOp = Iop_Xor32; anOpNm = "eor"; goto and_orr_eor_mul;
   17903    case 0x10d:
   17904       /* ---------------- MULS Rd, Rm ---------------- */
   17905       anOp = Iop_Mul32; anOpNm = "mul"; goto and_orr_eor_mul;
   17906    and_orr_eor_mul: {
   17907       /* Rd = Rd `op` Rm */
   17908       UInt   rM   = INSN0(5,3);
   17909       UInt   rD   = INSN0(2,0);
   17910       IRTemp res  = newTemp(Ity_I32);
   17911       IRTemp oldV = newTemp(Ity_I32);
   17912       IRTemp oldC = newTemp(Ity_I32);
   17913       assign( oldV, mk_armg_calculate_flag_v() );
   17914       assign( oldC, mk_armg_calculate_flag_c() );
   17915       assign( res, binop(anOp, getIRegT(rD), getIRegT(rM) ));
   17916       // not safe to read guest state after here
   17917       // rD can never be r15
   17918       putIRegT(rD, mkexpr(res), condT);
   17919       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   17920                          cond_AND_notInIT_T );
   17921       DIP("%s r%u, r%u\n", anOpNm, rD, rM);
   17922       goto decode_success;
   17923    }
   17924 
   17925    case 0x10E: {
   17926       /* ---------------- BICS Rd, Rm ---------------- */
   17927       /* Rd = Rd & ~Rm */
   17928       UInt   rM   = INSN0(5,3);
   17929       UInt   rD   = INSN0(2,0);
   17930       IRTemp res  = newTemp(Ity_I32);
   17931       IRTemp oldV = newTemp(Ity_I32);
   17932       IRTemp oldC = newTemp(Ity_I32);
   17933       assign( oldV, mk_armg_calculate_flag_v() );
   17934       assign( oldC, mk_armg_calculate_flag_c() );
   17935       assign( res, binop(Iop_And32, getIRegT(rD),
   17936                                     unop(Iop_Not32, getIRegT(rM) )));
   17937       // not safe to read guest state after here
   17938       // rD can never be r15
   17939       putIRegT(rD, mkexpr(res), condT);
   17940       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   17941                          cond_AND_notInIT_T );
   17942       DIP("bics r%u, r%u\n", rD, rM);
   17943       goto decode_success;
   17944    }
   17945 
   17946    case 0x105: {
   17947       /* ---------------- ADCS Rd, Rm ---------------- */
   17948       /* Rd = Rd + Rm + oldC */
   17949       UInt   rM   = INSN0(5,3);
   17950       UInt   rD   = INSN0(2,0);
   17951       IRTemp argL = newTemp(Ity_I32);
   17952       IRTemp argR = newTemp(Ity_I32);
   17953       IRTemp oldC = newTemp(Ity_I32);
   17954       IRTemp res  = newTemp(Ity_I32);
   17955       assign(argL, getIRegT(rD));
   17956       assign(argR, getIRegT(rM));
   17957       assign(oldC, mk_armg_calculate_flag_c());
   17958       assign(res, binop(Iop_Add32,
   17959                         binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
   17960                         mkexpr(oldC)));
   17961       // rD can never be r15
   17962       putIRegT(rD, mkexpr(res), condT);
   17963       setFlags_D1_D2_ND( ARMG_CC_OP_ADC, argL, argR, oldC,
   17964                          cond_AND_notInIT_T );
   17965       DIP("adcs r%u, r%u\n", rD, rM);
   17966       goto decode_success;
   17967    }
   17968 
   17969    case 0x106: {
   17970       /* ---------------- SBCS Rd, Rm ---------------- */
   17971       /* Rd = Rd - Rm - (oldC ^ 1) */
   17972       UInt   rM   = INSN0(5,3);
   17973       UInt   rD   = INSN0(2,0);
   17974       IRTemp argL = newTemp(Ity_I32);
   17975       IRTemp argR = newTemp(Ity_I32);
   17976       IRTemp oldC = newTemp(Ity_I32);
   17977       IRTemp res  = newTemp(Ity_I32);
   17978       assign(argL, getIRegT(rD));
   17979       assign(argR, getIRegT(rM));
   17980       assign(oldC, mk_armg_calculate_flag_c());
   17981       assign(res, binop(Iop_Sub32,
   17982                         binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
   17983                         binop(Iop_Xor32, mkexpr(oldC), mkU32(1))));
   17984       // rD can never be r15
   17985       putIRegT(rD, mkexpr(res), condT);
   17986       setFlags_D1_D2_ND( ARMG_CC_OP_SBB, argL, argR, oldC,
   17987                          cond_AND_notInIT_T );
   17988       DIP("sbcs r%u, r%u\n", rD, rM);
   17989       goto decode_success;
   17990    }
   17991 
   17992    case 0x2CB: {
   17993       /* ---------------- UXTB Rd, Rm ---------------- */
   17994       /* Rd = 8Uto32(Rm) */
   17995       UInt rM = INSN0(5,3);
   17996       UInt rD = INSN0(2,0);
   17997       putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFF)),
   17998                    condT);
   17999       DIP("uxtb r%u, r%u\n", rD, rM);
   18000       goto decode_success;
   18001    }
   18002 
   18003    case 0x2C9: {
   18004       /* ---------------- SXTB Rd, Rm ---------------- */
   18005       /* Rd = 8Sto32(Rm) */
   18006       UInt rM = INSN0(5,3);
   18007       UInt rD = INSN0(2,0);
   18008       putIRegT(rD, binop(Iop_Sar32,
   18009                          binop(Iop_Shl32, getIRegT(rM), mkU8(24)),
   18010                          mkU8(24)),
   18011                    condT);
   18012       DIP("sxtb r%u, r%u\n", rD, rM);
   18013       goto decode_success;
   18014    }
   18015 
   18016    case 0x2CA: {
   18017       /* ---------------- UXTH Rd, Rm ---------------- */
   18018       /* Rd = 16Uto32(Rm) */
   18019       UInt rM = INSN0(5,3);
   18020       UInt rD = INSN0(2,0);
   18021       putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFFFF)),
   18022                    condT);
   18023       DIP("uxth r%u, r%u\n", rD, rM);
   18024       goto decode_success;
   18025    }
   18026 
   18027    case 0x2C8: {
   18028       /* ---------------- SXTH Rd, Rm ---------------- */
   18029       /* Rd = 16Sto32(Rm) */
   18030       UInt rM = INSN0(5,3);
   18031       UInt rD = INSN0(2,0);
   18032       putIRegT(rD, binop(Iop_Sar32,
   18033                          binop(Iop_Shl32, getIRegT(rM), mkU8(16)),
   18034                          mkU8(16)),
   18035                    condT);
   18036       DIP("sxth r%u, r%u\n", rD, rM);
   18037       goto decode_success;
   18038    }
   18039 
   18040    case 0x102:   // LSLS
   18041    case 0x103:   // LSRS
   18042    case 0x104:   // ASRS
   18043    case 0x107: { // RORS
   18044       /* ---------------- LSLS Rs, Rd ---------------- */
   18045       /* ---------------- LSRS Rs, Rd ---------------- */
   18046       /* ---------------- ASRS Rs, Rd ---------------- */
   18047       /* ---------------- RORS Rs, Rd ---------------- */
   18048       /* Rd = Rd `op` Rs, and set flags */
   18049       UInt   rS   = INSN0(5,3);
   18050       UInt   rD   = INSN0(2,0);
   18051       IRTemp oldV = newTemp(Ity_I32);
   18052       IRTemp rDt  = newTemp(Ity_I32);
   18053       IRTemp rSt  = newTemp(Ity_I32);
   18054       IRTemp res  = newTemp(Ity_I32);
   18055       IRTemp resC = newTemp(Ity_I32);
   18056       const HChar* wot  = "???";
   18057       assign(rSt, getIRegT(rS));
   18058       assign(rDt, getIRegT(rD));
   18059       assign(oldV, mk_armg_calculate_flag_v());
   18060       /* Does not appear to be the standard 'how' encoding. */
   18061       switch (INSN0(15,6)) {
   18062          case 0x102:
   18063             compute_result_and_C_after_LSL_by_reg(
   18064                dis_buf, &res, &resC, rDt, rSt, rD, rS
   18065             );
   18066             wot = "lsl";
   18067             break;
   18068          case 0x103:
   18069             compute_result_and_C_after_LSR_by_reg(
   18070                dis_buf, &res, &resC, rDt, rSt, rD, rS
   18071             );
   18072             wot = "lsr";
   18073             break;
   18074          case 0x104:
   18075             compute_result_and_C_after_ASR_by_reg(
   18076                dis_buf, &res, &resC, rDt, rSt, rD, rS
   18077             );
   18078             wot = "asr";
   18079             break;
   18080          case 0x107:
   18081             compute_result_and_C_after_ROR_by_reg(
   18082                dis_buf, &res, &resC, rDt, rSt, rD, rS
   18083             );
   18084             wot = "ror";
   18085             break;
   18086          default:
   18087             /*NOTREACHED*/vassert(0);
   18088       }
   18089       // not safe to read guest state after this point
   18090       putIRegT(rD, mkexpr(res), condT);
   18091       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
   18092                          cond_AND_notInIT_T );
   18093       DIP("%ss r%u, r%u\n", wot, rS, rD);
   18094       goto decode_success;
   18095    }
   18096 
   18097    case 0x2E8:   // REV
   18098    case 0x2E9: { // REV16
   18099       /* ---------------- REV   Rd, Rm ---------------- */
   18100       /* ---------------- REV16 Rd, Rm ---------------- */
   18101       UInt rM = INSN0(5,3);
   18102       UInt rD = INSN0(2,0);
   18103       Bool isREV = INSN0(15,6) == 0x2E8;
   18104       IRTemp arg = newTemp(Ity_I32);
   18105       assign(arg, getIRegT(rM));
   18106       IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
   18107       putIRegT(rD, mkexpr(res), condT);
   18108       DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM);
   18109       goto decode_success;
   18110    }
   18111 
   18112    case 0x2EB: { // REVSH
   18113       /* ---------------- REVSH Rd, Rm ---------------- */
   18114       UInt rM = INSN0(5,3);
   18115       UInt rD = INSN0(2,0);
   18116       IRTemp irt_rM  = newTemp(Ity_I32);
   18117       IRTemp irt_hi  = newTemp(Ity_I32);
   18118       IRTemp irt_low = newTemp(Ity_I32);
   18119       IRTemp irt_res = newTemp(Ity_I32);
   18120       assign(irt_rM, getIRegT(rM));
   18121       assign(irt_hi,
   18122              binop(Iop_Sar32,
   18123                    binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
   18124                    mkU8(16)
   18125              )
   18126       );
   18127       assign(irt_low,
   18128              binop(Iop_And32,
   18129                    binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
   18130                    mkU32(0xFF)
   18131              )
   18132       );
   18133       assign(irt_res,
   18134              binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
   18135       );
   18136       putIRegT(rD, mkexpr(irt_res), condT);
   18137       DIP("revsh r%u, r%u\n", rD, rM);
   18138       goto decode_success;
   18139    }
   18140 
   18141    default:
   18142       break; /* examine the next shortest prefix */
   18143 
   18144    }
   18145 
   18146 
   18147    /* ================ 16-bit 15:7 cases ================ */
   18148 
   18149    switch (INSN0(15,7)) {
   18150 
   18151    case BITS9(1,0,1,1,0,0,0,0,0): {
   18152       /* ------------ ADD SP, #imm7 * 4 ------------ */
   18153       UInt uimm7 = INSN0(6,0);
   18154       putIRegT(13, binop(Iop_Add32, getIRegT(13), mkU32(uimm7 * 4)),
   18155                    condT);
   18156       DIP("add sp, #%u\n", uimm7 * 4);
   18157       goto decode_success;
   18158    }
   18159 
   18160    case BITS9(1,0,1,1,0,0,0,0,1): {
   18161       /* ------------ SUB SP, #imm7 * 4 ------------ */
   18162       UInt uimm7 = INSN0(6,0);
   18163       putIRegT(13, binop(Iop_Sub32, getIRegT(13), mkU32(uimm7 * 4)),
   18164                    condT);
   18165       DIP("sub sp, #%u\n", uimm7 * 4);
   18166       goto decode_success;
   18167    }
   18168 
   18169    case BITS9(0,1,0,0,0,1,1,1,0): {
   18170       /* ---------------- BX rM ---------------- */
   18171       /* Branch to reg, and optionally switch modes.  The reg holds a
   18172          suitably encoded address (with CPSR.T in the bottom bit).
   18173          Have to special-case r15, as usual. */
   18174       UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
   18175       if (BITS3(0,0,0) == INSN0(2,0)) {
   18176          IRTemp dst = newTemp(Ity_I32);
   18177          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   18178          mk_skip_over_T16_if_cond_is_false(condT);
   18179          condT = IRTemp_INVALID;
   18180          // now uncond
   18181          if (rM <= 14) {
   18182             assign( dst, getIRegT(rM) );
   18183          } else {
   18184             vassert(rM == 15);
   18185             assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
   18186          }
   18187          llPutIReg(15, mkexpr(dst));
   18188          dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
   18189          dres.whatNext    = Dis_StopHere;
   18190          DIP("bx r%u (possibly switch to ARM mode)\n", rM);
   18191          goto decode_success;
   18192       }
   18193       break;
   18194    }
   18195 
   18196    /* ---------------- BLX rM ---------------- */
   18197    /* Branch and link to interworking address in rM. */
   18198    case BITS9(0,1,0,0,0,1,1,1,1): {
   18199       if (BITS3(0,0,0) == INSN0(2,0)) {
   18200          UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
   18201          IRTemp dst = newTemp(Ity_I32);
   18202          if (rM <= 14) {
   18203             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   18204             mk_skip_over_T16_if_cond_is_false(condT);
   18205             condT = IRTemp_INVALID;
   18206             // now uncond
   18207             /* We're returning to Thumb code, hence "| 1" */
   18208             assign( dst, getIRegT(rM) );
   18209             putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
   18210                           IRTemp_INVALID );
   18211             llPutIReg(15, mkexpr(dst));
   18212             dres.jk_StopHere = Ijk_Call;
   18213             dres.whatNext    = Dis_StopHere;
   18214             DIP("blx r%u (possibly switch to ARM mode)\n", rM);
   18215             goto decode_success;
   18216          }
   18217          /* else unpredictable, fall through */
   18218       }
   18219       break;
   18220    }
   18221 
   18222    default:
   18223       break; /* examine the next shortest prefix */
   18224 
   18225    }
   18226 
   18227 
   18228    /* ================ 16-bit 15:8 cases ================ */
   18229 
   18230    switch (INSN0(15,8)) {
   18231 
   18232    case BITS8(1,1,0,1,1,1,1,1): {
   18233       /* ---------------- SVC ---------------- */
   18234       UInt imm8 = INSN0(7,0);
   18235       if (imm8 == 0) {
   18236          /* A syscall.  We can't do this conditionally, hence: */
   18237          mk_skip_over_T16_if_cond_is_false( condT );
   18238          // FIXME: what if we have to back up and restart this insn?
   18239          // then ITSTATE will be wrong (we'll have it as "used")
   18240          // when it isn't.  Correct is to save ITSTATE in a
   18241          // stash pseudo-reg, and back up from that if we have to
   18242          // restart.
   18243          // uncond after here
   18244          llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ));
   18245          dres.jk_StopHere = Ijk_Sys_syscall;
   18246          dres.whatNext    = Dis_StopHere;
   18247          DIP("svc #0x%08x\n", imm8);
   18248          goto decode_success;
   18249       }
   18250       /* else fall through */
   18251       break;
   18252    }
   18253 
   18254    case BITS8(0,1,0,0,0,1,0,0): {
   18255       /* ---------------- ADD(HI) Rd, Rm ---------------- */
   18256       UInt h1 = INSN0(7,7);
   18257       UInt h2 = INSN0(6,6);
   18258       UInt rM = (h2 << 3) | INSN0(5,3);
   18259       UInt rD = (h1 << 3) | INSN0(2,0);
   18260       //if (h1 == 0 && h2 == 0) { // Original T1 was more restrictive
   18261       if (rD == 15 && rM == 15) {
   18262          // then it's invalid
   18263       } else {
   18264          IRTemp res = newTemp(Ity_I32);
   18265          assign( res, binop(Iop_Add32, getIRegT(rD), getIRegT(rM) ));
   18266          if (rD != 15) {
   18267             putIRegT( rD, mkexpr(res), condT );
   18268          } else {
   18269             /* Only allowed outside or last-in IT block; SIGILL if not so. */
   18270             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   18271             /* jump over insn if not selected */
   18272             mk_skip_over_T16_if_cond_is_false(condT);
   18273             condT = IRTemp_INVALID;
   18274             // now uncond
   18275             /* non-interworking branch */
   18276             llPutIReg(15, binop(Iop_Or32, mkexpr(res), mkU32(1)));
   18277             dres.jk_StopHere = Ijk_Boring;
   18278             dres.whatNext    = Dis_StopHere;
   18279          }
   18280          DIP("add(hi) r%u, r%u\n", rD, rM);
   18281          goto decode_success;
   18282       }
   18283       break;
   18284    }
   18285 
   18286    case BITS8(0,1,0,0,0,1,0,1): {
   18287       /* ---------------- CMP(HI) Rd, Rm ---------------- */
   18288       UInt h1 = INSN0(7,7);
   18289       UInt h2 = INSN0(6,6);
   18290       UInt rM = (h2 << 3) | INSN0(5,3);
   18291       UInt rN = (h1 << 3) | INSN0(2,0);
   18292       if (h1 != 0 || h2 != 0) {
   18293          IRTemp argL  = newTemp(Ity_I32);
   18294          IRTemp argR  = newTemp(Ity_I32);
   18295          assign( argL, getIRegT(rN) );
   18296          assign( argR, getIRegT(rM) );
   18297          /* Update flags regardless of whether in an IT block or not. */
   18298          setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   18299          DIP("cmphi r%u, r%u\n", rN, rM);
   18300          goto decode_success;
   18301       }
   18302       break;
   18303    }
   18304 
   18305    case BITS8(0,1,0,0,0,1,1,0): {
   18306       /* ---------------- MOV(HI) Rd, Rm ---------------- */
   18307       UInt h1 = INSN0(7,7);
   18308       UInt h2 = INSN0(6,6);
   18309       UInt rM = (h2 << 3) | INSN0(5,3);
   18310       UInt rD = (h1 << 3) | INSN0(2,0);
   18311       /* The old ARM ARM seems to disallow the case where both Rd and
   18312          Rm are "low" registers, but newer versions allow it. */
   18313       if (1 /*h1 != 0 || h2 != 0*/) {
   18314          IRTemp val = newTemp(Ity_I32);
   18315          assign( val, getIRegT(rM) );
   18316          if (rD != 15) {
   18317             putIRegT( rD, mkexpr(val), condT );
   18318          } else {
   18319             /* Only allowed outside or last-in IT block; SIGILL if not so. */
   18320             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   18321             /* jump over insn if not selected */
   18322             mk_skip_over_T16_if_cond_is_false(condT);
   18323             condT = IRTemp_INVALID;
   18324             // now uncond
   18325             /* non-interworking branch */
   18326             llPutIReg(15, binop(Iop_Or32, mkexpr(val), mkU32(1)));
   18327             dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
   18328             dres.whatNext    = Dis_StopHere;
   18329          }
   18330          DIP("mov r%u, r%u\n", rD, rM);
   18331          goto decode_success;
   18332       }
   18333       break;
   18334    }
   18335 
   18336    case BITS8(1,0,1,1,1,1,1,1): {
   18337       /* ---------------- IT (if-then) ---------------- */
   18338       UInt firstcond = INSN0(7,4);
   18339       UInt mask = INSN0(3,0);
   18340       UInt newITSTATE = 0;
   18341       /* This is the ITSTATE represented as described in
   18342          libvex_guest_arm.h.  It is not the ARM ARM representation. */
   18343       HChar c1 = '.';
   18344       HChar c2 = '.';
   18345       HChar c3 = '.';
   18346       Bool valid = compute_ITSTATE( &newITSTATE, &c1, &c2, &c3,
   18347                                     firstcond, mask );
   18348       if (valid && firstcond != 0xF/*NV*/) {
   18349          /* Not allowed in an IT block; SIGILL if so. */
   18350          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   18351 
   18352          IRTemp t = newTemp(Ity_I32);
   18353          assign(t, mkU32(newITSTATE));
   18354          put_ITSTATE(t);
   18355 
   18356          DIP("it%c%c%c %s\n", c1, c2, c3, nCC(firstcond));
   18357          goto decode_success;
   18358       }
   18359       break;
   18360    }
   18361 
   18362    case BITS8(1,0,1,1,0,0,0,1):
   18363    case BITS8(1,0,1,1,0,0,1,1):
   18364    case BITS8(1,0,1,1,1,0,0,1):
   18365    case BITS8(1,0,1,1,1,0,1,1): {
   18366       /* ---------------- CB{N}Z ---------------- */
   18367       UInt rN    = INSN0(2,0);
   18368       UInt bOP   = INSN0(11,11);
   18369       UInt imm32 = (INSN0(9,9) << 6) | (INSN0(7,3) << 1);
   18370       gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   18371       /* It's a conditional branch forward. */
   18372       IRTemp kond = newTemp(Ity_I1);
   18373       assign( kond, binop(bOP ? Iop_CmpNE32 : Iop_CmpEQ32,
   18374                           getIRegT(rN), mkU32(0)) );
   18375 
   18376       vassert(0 == (guest_R15_curr_instr_notENC & 1));
   18377       /* Looks like the nearest insn we can branch to is the one after
   18378          next.  That makes sense, as there's no point in being able to
   18379          encode a conditional branch to the next instruction. */
   18380       UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
   18381       stmt(IRStmt_Exit( mkexpr(kond),
   18382                         Ijk_Boring,
   18383                         IRConst_U32(toUInt(dst)),
   18384                         OFFB_R15T ));
   18385       DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1);
   18386       goto decode_success;
   18387    }
   18388 
   18389    default:
   18390       break; /* examine the next shortest prefix */
   18391 
   18392    }
   18393 
   18394 
   18395    /* ================ 16-bit 15:9 cases ================ */
   18396 
   18397    switch (INSN0(15,9)) {
   18398 
   18399    case BITS7(1,0,1,1,0,1,0): {
   18400       /* ---------------- PUSH ---------------- */
   18401       /* This is a bit like STMxx, but way simpler. Complications we
   18402          don't have to deal with:
   18403          * SP being one of the transferred registers
   18404          * direction (increment vs decrement)
   18405          * before-vs-after-ness
   18406       */
   18407       Int  i, nRegs;
   18408       UInt bitR    = INSN0(8,8);
   18409       UInt regList = INSN0(7,0);
   18410       if (bitR) regList |= (1 << 14);
   18411 
   18412       /* At least one register must be transferred, else result is
   18413          UNPREDICTABLE. */
   18414       if (regList != 0) {
   18415          /* Since we can't generate a guaranteed non-trapping IR
   18416             sequence, (1) jump over the insn if it is gated false, and
   18417             (2) back out the ITSTATE update. */
   18418          mk_skip_over_T16_if_cond_is_false(condT);
   18419          condT = IRTemp_INVALID;
   18420          put_ITSTATE(old_itstate);
   18421          // now uncond
   18422 
   18423          nRegs = 0;
   18424          for (i = 0; i < 16; i++) {
   18425             if ((regList & (1 << i)) != 0)
   18426                nRegs++;
   18427          }
   18428          vassert(nRegs >= 1 && nRegs <= 9);
   18429 
   18430          /* Move SP down first of all, so we're "covered".  And don't
   18431             mess with its alignment. */
   18432          IRTemp newSP = newTemp(Ity_I32);
   18433          assign(newSP, binop(Iop_Sub32, getIRegT(13), mkU32(4 * nRegs)));
   18434          putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
   18435 
   18436          /* Generate a transfer base address as a forced-aligned
   18437             version of the final SP value. */
   18438          IRTemp base = newTemp(Ity_I32);
   18439          assign(base, binop(Iop_And32, mkexpr(newSP), mkU32(~3)));
   18440 
   18441          /* Now the transfers */
   18442          nRegs = 0;
   18443          for (i = 0; i < 16; i++) {
   18444             if ((regList & (1 << i)) != 0) {
   18445                storeLE( binop(Iop_Add32, mkexpr(base), mkU32(4 * nRegs)),
   18446                         getIRegT(i) );
   18447                nRegs++;
   18448             }
   18449          }
   18450 
   18451          /* Reinstate the ITSTATE update. */
   18452          put_ITSTATE(new_itstate);
   18453 
   18454          DIP("push {%s0x%04x}\n", bitR ? "lr," : "", regList & 0xFF);
   18455          goto decode_success;
   18456       }
   18457       break;
   18458    }
   18459 
   18460    case BITS7(1,0,1,1,1,1,0): {
   18461       /* ---------------- POP ---------------- */
   18462       Int  i, nRegs;
   18463       UInt bitR    = INSN0(8,8);
   18464       UInt regList = INSN0(7,0);
   18465 
   18466       /* At least one register must be transferred, else result is
   18467          UNPREDICTABLE. */
   18468       if (regList != 0 || bitR) {
   18469          /* Since we can't generate a guaranteed non-trapping IR
   18470             sequence, (1) jump over the insn if it is gated false, and
   18471             (2) back out the ITSTATE update. */
   18472          mk_skip_over_T16_if_cond_is_false(condT);
   18473          condT = IRTemp_INVALID;
   18474          put_ITSTATE(old_itstate);
   18475          // now uncond
   18476 
   18477          nRegs = 0;
   18478          for (i = 0; i < 8; i++) {
   18479             if ((regList & (1 << i)) != 0)
   18480                nRegs++;
   18481          }
   18482          vassert(nRegs >= 0 && nRegs <= 8);
   18483          vassert(bitR == 0 || bitR == 1);
   18484 
   18485          IRTemp oldSP = newTemp(Ity_I32);
   18486          assign(oldSP, getIRegT(13));
   18487 
   18488          /* Generate a transfer base address as a forced-aligned
   18489             version of the original SP value. */
   18490          IRTemp base = newTemp(Ity_I32);
   18491          assign(base, binop(Iop_And32, mkexpr(oldSP), mkU32(~3)));
   18492 
   18493          /* Compute a new value for SP, but don't install it yet, so
   18494             that we're "covered" until all the transfers are done.
   18495             And don't mess with its alignment. */
   18496          IRTemp newSP = newTemp(Ity_I32);
   18497          assign(newSP, binop(Iop_Add32, mkexpr(oldSP),
   18498                                         mkU32(4 * (nRegs + bitR))));
   18499 
   18500          /* Now the transfers, not including PC */
   18501          nRegs = 0;
   18502          for (i = 0; i < 8; i++) {
   18503             if ((regList & (1 << i)) != 0) {
   18504                putIRegT(i, loadLE( Ity_I32,
   18505                                    binop(Iop_Add32, mkexpr(base),
   18506                                                     mkU32(4 * nRegs))),
   18507                            IRTemp_INVALID );
   18508                nRegs++;
   18509             }
   18510          }
   18511 
   18512          IRTemp newPC = IRTemp_INVALID;
   18513          if (bitR) {
   18514             newPC = newTemp(Ity_I32);
   18515             assign( newPC, loadLE( Ity_I32,
   18516                                    binop(Iop_Add32, mkexpr(base),
   18517                                                     mkU32(4 * nRegs))));
   18518          }
   18519 
   18520          /* Now we can safely install the new SP value */
   18521          putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
   18522 
   18523          /* Reinstate the ITSTATE update. */
   18524          put_ITSTATE(new_itstate);
   18525 
   18526          /* now, do we also have to do a branch?  If so, it turns out
   18527             that the new PC value is encoded exactly as we need it to
   18528             be -- with CPSR.T in the bottom bit.  So we can simply use
   18529             it as is, no need to mess with it.  Note, therefore, this
   18530             is an interworking return. */
   18531          if (bitR) {
   18532             llPutIReg(15, mkexpr(newPC));
   18533             dres.jk_StopHere = Ijk_Ret;
   18534             dres.whatNext    = Dis_StopHere;
   18535          }
   18536 
   18537          DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
   18538          goto decode_success;
   18539       }
   18540       break;
   18541    }
   18542 
   18543    case BITS7(0,0,0,1,1,1,0):   /* ADDS */
   18544    case BITS7(0,0,0,1,1,1,1): { /* SUBS */
   18545       /* ---------------- ADDS Rd, Rn, #uimm3 ---------------- */
   18546       /* ---------------- SUBS Rd, Rn, #uimm3 ---------------- */
   18547       UInt   uimm3 = INSN0(8,6);
   18548       UInt   rN    = INSN0(5,3);
   18549       UInt   rD    = INSN0(2,0);
   18550       UInt   isSub = INSN0(9,9);
   18551       IRTemp argL  = newTemp(Ity_I32);
   18552       IRTemp argR  = newTemp(Ity_I32);
   18553       assign( argL, getIRegT(rN) );
   18554       assign( argR, mkU32(uimm3) );
   18555       putIRegT(rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
   18556                          mkexpr(argL), mkexpr(argR)),
   18557                    condT);
   18558       setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
   18559                       argL, argR, cond_AND_notInIT_T );
   18560       DIP("%s r%u, r%u, #%u\n", isSub ? "subs" : "adds", rD, rN, uimm3);
   18561       goto decode_success;
   18562    }
   18563 
   18564    case BITS7(0,0,0,1,1,0,0):   /* ADDS */
   18565    case BITS7(0,0,0,1,1,0,1): { /* SUBS */
   18566       /* ---------------- ADDS Rd, Rn, Rm ---------------- */
   18567       /* ---------------- SUBS Rd, Rn, Rm ---------------- */
   18568       UInt   rM    = INSN0(8,6);
   18569       UInt   rN    = INSN0(5,3);
   18570       UInt   rD    = INSN0(2,0);
   18571       UInt   isSub = INSN0(9,9);
   18572       IRTemp argL  = newTemp(Ity_I32);
   18573       IRTemp argR  = newTemp(Ity_I32);
   18574       assign( argL, getIRegT(rN) );
   18575       assign( argR, getIRegT(rM) );
   18576       putIRegT( rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
   18577                           mkexpr(argL), mkexpr(argR)),
   18578                     condT );
   18579       setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
   18580                       argL, argR, cond_AND_notInIT_T );
   18581       DIP("%s r%u, r%u, r%u\n", isSub ? "subs" : "adds", rD, rN, rM);
   18582       goto decode_success;
   18583    }
   18584 
   18585    case BITS7(0,1,0,1,0,0,0):   /* STR */
   18586    case BITS7(0,1,0,1,1,0,0): { /* LDR */
   18587       /* ------------- LDR Rd, [Rn, Rm] ------------- */
   18588       /* ------------- STR Rd, [Rn, Rm] ------------- */
   18589       /* LDR/STR Rd, [Rn + Rm] */
   18590       UInt    rD   = INSN0(2,0);
   18591       UInt    rN   = INSN0(5,3);
   18592       UInt    rM   = INSN0(8,6);
   18593       UInt    isLD = INSN0(11,11);
   18594 
   18595       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   18596       put_ITSTATE(old_itstate); // backout
   18597       if (isLD) {
   18598          IRTemp tD = newTemp(Ity_I32);
   18599          loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
   18600          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18601       } else {
   18602          storeGuardedLE(ea, getIRegT(rD), condT);
   18603       }
   18604       put_ITSTATE(new_itstate); // restore
   18605 
   18606       DIP("%s r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
   18607       goto decode_success;
   18608    }
   18609 
   18610    case BITS7(0,1,0,1,0,0,1):
   18611    case BITS7(0,1,0,1,1,0,1): {
   18612       /* ------------- LDRH Rd, [Rn, Rm] ------------- */
   18613       /* ------------- STRH Rd, [Rn, Rm] ------------- */
   18614       /* LDRH/STRH Rd, [Rn + Rm] */
   18615       UInt    rD   = INSN0(2,0);
   18616       UInt    rN   = INSN0(5,3);
   18617       UInt    rM   = INSN0(8,6);
   18618       UInt    isLD = INSN0(11,11);
   18619 
   18620       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   18621       put_ITSTATE(old_itstate); // backout
   18622       if (isLD) {
   18623          IRTemp tD = newTemp(Ity_I32);
   18624          loadGuardedLE(tD, ILGop_16Uto32, ea, llGetIReg(rD), condT);
   18625          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18626       } else {
   18627          storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
   18628       }
   18629       put_ITSTATE(new_itstate); // restore
   18630 
   18631       DIP("%sh r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
   18632       goto decode_success;
   18633    }
   18634 
   18635    case BITS7(0,1,0,1,1,1,1): {
   18636       /* ------------- LDRSH Rd, [Rn, Rm] ------------- */
   18637       /* LDRSH Rd, [Rn + Rm] */
   18638       UInt    rD = INSN0(2,0);
   18639       UInt    rN = INSN0(5,3);
   18640       UInt    rM = INSN0(8,6);
   18641 
   18642       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   18643       put_ITSTATE(old_itstate); // backout
   18644       IRTemp tD = newTemp(Ity_I32);
   18645       loadGuardedLE(tD, ILGop_16Sto32, ea, llGetIReg(rD), condT);
   18646       putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18647       put_ITSTATE(new_itstate); // restore
   18648 
   18649       DIP("ldrsh r%u, [r%u, r%u]\n", rD, rN, rM);
   18650       goto decode_success;
   18651    }
   18652 
   18653    case BITS7(0,1,0,1,0,1,1): {
   18654       /* ------------- LDRSB Rd, [Rn, Rm] ------------- */
   18655       /* LDRSB Rd, [Rn + Rm] */
   18656       UInt    rD = INSN0(2,0);
   18657       UInt    rN = INSN0(5,3);
   18658       UInt    rM = INSN0(8,6);
   18659 
   18660       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   18661       put_ITSTATE(old_itstate); // backout
   18662       IRTemp tD = newTemp(Ity_I32);
   18663       loadGuardedLE(tD, ILGop_8Sto32, ea, llGetIReg(rD), condT);
   18664       putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18665       put_ITSTATE(new_itstate); // restore
   18666 
   18667       DIP("ldrsb r%u, [r%u, r%u]\n", rD, rN, rM);
   18668       goto decode_success;
   18669    }
   18670 
   18671    case BITS7(0,1,0,1,0,1,0):
   18672    case BITS7(0,1,0,1,1,1,0): {
   18673       /* ------------- LDRB Rd, [Rn, Rm] ------------- */
   18674       /* ------------- STRB Rd, [Rn, Rm] ------------- */
   18675       /* LDRB/STRB Rd, [Rn + Rm] */
   18676       UInt    rD   = INSN0(2,0);
   18677       UInt    rN   = INSN0(5,3);
   18678       UInt    rM   = INSN0(8,6);
   18679       UInt    isLD = INSN0(11,11);
   18680 
   18681       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   18682       put_ITSTATE(old_itstate); // backout
   18683       if (isLD) {
   18684          IRTemp tD = newTemp(Ity_I32);
   18685          loadGuardedLE(tD, ILGop_8Uto32, ea, llGetIReg(rD), condT);
   18686          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18687       } else {
   18688          storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
   18689       }
   18690       put_ITSTATE(new_itstate); // restore
   18691 
   18692       DIP("%sb r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
   18693       goto decode_success;
   18694    }
   18695 
   18696    default:
   18697       break; /* examine the next shortest prefix */
   18698 
   18699    }
   18700 
   18701 
   18702    /* ================ 16-bit 15:11 cases ================ */
   18703 
   18704    switch (INSN0(15,11)) {
   18705 
   18706    case BITS5(0,0,1,1,0):
   18707    case BITS5(0,0,1,1,1): {
   18708       /* ---------------- ADDS Rn, #uimm8 ---------------- */
   18709       /* ---------------- SUBS Rn, #uimm8 ---------------- */
   18710       UInt   isSub = INSN0(11,11);
   18711       UInt   rN    = INSN0(10,8);
   18712       UInt   uimm8 = INSN0(7,0);
   18713       IRTemp argL  = newTemp(Ity_I32);
   18714       IRTemp argR  = newTemp(Ity_I32);
   18715       assign( argL, getIRegT(rN) );
   18716       assign( argR, mkU32(uimm8) );
   18717       putIRegT( rN, binop(isSub ? Iop_Sub32 : Iop_Add32,
   18718                           mkexpr(argL), mkexpr(argR)), condT );
   18719       setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
   18720                       argL, argR, cond_AND_notInIT_T );
   18721       DIP("%s r%u, #%u\n", isSub ? "subs" : "adds", rN, uimm8);
   18722       goto decode_success;
   18723    }
   18724 
   18725    case BITS5(1,0,1,0,0): {
   18726       /* ---------------- ADD rD, PC, #imm8 * 4 ---------------- */
   18727       /* a.k.a. ADR */
   18728       /* rD = align4(PC) + imm8 * 4 */
   18729       UInt rD   = INSN0(10,8);
   18730       UInt imm8 = INSN0(7,0);
   18731       putIRegT(rD, binop(Iop_Add32,
   18732                          binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   18733                          mkU32(imm8 * 4)),
   18734                    condT);
   18735       DIP("add r%u, pc, #%u\n", rD, imm8 * 4);
   18736       goto decode_success;
   18737    }
   18738 
   18739    case BITS5(1,0,1,0,1): {
   18740       /* ---------------- ADD rD, SP, #imm8 * 4 ---------------- */
   18741       UInt rD   = INSN0(10,8);
   18742       UInt imm8 = INSN0(7,0);
   18743       putIRegT(rD, binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4)),
   18744                    condT);
   18745       DIP("add r%u, r13, #%u\n", rD, imm8 * 4);
   18746       goto decode_success;
   18747    }
   18748 
   18749    case BITS5(0,0,1,0,1): {
   18750       /* ---------------- CMP Rn, #uimm8 ---------------- */
   18751       UInt   rN    = INSN0(10,8);
   18752       UInt   uimm8 = INSN0(7,0);
   18753       IRTemp argL  = newTemp(Ity_I32);
   18754       IRTemp argR  = newTemp(Ity_I32);
   18755       assign( argL, getIRegT(rN) );
   18756       assign( argR, mkU32(uimm8) );
   18757       /* Update flags regardless of whether in an IT block or not. */
   18758       setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   18759       DIP("cmp r%u, #%u\n", rN, uimm8);
   18760       goto decode_success;
   18761    }
   18762 
   18763    case BITS5(0,0,1,0,0): {
   18764       /* -------------- (T1) MOVS Rd, #uimm8 -------------- */
   18765       UInt   rD    = INSN0(10,8);
   18766       UInt   uimm8 = INSN0(7,0);
   18767       IRTemp oldV  = newTemp(Ity_I32);
   18768       IRTemp oldC  = newTemp(Ity_I32);
   18769       IRTemp res   = newTemp(Ity_I32);
   18770       assign( oldV, mk_armg_calculate_flag_v() );
   18771       assign( oldC, mk_armg_calculate_flag_c() );
   18772       assign( res, mkU32(uimm8) );
   18773       putIRegT(rD, mkexpr(res), condT);
   18774       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   18775                          cond_AND_notInIT_T );
   18776       DIP("movs r%u, #%u\n", rD, uimm8);
   18777       goto decode_success;
   18778    }
   18779 
   18780    case BITS5(0,1,0,0,1): {
   18781       /* ------------- LDR Rd, [PC, #imm8 * 4] ------------- */
   18782       /* LDR Rd, [align4(PC) + imm8 * 4] */
   18783       UInt   rD   = INSN0(10,8);
   18784       UInt   imm8 = INSN0(7,0);
   18785       IRTemp ea   = newTemp(Ity_I32);
   18786 
   18787       assign(ea, binop(Iop_Add32,
   18788                        binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   18789                        mkU32(imm8 * 4)));
   18790       put_ITSTATE(old_itstate); // backout
   18791       IRTemp tD = newTemp(Ity_I32);
   18792       loadGuardedLE( tD, ILGop_Ident32, mkexpr(ea), llGetIReg(rD), condT );
   18793       putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18794       put_ITSTATE(new_itstate); // restore
   18795 
   18796       DIP("ldr r%u, [pc, #%u]\n", rD, imm8 * 4);
   18797       goto decode_success;
   18798    }
   18799 
   18800    case BITS5(0,1,1,0,0):   /* STR */
   18801    case BITS5(0,1,1,0,1): { /* LDR */
   18802       /* ------------- LDR Rd, [Rn, #imm5 * 4] ------------- */
   18803       /* ------------- STR Rd, [Rn, #imm5 * 4] ------------- */
   18804       /* LDR/STR Rd, [Rn + imm5 * 4] */
   18805       UInt    rD   = INSN0(2,0);
   18806       UInt    rN   = INSN0(5,3);
   18807       UInt    imm5 = INSN0(10,6);
   18808       UInt    isLD = INSN0(11,11);
   18809 
   18810       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 4));
   18811       put_ITSTATE(old_itstate); // backout
   18812       if (isLD) {
   18813          IRTemp tD = newTemp(Ity_I32);
   18814          loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
   18815          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18816       } else {
   18817          storeGuardedLE( ea, getIRegT(rD), condT );
   18818       }
   18819       put_ITSTATE(new_itstate); // restore
   18820 
   18821       DIP("%s r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 4);
   18822       goto decode_success;
   18823    }
   18824 
   18825    case BITS5(1,0,0,0,0):   /* STRH */
   18826    case BITS5(1,0,0,0,1): { /* LDRH */
   18827       /* ------------- LDRH Rd, [Rn, #imm5 * 2] ------------- */
   18828       /* ------------- STRH Rd, [Rn, #imm5 * 2] ------------- */
   18829       /* LDRH/STRH Rd, [Rn + imm5 * 2] */
   18830       UInt    rD   = INSN0(2,0);
   18831       UInt    rN   = INSN0(5,3);
   18832       UInt    imm5 = INSN0(10,6);
   18833       UInt    isLD = INSN0(11,11);
   18834 
   18835       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 2));
   18836       put_ITSTATE(old_itstate); // backout
   18837       if (isLD) {
   18838          IRTemp tD = newTemp(Ity_I32);
   18839          loadGuardedLE( tD, ILGop_16Uto32, ea, llGetIReg(rD), condT );
   18840          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18841       } else {
   18842          storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
   18843       }
   18844       put_ITSTATE(new_itstate); // restore
   18845 
   18846       DIP("%sh r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 2);
   18847       goto decode_success;
   18848    }
   18849 
   18850    case BITS5(0,1,1,1,0):   /* STRB */
   18851    case BITS5(0,1,1,1,1): { /* LDRB */
   18852       /* ------------- LDRB Rd, [Rn, #imm5] ------------- */
   18853       /* ------------- STRB Rd, [Rn, #imm5] ------------- */
   18854       /* LDRB/STRB Rd, [Rn + imm5] */
   18855       UInt    rD   = INSN0(2,0);
   18856       UInt    rN   = INSN0(5,3);
   18857       UInt    imm5 = INSN0(10,6);
   18858       UInt    isLD = INSN0(11,11);
   18859 
   18860       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5));
   18861       put_ITSTATE(old_itstate); // backout
   18862       if (isLD) {
   18863          IRTemp tD = newTemp(Ity_I32);
   18864          loadGuardedLE( tD, ILGop_8Uto32, ea, llGetIReg(rD), condT );
   18865          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18866       } else {
   18867          storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
   18868       }
   18869       put_ITSTATE(new_itstate); // restore
   18870 
   18871       DIP("%sb r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5);
   18872       goto decode_success;
   18873    }
   18874 
   18875    case BITS5(1,0,0,1,0):   /* STR */
   18876    case BITS5(1,0,0,1,1): { /* LDR */
   18877       /* ------------- LDR Rd, [SP, #imm8 * 4] ------------- */
   18878       /* ------------- STR Rd, [SP, #imm8 * 4] ------------- */
   18879       /* LDR/STR Rd, [SP + imm8 * 4] */
   18880       UInt rD    = INSN0(10,8);
   18881       UInt imm8  = INSN0(7,0);
   18882       UInt isLD  = INSN0(11,11);
   18883 
   18884       IRExpr* ea = binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4));
   18885       put_ITSTATE(old_itstate); // backout
   18886       if (isLD) {
   18887          IRTemp tD = newTemp(Ity_I32);
   18888          loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
   18889          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18890       } else {
   18891          storeGuardedLE(ea, getIRegT(rD), condT);
   18892       }
   18893       put_ITSTATE(new_itstate); // restore
   18894 
   18895       DIP("%s r%u, [sp, #%u]\n", isLD ? "ldr" : "str", rD, imm8 * 4);
   18896       goto decode_success;
   18897    }
   18898 
   18899    case BITS5(1,1,0,0,1): {
   18900       /* ------------- LDMIA Rn!, {reglist} ------------- */
   18901       Int i, nRegs = 0;
   18902       UInt rN   = INSN0(10,8);
   18903       UInt list = INSN0(7,0);
   18904       /* Empty lists aren't allowed. */
   18905       if (list != 0) {
   18906          mk_skip_over_T16_if_cond_is_false(condT);
   18907          condT = IRTemp_INVALID;
   18908          put_ITSTATE(old_itstate);
   18909          // now uncond
   18910 
   18911          IRTemp oldRn = newTemp(Ity_I32);
   18912          IRTemp base  = newTemp(Ity_I32);
   18913          assign(oldRn, getIRegT(rN));
   18914          assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
   18915          for (i = 0; i < 8; i++) {
   18916             if (0 == (list & (1 << i)))
   18917                continue;
   18918             nRegs++;
   18919             putIRegT(
   18920                i, loadLE(Ity_I32,
   18921                          binop(Iop_Add32, mkexpr(base),
   18922                                           mkU32(nRegs * 4 - 4))),
   18923                IRTemp_INVALID
   18924             );
   18925          }
   18926          /* Only do the writeback for rN if it isn't in the list of
   18927             registers to be transferred. */
   18928          if (0 == (list & (1 << rN))) {
   18929             putIRegT(rN,
   18930                      binop(Iop_Add32, mkexpr(oldRn),
   18931                                       mkU32(nRegs * 4)),
   18932                      IRTemp_INVALID
   18933             );
   18934          }
   18935 
   18936          /* Reinstate the ITSTATE update. */
   18937          put_ITSTATE(new_itstate);
   18938 
   18939          DIP("ldmia r%u!, {0x%04x}\n", rN, list);
   18940          goto decode_success;
   18941       }
   18942       break;
   18943    }
   18944 
   18945    case BITS5(1,1,0,0,0): {
   18946       /* ------------- STMIA Rn!, {reglist} ------------- */
   18947       Int i, nRegs = 0;
   18948       UInt rN   = INSN0(10,8);
   18949       UInt list = INSN0(7,0);
   18950       /* Empty lists aren't allowed.  Also, if rN is in the list then
   18951          it must be the lowest numbered register in the list. */
   18952       Bool valid = list != 0;
   18953       if (valid && 0 != (list & (1 << rN))) {
   18954          for (i = 0; i < rN; i++) {
   18955             if (0 != (list & (1 << i)))
   18956                valid = False;
   18957          }
   18958       }
   18959       if (valid) {
   18960          mk_skip_over_T16_if_cond_is_false(condT);
   18961          condT = IRTemp_INVALID;
   18962          put_ITSTATE(old_itstate);
   18963          // now uncond
   18964 
   18965          IRTemp oldRn = newTemp(Ity_I32);
   18966          IRTemp base = newTemp(Ity_I32);
   18967          assign(oldRn, getIRegT(rN));
   18968          assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
   18969          for (i = 0; i < 8; i++) {
   18970             if (0 == (list & (1 << i)))
   18971                continue;
   18972             nRegs++;
   18973             storeLE( binop(Iop_Add32, mkexpr(base), mkU32(nRegs * 4 - 4)),
   18974                      getIRegT(i) );
   18975          }
   18976          /* Always do the writeback. */
   18977          putIRegT(rN,
   18978                   binop(Iop_Add32, mkexpr(oldRn),
   18979                                    mkU32(nRegs * 4)),
   18980                   IRTemp_INVALID);
   18981 
   18982          /* Reinstate the ITSTATE update. */
   18983          put_ITSTATE(new_itstate);
   18984 
   18985          DIP("stmia r%u!, {0x%04x}\n", rN, list);
   18986          goto decode_success;
   18987       }
   18988       break;
   18989    }
   18990 
   18991    case BITS5(0,0,0,0,0):   /* LSLS */
   18992    case BITS5(0,0,0,0,1):   /* LSRS */
   18993    case BITS5(0,0,0,1,0): { /* ASRS */
   18994       /* ---------------- LSLS Rd, Rm, #imm5 ---------------- */
   18995       /* ---------------- LSRS Rd, Rm, #imm5 ---------------- */
   18996       /* ---------------- ASRS Rd, Rm, #imm5 ---------------- */
   18997       UInt   rD   = INSN0(2,0);
   18998       UInt   rM   = INSN0(5,3);
   18999       UInt   imm5 = INSN0(10,6);
   19000       IRTemp res  = newTemp(Ity_I32);
   19001       IRTemp resC = newTemp(Ity_I32);
   19002       IRTemp rMt  = newTemp(Ity_I32);
   19003       IRTemp oldV = newTemp(Ity_I32);
   19004       const HChar* wot  = "???";
   19005       assign(rMt, getIRegT(rM));
   19006       assign(oldV, mk_armg_calculate_flag_v());
   19007       /* Looks like INSN0(12,11) are the standard 'how' encoding.
   19008          Could compactify if the ROR case later appears. */
   19009       switch (INSN0(15,11)) {
   19010          case BITS5(0,0,0,0,0):
   19011             compute_result_and_C_after_LSL_by_imm5(
   19012                dis_buf, &res, &resC, rMt, imm5, rM
   19013             );
   19014             wot = "lsl";
   19015             break;
   19016          case BITS5(0,0,0,0,1):
   19017             compute_result_and_C_after_LSR_by_imm5(
   19018                dis_buf, &res, &resC, rMt, imm5, rM
   19019             );
   19020             wot = "lsr";
   19021             break;
   19022          case BITS5(0,0,0,1,0):
   19023             compute_result_and_C_after_ASR_by_imm5(
   19024                dis_buf, &res, &resC, rMt, imm5, rM
   19025             );
   19026             wot = "asr";
   19027             break;
   19028          default:
   19029             /*NOTREACHED*/vassert(0);
   19030       }
   19031       // not safe to read guest state after this point
   19032       putIRegT(rD, mkexpr(res), condT);
   19033       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
   19034                          cond_AND_notInIT_T );
   19035       /* ignore buf and roll our own output */
   19036       DIP("%ss r%u, r%u, #%u\n", wot, rD, rM, imm5);
   19037       goto decode_success;
   19038    }
   19039 
   19040    case BITS5(1,1,1,0,0): {
   19041       /* ---------------- B #simm11 ---------------- */
   19042       Int  simm11 = INSN0(10,0);
   19043            simm11 = (simm11 << 21) >> 20;
   19044       UInt dst    = simm11 + guest_R15_curr_instr_notENC + 4;
   19045       /* Only allowed outside or last-in IT block; SIGILL if not so. */
   19046       gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   19047       // and skip this insn if not selected; being cleverer is too
   19048       // difficult
   19049       mk_skip_over_T16_if_cond_is_false(condT);
   19050       condT = IRTemp_INVALID;
   19051       // now uncond
   19052       llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
   19053       dres.jk_StopHere = Ijk_Boring;
   19054       dres.whatNext    = Dis_StopHere;
   19055       DIP("b 0x%x\n", dst);
   19056       goto decode_success;
   19057    }
   19058 
   19059    default:
   19060       break; /* examine the next shortest prefix */
   19061 
   19062    }
   19063 
   19064 
   19065    /* ================ 16-bit 15:12 cases ================ */
   19066 
   19067    switch (INSN0(15,12)) {
   19068 
   19069    case BITS4(1,1,0,1): {
   19070       /* ---------------- Bcond #simm8 ---------------- */
   19071       UInt cond  = INSN0(11,8);
   19072       Int  simm8 = INSN0(7,0);
   19073            simm8 = (simm8 << 24) >> 23;
   19074       UInt dst   = simm8 + guest_R15_curr_instr_notENC + 4;
   19075       if (cond != ARMCondAL && cond != ARMCondNV) {
   19076          /* Not allowed in an IT block; SIGILL if so. */
   19077          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   19078 
   19079          IRTemp kondT = newTemp(Ity_I32);
   19080          assign( kondT, mk_armg_calculate_condition(cond) );
   19081          stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
   19082                             Ijk_Boring,
   19083                             IRConst_U32(dst | 1/*CPSR.T*/),
   19084                             OFFB_R15T ));
   19085          llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2)
   19086                               | 1 /*CPSR.T*/ ));
   19087          dres.jk_StopHere = Ijk_Boring;
   19088          dres.whatNext    = Dis_StopHere;
   19089          DIP("b%s 0x%x\n", nCC(cond), dst);
   19090          goto decode_success;
   19091       }
   19092       break;
   19093    }
   19094 
   19095    default:
   19096       break; /* hmm, nothing matched */
   19097 
   19098    }
   19099 
   19100    /* ================ 16-bit misc cases ================ */
   19101 
   19102    switch (INSN0(15,0)) {
   19103       case 0xBF00:
   19104          /* ------ NOP ------ */
   19105          DIP("nop\n");
   19106          goto decode_success;
   19107       case 0xBF20:
   19108          /* ------ WFE ------ */
   19109          /* WFE gets used as a spin-loop hint.  Do the usual thing,
   19110             which is to continue after yielding. */
   19111          stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
   19112                             Ijk_Yield,
   19113                             IRConst_U32((guest_R15_curr_instr_notENC + 2)
   19114                                         | 1 /*CPSR.T*/),
   19115                             OFFB_R15T ));
   19116          DIP("wfe\n");
   19117          goto decode_success;
   19118       case 0xBF40:
   19119          /* ------ SEV ------ */
   19120          /* Treat this as a no-op.  Any matching WFEs won't really
   19121             cause the host CPU to snooze; they just cause V to try to
   19122             run some other thread for a while.  So there's no point in
   19123             really doing anything for SEV. */
   19124          DIP("sev\n");
   19125          goto decode_success;
   19126       default:
   19127          break; /* fall through */
   19128    }
   19129 
   19130    /* ----------------------------------------------------------- */
   19131    /* --                                                       -- */
   19132    /* -- Thumb 32-bit integer instructions                     -- */
   19133    /* --                                                       -- */
   19134    /* ----------------------------------------------------------- */
   19135 
   19136 #  define INSN1(_bMax,_bMin)  SLICE_UInt(((UInt)insn1), (_bMax), (_bMin))
   19137 
   19138    /* second 16 bits of the instruction, if any */
   19139    vassert(insn1 == 0);
   19140    insn1 = getUShortLittleEndianly( guest_instr+2 );
   19141 
   19142    anOp   = Iop_INVALID; /* paranoia */
   19143    anOpNm = NULL;        /* paranoia */
   19144 
   19145    /* Change result defaults to suit 32-bit insns. */
   19146    vassert(dres.whatNext   == Dis_Continue);
   19147    vassert(dres.len        == 2);
   19148    vassert(dres.continueAt == 0);
   19149    dres.len = 4;
   19150 
   19151    /* ---------------- BL/BLX simm26 ---------------- */
   19152    if (BITS5(1,1,1,1,0) == INSN0(15,11) && BITS2(1,1) == INSN1(15,14)) {
   19153       UInt isBL = INSN1(12,12);
   19154       UInt bS   = INSN0(10,10);
   19155       UInt bJ1  = INSN1(13,13);
   19156       UInt bJ2  = INSN1(11,11);
   19157       UInt bI1  = 1 ^ (bJ1 ^ bS);
   19158       UInt bI2  = 1 ^ (bJ2 ^ bS);
   19159       Int simm25
   19160          =   (bS          << (1 + 1 + 10 + 11 + 1))
   19161            | (bI1         << (1 + 10 + 11 + 1))
   19162            | (bI2         << (10 + 11 + 1))
   19163            | (INSN0(9,0)  << (11 + 1))
   19164            | (INSN1(10,0) << 1);
   19165       simm25 = (simm25 << 7) >> 7;
   19166 
   19167       vassert(0 == (guest_R15_curr_instr_notENC & 1));
   19168       UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
   19169 
   19170       /* One further validity case to check: in the case of BLX
   19171          (not-BL), that insn1[0] must be zero. */
   19172       Bool valid = True;
   19173       if (isBL == 0 && INSN1(0,0) == 1) valid = False;
   19174       if (valid) {
   19175          /* Only allowed outside or last-in IT block; SIGILL if not so. */
   19176          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   19177          // and skip this insn if not selected; being cleverer is too
   19178          // difficult
   19179          mk_skip_over_T32_if_cond_is_false(condT);
   19180          condT = IRTemp_INVALID;
   19181          // now uncond
   19182 
   19183          /* We're returning to Thumb code, hence "| 1" */
   19184          putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 4) | 1 ),
   19185                    IRTemp_INVALID);
   19186          if (isBL) {
   19187             /* BL: unconditional T -> T call */
   19188             /* we're calling Thumb code, hence "| 1" */
   19189             llPutIReg(15, mkU32( dst | 1 ));
   19190             DIP("bl 0x%x (stay in Thumb mode)\n", dst);
   19191          } else {
   19192             /* BLX: unconditional T -> A call */
   19193             /* we're calling ARM code, hence "& ~3" to align to a
   19194                valid ARM insn address */
   19195             llPutIReg(15, mkU32( dst & ~3 ));
   19196             DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
   19197          }
   19198          dres.whatNext    = Dis_StopHere;
   19199          dres.jk_StopHere = Ijk_Call;
   19200          goto decode_success;
   19201       }
   19202    }
   19203 
   19204    /* ---------------- {LD,ST}M{IA,DB} ---------------- */
   19205    if (0x3a2 == INSN0(15,6) // {LD,ST}MIA
   19206        || 0x3a4 == INSN0(15,6)) { // {LD,ST}MDB
   19207       UInt bW      = INSN0(5,5); /* writeback Rn ? */
   19208       UInt bL      = INSN0(4,4);
   19209       UInt rN      = INSN0(3,0);
   19210       UInt bP      = INSN1(15,15); /* reglist entry for r15 */
   19211       UInt bM      = INSN1(14,14); /* reglist entry for r14 */
   19212       UInt rLmost  = INSN1(12,0);  /* reglist entry for r0 .. 12 */
   19213       UInt rL13    = INSN1(13,13); /* must be zero */
   19214       UInt regList = 0;
   19215       Bool valid   = True;
   19216 
   19217       UInt bINC    = 1;
   19218       UInt bBEFORE = 0;
   19219       if (INSN0(15,6) == 0x3a4) {
   19220          bINC    = 0;
   19221          bBEFORE = 1;
   19222       }
   19223 
   19224       /* detect statically invalid cases, and construct the final
   19225          reglist */
   19226       if (rL13 == 1)
   19227          valid = False;
   19228 
   19229       if (bL == 1) {
   19230          regList = (bP << 15) | (bM << 14) | rLmost;
   19231          if (rN == 15)                       valid = False;
   19232          if (popcount32(regList) < 2)        valid = False;
   19233          if (bP == 1 && bM == 1)             valid = False;
   19234          if (bW == 1 && (regList & (1<<rN))) valid = False;
   19235       } else {
   19236          regList = (bM << 14) | rLmost;
   19237          if (bP == 1)                        valid = False;
   19238          if (rN == 15)                       valid = False;
   19239          if (popcount32(regList) < 2)        valid = False;
   19240          if (bW == 1 && (regList & (1<<rN))) valid = False;
   19241       }
   19242 
   19243       if (valid) {
   19244          if (bL == 1 && bP == 1) {
   19245             // We'll be writing the PC.  Hence:
   19246             /* Only allowed outside or last-in IT block; SIGILL if not so. */
   19247             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   19248          }
   19249 
   19250          /* Go uncond: */
   19251          mk_skip_over_T32_if_cond_is_false(condT);
   19252          condT = IRTemp_INVALID;
   19253          // now uncond
   19254 
   19255          /* Generate the IR.  This might generate a write to R15. */
   19256          mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
   19257 
   19258          if (bL == 1 && (regList & (1<<15))) {
   19259             // If we wrote to R15, we have an interworking return to
   19260             // deal with.
   19261             llPutIReg(15, llGetIReg(15));
   19262             dres.jk_StopHere = Ijk_Ret;
   19263             dres.whatNext    = Dis_StopHere;
   19264          }
   19265 
   19266          DIP("%sm%c%c r%u%s, {0x%04x}\n",
   19267               bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
   19268               rN, bW ? "!" : "", regList);
   19269 
   19270          goto decode_success;
   19271       }
   19272    }
   19273 
   19274    /* -------------- (T3) ADD{S}.W Rd, Rn, #constT -------------- */
   19275    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19276        && INSN0(9,5) == BITS5(0,1,0,0,0)
   19277        && INSN1(15,15) == 0) {
   19278       UInt bS = INSN0(4,4);
   19279       UInt rN = INSN0(3,0);
   19280       UInt rD = INSN1(11,8);
   19281       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
   19282       /* but allow "add.w reg, sp, #constT" for reg != PC */
   19283       if (!valid && rD <= 14 && rN == 13)
   19284          valid = True;
   19285       if (valid) {
   19286          IRTemp argL  = newTemp(Ity_I32);
   19287          IRTemp argR  = newTemp(Ity_I32);
   19288          IRTemp res   = newTemp(Ity_I32);
   19289          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   19290          assign(argL, getIRegT(rN));
   19291          assign(argR, mkU32(imm32));
   19292          assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
   19293          putIRegT(rD, mkexpr(res), condT);
   19294          if (bS == 1)
   19295             setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
   19296          DIP("add%s.w r%u, r%u, #%u\n",
   19297              bS == 1 ? "s" : "", rD, rN, imm32);
   19298          goto decode_success;
   19299       }
   19300    }
   19301 
   19302    /* ---------------- (T4) ADDW Rd, Rn, #uimm12 -------------- */
   19303    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19304        && INSN0(9,4) == BITS6(1,0,0,0,0,0)
   19305        && INSN1(15,15) == 0) {
   19306       UInt rN = INSN0(3,0);
   19307       UInt rD = INSN1(11,8);
   19308       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
   19309       /* but allow "addw reg, sp, #uimm12" for reg != PC */
   19310       if (!valid && rD <= 14 && rN == 13)
   19311          valid = True;
   19312       if (valid) {
   19313          IRTemp argL = newTemp(Ity_I32);
   19314          IRTemp argR = newTemp(Ity_I32);
   19315          IRTemp res  = newTemp(Ity_I32);
   19316          UInt imm12  = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
   19317          assign(argL, getIRegT(rN));
   19318          assign(argR, mkU32(imm12));
   19319          assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
   19320          putIRegT(rD, mkexpr(res), condT);
   19321          DIP("addw r%u, r%u, #%u\n", rD, rN, imm12);
   19322          goto decode_success;
   19323       }
   19324    }
   19325 
   19326    /* ---------------- (T2) CMP.W Rn, #constT ---------------- */
   19327    /* ---------------- (T2) CMN.W Rn, #constT ---------------- */
   19328    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19329        && (   INSN0(9,4) == BITS6(0,1,1,0,1,1)  // CMP
   19330            || INSN0(9,4) == BITS6(0,1,0,0,0,1)) // CMN
   19331        && INSN1(15,15) == 0
   19332        && INSN1(11,8) == BITS4(1,1,1,1)) {
   19333       UInt rN = INSN0(3,0);
   19334       if (rN != 15) {
   19335          IRTemp argL  = newTemp(Ity_I32);
   19336          IRTemp argR  = newTemp(Ity_I32);
   19337          Bool   isCMN = INSN0(9,4) == BITS6(0,1,0,0,0,1);
   19338          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   19339          assign(argL, getIRegT(rN));
   19340          assign(argR, mkU32(imm32));
   19341          setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   19342                          argL, argR, condT );
   19343          DIP("%s.w r%u, #%u\n", isCMN ? "cmn" : "cmp", rN, imm32);
   19344          goto decode_success;
   19345       }
   19346    }
   19347 
   19348    /* -------------- (T1) TST.W Rn, #constT -------------- */
   19349    /* -------------- (T1) TEQ.W Rn, #constT -------------- */
   19350    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19351        && (   INSN0(9,4) == BITS6(0,0,0,0,0,1)  // TST
   19352            || INSN0(9,4) == BITS6(0,0,1,0,0,1)) // TEQ
   19353        && INSN1(15,15) == 0
   19354        && INSN1(11,8) == BITS4(1,1,1,1)) {
   19355       UInt rN = INSN0(3,0);
   19356       if (!isBadRegT(rN)) { // yes, really, it's inconsistent with CMP.W
   19357          Bool  isTST  = INSN0(9,4) == BITS6(0,0,0,0,0,1);
   19358          IRTemp argL  = newTemp(Ity_I32);
   19359          IRTemp argR  = newTemp(Ity_I32);
   19360          IRTemp res   = newTemp(Ity_I32);
   19361          IRTemp oldV  = newTemp(Ity_I32);
   19362          IRTemp oldC  = newTemp(Ity_I32);
   19363          Bool   updC  = False;
   19364          UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
   19365          assign(argL, getIRegT(rN));
   19366          assign(argR, mkU32(imm32));
   19367          assign(res,  binop(isTST ? Iop_And32 : Iop_Xor32,
   19368                             mkexpr(argL), mkexpr(argR)));
   19369          assign( oldV, mk_armg_calculate_flag_v() );
   19370          assign( oldC, updC
   19371                        ? mkU32((imm32 >> 31) & 1)
   19372                        : mk_armg_calculate_flag_c() );
   19373          setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
   19374          DIP("%s.w r%u, #%u\n", isTST ? "tst" : "teq", rN, imm32);
   19375          goto decode_success;
   19376       }
   19377    }
   19378 
   19379    /* -------------- (T3) SUB{S}.W Rd, Rn, #constT -------------- */
   19380    /* -------------- (T3) RSB{S}.W Rd, Rn, #constT -------------- */
   19381    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19382        && (INSN0(9,5) == BITS5(0,1,1,0,1) // SUB
   19383            || INSN0(9,5) == BITS5(0,1,1,1,0)) // RSB
   19384        && INSN1(15,15) == 0) {
   19385       Bool isRSB = INSN0(9,5) == BITS5(0,1,1,1,0);
   19386       UInt bS    = INSN0(4,4);
   19387       UInt rN    = INSN0(3,0);
   19388       UInt rD    = INSN1(11,8);
   19389       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
      /* but allow "sub{s}.w reg, sp, #constT";
         this is (T2) of "SUB (SP minus immediate)" */
   19392       if (!valid && !isRSB && rN == 13 && rD != 15)
   19393          valid = True;
   19394       if (valid) {
   19395          IRTemp argL  = newTemp(Ity_I32);
   19396          IRTemp argR  = newTemp(Ity_I32);
   19397          IRTemp res   = newTemp(Ity_I32);
   19398          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   19399          assign(argL, getIRegT(rN));
   19400          assign(argR, mkU32(imm32));
   19401          assign(res,  isRSB
   19402                       ? binop(Iop_Sub32, mkexpr(argR), mkexpr(argL))
   19403                       : binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
   19404          putIRegT(rD, mkexpr(res), condT);
   19405          if (bS == 1) {
   19406             if (isRSB)
   19407                setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
   19408             else
   19409                setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   19410          }
   19411          DIP("%s%s.w r%u, r%u, #%u\n",
   19412              isRSB ? "rsb" : "sub", bS == 1 ? "s" : "", rD, rN, imm32);
   19413          goto decode_success;
   19414       }
   19415    }
   19416 
   19417    /* -------------- (T4) SUBW Rd, Rn, #uimm12 ------------------- */
   19418    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19419        && INSN0(9,4) == BITS6(1,0,1,0,1,0)
   19420        && INSN1(15,15) == 0) {
   19421       UInt rN = INSN0(3,0);
   19422       UInt rD = INSN1(11,8);
   19423       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
   19424       /* but allow "subw sp, sp, #uimm12" */
   19425       if (!valid && rD == 13 && rN == 13)
   19426          valid = True;
   19427       if (valid) {
   19428          IRTemp argL  = newTemp(Ity_I32);
   19429          IRTemp argR  = newTemp(Ity_I32);
   19430          IRTemp res   = newTemp(Ity_I32);
   19431          UInt imm12   = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
   19432          assign(argL, getIRegT(rN));
   19433          assign(argR, mkU32(imm12));
   19434          assign(res,  binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
   19435          putIRegT(rD, mkexpr(res), condT);
   19436          DIP("subw r%u, r%u, #%u\n", rD, rN, imm12);
   19437          goto decode_success;
   19438       }
   19439    }
   19440 
   19441    /* -------------- (T1) ADC{S}.W Rd, Rn, #constT -------------- */
   19442    /* -------------- (T1) SBC{S}.W Rd, Rn, #constT -------------- */
   19443    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19444        && (   INSN0(9,5) == BITS5(0,1,0,1,0)  // ADC
   19445            || INSN0(9,5) == BITS5(0,1,0,1,1)) // SBC
   19446        && INSN1(15,15) == 0) {
   19447       /* ADC:  Rd = Rn + constT + oldC */
   19448       /* SBC:  Rd = Rn - constT - (oldC ^ 1) */
   19449       UInt bS    = INSN0(4,4);
   19450       UInt rN    = INSN0(3,0);
   19451       UInt rD    = INSN1(11,8);
   19452       if (!isBadRegT(rN) && !isBadRegT(rD)) {
   19453          IRTemp argL  = newTemp(Ity_I32);
   19454          IRTemp argR  = newTemp(Ity_I32);
   19455          IRTemp res   = newTemp(Ity_I32);
   19456          IRTemp oldC  = newTemp(Ity_I32);
   19457          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   19458          assign(argL, getIRegT(rN));
   19459          assign(argR, mkU32(imm32));
   19460          assign(oldC, mk_armg_calculate_flag_c() );
   19461          const HChar* nm  = "???";
   19462          switch (INSN0(9,5)) {
   19463             case BITS5(0,1,0,1,0): // ADC
   19464                nm = "adc";
   19465                assign(res,
   19466                       binop(Iop_Add32,
   19467                             binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
   19468                             mkexpr(oldC) ));
   19469                putIRegT(rD, mkexpr(res), condT);
   19470                if (bS)
   19471                   setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
   19472                                      argL, argR, oldC, condT );
   19473                break;
   19474             case BITS5(0,1,0,1,1): // SBC
   19475                nm = "sbc";
   19476                assign(res,
   19477                       binop(Iop_Sub32,
   19478                             binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
   19479                             binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   19480                putIRegT(rD, mkexpr(res), condT);
   19481                if (bS)
   19482                   setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   19483                                      argL, argR, oldC, condT );
   19484                break;
   19485             default:
   19486               vassert(0);
   19487          }
   19488          DIP("%s%s.w r%u, r%u, #%u\n",
   19489              nm, bS == 1 ? "s" : "", rD, rN, imm32);
   19490          goto decode_success;
   19491       }
   19492    }
   19493 
   /* -------------- (T1) ORR{S}.W Rd, Rn, #constT -------------- */
   /* -------------- (T1) AND{S}.W Rd, Rn, #constT -------------- */
   /* -------------- (T1) BIC{S}.W Rd, Rn, #constT -------------- */
   /* -------------- (T1) EOR{S}.W Rd, Rn, #constT -------------- */
   /* -------------- (T1) ORN{S}.W Rd, Rn, #constT -------------- */
   19498    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19499        && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // ORR
   19500            || INSN0(9,5) == BITS5(0,0,0,0,0)  // AND
   19501            || INSN0(9,5) == BITS5(0,0,0,0,1)  // BIC
   19502            || INSN0(9,5) == BITS5(0,0,1,0,0)  // EOR
   19503            || INSN0(9,5) == BITS5(0,0,0,1,1)) // ORN
   19504        && INSN1(15,15) == 0) {
   19505       UInt bS = INSN0(4,4);
   19506       UInt rN = INSN0(3,0);
   19507       UInt rD = INSN1(11,8);
   19508       if (!isBadRegT(rN) && !isBadRegT(rD)) {
   19509          Bool   notArgR = False;
   19510          IROp   op      = Iop_INVALID;
   19511          const HChar* nm = "???";
   19512          switch (INSN0(9,5)) {
   19513             case BITS5(0,0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
   19514             case BITS5(0,0,0,0,0): op = Iop_And32; nm = "and"; break;
   19515             case BITS5(0,0,0,0,1): op = Iop_And32; nm = "bic";
   19516                                    notArgR = True; break;
   19517             case BITS5(0,0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
   19518             case BITS5(0,0,0,1,1): op = Iop_Or32;  nm = "orn";
   19519                                    notArgR = True; break;
   19520             default: vassert(0);
   19521          }
   19522          IRTemp argL  = newTemp(Ity_I32);
   19523          IRTemp argR  = newTemp(Ity_I32);
   19524          IRTemp res   = newTemp(Ity_I32);
   19525          Bool   updC  = False;
   19526          UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
   19527          assign(argL, getIRegT(rN));
   19528          assign(argR, mkU32(notArgR ? ~imm32 : imm32));
   19529          assign(res,  binop(op, mkexpr(argL), mkexpr(argR)));
   19530          putIRegT(rD, mkexpr(res), condT);
   19531          if (bS) {
   19532             IRTemp oldV = newTemp(Ity_I32);
   19533             IRTemp oldC = newTemp(Ity_I32);
   19534             assign( oldV, mk_armg_calculate_flag_v() );
   19535             assign( oldC, updC
   19536                           ? mkU32((imm32 >> 31) & 1)
   19537                           : mk_armg_calculate_flag_c() );
   19538             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19539                                condT );
   19540          }
   19541          DIP("%s%s.w r%u, r%u, #%u\n",
   19542              nm, bS == 1 ? "s" : "", rD, rN, imm32);
   19543          goto decode_success;
   19544       }
   19545    }
   19546 
   19547    /* ---------- (T3) ADD{S}.W Rd, Rn, Rm, {shift} ---------- */
   19548    /* ---------- (T3) SUB{S}.W Rd, Rn, Rm, {shift} ---------- */
   19549    /* ---------- (T3) RSB{S}.W Rd, Rn, Rm, {shift} ---------- */
   19550    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   19551        && (   INSN0(8,5) == BITS4(1,0,0,0)  // add subopc
   19552            || INSN0(8,5) == BITS4(1,1,0,1)  // sub subopc
   19553            || INSN0(8,5) == BITS4(1,1,1,0)) // rsb subopc
   19554        && INSN1(15,15) == 0) {
   19555       UInt rN   = INSN0(3,0);
   19556       UInt rD   = INSN1(11,8);
   19557       UInt rM   = INSN1(3,0);
   19558       UInt bS   = INSN0(4,4);
   19559       UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   19560       UInt how  = INSN1(5,4);
   19561 
   19562       Bool valid = !isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM);
      /* but allow "add.w reg, sp, reg, lsl #N" for N=0,1,2 or 3;
         this is (T3) of "ADD (SP plus register)" */
   19565       if (!valid && INSN0(8,5) == BITS4(1,0,0,0) // add
   19566           && rD != 15 && rN == 13 && imm5 <= 3 && how == 0) {
   19567          valid = True;
   19568       }
      /* also allow "sub.w reg, sp, reg" with no shift;
         this is (T1) of "SUB (SP minus register)" */
   19571       if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // sub
   19572           && rD != 15 && rN == 13 && imm5 == 0 && how == 0) {
   19573          valid = True;
   19574       }
   19575       if (valid) {
   19576          Bool   swap = False;
   19577          IROp   op   = Iop_INVALID;
   19578          const HChar* nm = "???";
   19579          switch (INSN0(8,5)) {
   19580             case BITS4(1,0,0,0): op = Iop_Add32; nm = "add"; break;
   19581             case BITS4(1,1,0,1): op = Iop_Sub32; nm = "sub"; break;
   19582             case BITS4(1,1,1,0): op = Iop_Sub32; nm = "rsb";
   19583                                  swap = True; break;
   19584             default: vassert(0);
   19585          }
   19586 
   19587          IRTemp argL = newTemp(Ity_I32);
   19588          assign(argL, getIRegT(rN));
   19589 
   19590          IRTemp rMt = newTemp(Ity_I32);
   19591          assign(rMt, getIRegT(rM));
   19592 
   19593          IRTemp argR = newTemp(Ity_I32);
   19594          compute_result_and_C_after_shift_by_imm5(
   19595             dis_buf, &argR, NULL, rMt, how, imm5, rM
   19596          );
   19597 
   19598          IRTemp res = newTemp(Ity_I32);
   19599          assign(res, swap
   19600                      ? binop(op, mkexpr(argR), mkexpr(argL))
   19601                      : binop(op, mkexpr(argL), mkexpr(argR)));
   19602 
   19603          putIRegT(rD, mkexpr(res), condT);
   19604          if (bS) {
   19605             switch (op) {
   19606                case Iop_Add32:
   19607                   setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
   19608                   break;
   19609                case Iop_Sub32:
   19610                   if (swap)
   19611                      setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
   19612                   else
   19613                      setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   19614                   break;
   19615                default:
   19616                   vassert(0);
   19617             }
   19618          }
   19619 
   19620          DIP("%s%s.w r%u, r%u, %s\n",
   19621              nm, bS ? "s" : "", rD, rN, dis_buf);
   19622          goto decode_success;
   19623       }
   19624    }
   19625 
   19626    /* ---------- (T3) ADC{S}.W Rd, Rn, Rm, {shift} ---------- */
   19627    /* ---------- (T2) SBC{S}.W Rd, Rn, Rm, {shift} ---------- */
   19628    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   19629        && (   INSN0(8,5) == BITS4(1,0,1,0)   // adc subopc
   19630            || INSN0(8,5) == BITS4(1,0,1,1))  // sbc subopc
   19631        && INSN1(15,15) == 0) {
   19632       /* ADC:  Rd = Rn + shifter_operand + oldC */
   19633       /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
   19634       UInt rN = INSN0(3,0);
   19635       UInt rD = INSN1(11,8);
   19636       UInt rM = INSN1(3,0);
   19637       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   19638          UInt bS   = INSN0(4,4);
   19639          UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   19640          UInt how  = INSN1(5,4);
   19641 
   19642          IRTemp argL = newTemp(Ity_I32);
   19643          assign(argL, getIRegT(rN));
   19644 
   19645          IRTemp rMt = newTemp(Ity_I32);
   19646          assign(rMt, getIRegT(rM));
   19647 
   19648          IRTemp oldC = newTemp(Ity_I32);
   19649          assign(oldC, mk_armg_calculate_flag_c());
   19650 
   19651          IRTemp argR = newTemp(Ity_I32);
   19652          compute_result_and_C_after_shift_by_imm5(
   19653             dis_buf, &argR, NULL, rMt, how, imm5, rM
   19654          );
   19655 
   19656          const HChar* nm  = "???";
   19657          IRTemp res = newTemp(Ity_I32);
   19658          switch (INSN0(8,5)) {
   19659             case BITS4(1,0,1,0): // ADC
   19660                nm = "adc";
   19661                assign(res,
   19662                       binop(Iop_Add32,
   19663                             binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
   19664                             mkexpr(oldC) ));
   19665                putIRegT(rD, mkexpr(res), condT);
   19666                if (bS)
   19667                   setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
   19668                                      argL, argR, oldC, condT );
   19669                break;
   19670             case BITS4(1,0,1,1): // SBC
   19671                nm = "sbc";
   19672                assign(res,
   19673                       binop(Iop_Sub32,
   19674                             binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
   19675                             binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   19676                putIRegT(rD, mkexpr(res), condT);
   19677                if (bS)
   19678                   setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   19679                                      argL, argR, oldC, condT );
   19680                break;
   19681             default:
   19682                vassert(0);
   19683          }
   19684 
   19685          DIP("%s%s.w r%u, r%u, %s\n",
   19686              nm, bS ? "s" : "", rD, rN, dis_buf);
   19687          goto decode_success;
   19688       }
   19689    }
   19690 
   19691    /* ---------- (T3) AND{S}.W Rd, Rn, Rm, {shift} ---------- */
   19692    /* ---------- (T3) ORR{S}.W Rd, Rn, Rm, {shift} ---------- */
   19693    /* ---------- (T3) EOR{S}.W Rd, Rn, Rm, {shift} ---------- */
   19694    /* ---------- (T3) BIC{S}.W Rd, Rn, Rm, {shift} ---------- */
   19695    /* ---------- (T1) ORN{S}.W Rd, Rn, Rm, {shift} ---------- */
   19696    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   19697        && (   INSN0(8,5) == BITS4(0,0,0,0)  // and subopc
   19698            || INSN0(8,5) == BITS4(0,0,1,0)  // orr subopc
   19699            || INSN0(8,5) == BITS4(0,1,0,0)  // eor subopc
   19700            || INSN0(8,5) == BITS4(0,0,0,1)  // bic subopc
   19701            || INSN0(8,5) == BITS4(0,0,1,1)) // orn subopc
   19702        && INSN1(15,15) == 0) {
   19703       UInt rN = INSN0(3,0);
   19704       UInt rD = INSN1(11,8);
   19705       UInt rM = INSN1(3,0);
   19706       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   19707          Bool notArgR = False;
   19708          IROp op      = Iop_INVALID;
   19709          const HChar* nm  = "???";
   19710          switch (INSN0(8,5)) {
   19711             case BITS4(0,0,0,0): op = Iop_And32; nm = "and"; break;
   19712             case BITS4(0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
   19713             case BITS4(0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
   19714             case BITS4(0,0,0,1): op = Iop_And32; nm = "bic";
   19715                                  notArgR = True; break;
   19716             case BITS4(0,0,1,1): op = Iop_Or32; nm = "orn";
   19717                                  notArgR = True; break;
   19718             default: vassert(0);
   19719          }
   19720          UInt bS   = INSN0(4,4);
   19721          UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   19722          UInt how  = INSN1(5,4);
   19723 
   19724          IRTemp rNt = newTemp(Ity_I32);
   19725          assign(rNt, getIRegT(rN));
   19726 
   19727          IRTemp rMt = newTemp(Ity_I32);
   19728          assign(rMt, getIRegT(rM));
   19729 
   19730          IRTemp argR = newTemp(Ity_I32);
   19731          IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   19732 
   19733          compute_result_and_C_after_shift_by_imm5(
   19734             dis_buf, &argR, bS ? &oldC : NULL, rMt, how, imm5, rM
   19735          );
   19736 
   19737          IRTemp res = newTemp(Ity_I32);
   19738          if (notArgR) {
   19739             vassert(op == Iop_And32 || op == Iop_Or32);
   19740             assign(res, binop(op, mkexpr(rNt),
   19741                                   unop(Iop_Not32, mkexpr(argR))));
   19742          } else {
   19743             assign(res, binop(op, mkexpr(rNt), mkexpr(argR)));
   19744          }
   19745 
   19746          putIRegT(rD, mkexpr(res), condT);
   19747          if (bS) {
   19748             IRTemp oldV = newTemp(Ity_I32);
   19749             assign( oldV, mk_armg_calculate_flag_v() );
   19750             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19751                                condT );
   19752          }
   19753 
   19754          DIP("%s%s.w r%u, r%u, %s\n",
   19755              nm, bS ? "s" : "", rD, rN, dis_buf);
   19756          goto decode_success;
   19757       }
   19758    }
   19759 
   19760    /* -------------- (T?) LSL{S}.W Rd, Rn, Rm -------------- */
   19761    /* -------------- (T?) LSR{S}.W Rd, Rn, Rm -------------- */
   19762    /* -------------- (T?) ASR{S}.W Rd, Rn, Rm -------------- */
   19763    /* -------------- (T?) ROR{S}.W Rd, Rn, Rm -------------- */
   19764    if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,0,0)
   19765        && INSN1(15,12) == BITS4(1,1,1,1)
   19766        && INSN1(7,4) == BITS4(0,0,0,0)) {
   19767       UInt how = INSN0(6,5); // standard encoding
   19768       UInt rN  = INSN0(3,0);
   19769       UInt rD  = INSN1(11,8);
   19770       UInt rM  = INSN1(3,0);
   19771       UInt bS  = INSN0(4,4);
   19772       Bool valid = !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rD);
   19773       if (valid) {
   19774          IRTemp rNt    = newTemp(Ity_I32);
   19775          IRTemp rMt    = newTemp(Ity_I32);
   19776          IRTemp res    = newTemp(Ity_I32);
   19777          IRTemp oldC   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   19778          IRTemp oldV   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   19779          const HChar* nms[4] = { "lsl", "lsr", "asr", "ror" };
   19780          const HChar* nm     = nms[how];
   19781          assign(rNt, getIRegT(rN));
   19782          assign(rMt, getIRegT(rM));
   19783          compute_result_and_C_after_shift_by_reg(
   19784             dis_buf, &res, bS ? &oldC : NULL,
   19785             rNt, how, rMt, rN, rM
   19786          );
   19787          if (bS)
   19788             assign(oldV, mk_armg_calculate_flag_v());
   19789          putIRegT(rD, mkexpr(res), condT);
   19790          if (bS) {
   19791             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19792                                condT );
   19793          }
   19794          DIP("%s%s.w r%u, r%u, r%u\n",
   19795              nm, bS ? "s" : "", rD, rN, rM);
   19796          goto decode_success;
   19797       }
   19798    }
   19799 
   19800    /* ------------ (T?) MOV{S}.W Rd, Rn, {shift} ------------ */
   19801    /* ------------ (T?) MVN{S}.W Rd, Rn, {shift} ------------ */
   19802    if ((INSN0(15,0) & 0xFFCF) == 0xEA4F
   19803        && INSN1(15,15) == 0) {
   19804       UInt rD = INSN1(11,8);
   19805       UInt rN = INSN1(3,0);
   19806       UInt bS = INSN0(4,4);
   19807       int badRegs = bS ? (isBadRegT(rD) || isBadRegT(rN))
   19808                        : (rD == 15 || rN == 15 || (rD == 15 && rN == 15));
   19809 
   19810       if (!badRegs) {
   19811          UInt isMVN = INSN0(5,5);
   19812          UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
   19813          UInt how   = INSN1(5,4);
   19814 
   19815          IRTemp rNt = newTemp(Ity_I32);
   19816          assign(rNt, getIRegT(rN));
   19817 
   19818          IRTemp oldRn = newTemp(Ity_I32);
   19819          IRTemp oldC  = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   19820          compute_result_and_C_after_shift_by_imm5(
   19821             dis_buf, &oldRn, bS ? &oldC : NULL, rNt, how, imm5, rN
   19822          );
   19823 
   19824          IRTemp res = newTemp(Ity_I32);
   19825          assign(res, isMVN ? unop(Iop_Not32, mkexpr(oldRn))
   19826                            : mkexpr(oldRn));
   19827 
   19828          putIRegT(rD, mkexpr(res), condT);
   19829          if (bS) {
   19830             IRTemp oldV = newTemp(Ity_I32);
   19831             assign( oldV, mk_armg_calculate_flag_v() );
   19832             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT);
   19833          }
   19834          DIP("%s%s.w r%u, %s\n",
   19835              isMVN ? "mvn" : "mov", bS ? "s" : "", rD, dis_buf);
   19836          goto decode_success;
   19837       }
   19838    }
   19839 
   19840    /* -------------- (T?) TST.W Rn, Rm, {shift} -------------- */
   19841    /* -------------- (T?) TEQ.W Rn, Rm, {shift} -------------- */
   19842    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   19843        && (   INSN0(8,4) == BITS5(0,0,0,0,1)  // TST
   19844            || INSN0(8,4) == BITS5(0,1,0,0,1)) // TEQ
   19845        && INSN1(15,15) == 0
   19846        && INSN1(11,8) == BITS4(1,1,1,1)) {
   19847       UInt rN = INSN0(3,0);
   19848       UInt rM = INSN1(3,0);
   19849       if (!isBadRegT(rN) && !isBadRegT(rM)) {
   19850          Bool isTST = INSN0(8,4) == BITS5(0,0,0,0,1);
   19851 
   19852          UInt how  = INSN1(5,4);
   19853          UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   19854 
   19855          IRTemp argL = newTemp(Ity_I32);
   19856          assign(argL, getIRegT(rN));
   19857 
   19858          IRTemp rMt = newTemp(Ity_I32);
   19859          assign(rMt, getIRegT(rM));
   19860 
   19861          IRTemp argR = newTemp(Ity_I32);
   19862          IRTemp oldC = newTemp(Ity_I32);
   19863          compute_result_and_C_after_shift_by_imm5(
   19864             dis_buf, &argR, &oldC, rMt, how, imm5, rM
   19865          );
   19866 
   19867          IRTemp oldV = newTemp(Ity_I32);
   19868          assign( oldV, mk_armg_calculate_flag_v() );
   19869 
   19870          IRTemp res = newTemp(Ity_I32);
   19871          assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
   19872                            mkexpr(argL), mkexpr(argR)));
   19873 
   19874          setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19875                             condT );
   19876          DIP("%s.w r%u, %s\n", isTST ? "tst" : "teq", rN, dis_buf);
   19877          goto decode_success;
   19878       }
   19879    }
   19880 
   19881    /* -------------- (T3) CMP.W Rn, Rm, {shift} -------------- */
   19882    /* -------------- (T2) CMN.W Rn, Rm, {shift} -------------- */
   19883    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   19884        && (   INSN0(8,4) == BITS5(1,1,0,1,1)  // CMP
   19885            || INSN0(8,4) == BITS5(1,0,0,0,1)) // CMN
   19886        && INSN1(15,15) == 0
   19887        && INSN1(11,8) == BITS4(1,1,1,1)) {
   19888       UInt rN = INSN0(3,0);
   19889       UInt rM = INSN1(3,0);
   19890       if (!isBadRegT(rN) && !isBadRegT(rM)) {
   19891          Bool isCMN = INSN0(8,4) == BITS5(1,0,0,0,1);
   19892          UInt how   = INSN1(5,4);
   19893          UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
   19894 
   19895          IRTemp argL = newTemp(Ity_I32);
   19896          assign(argL, getIRegT(rN));
   19897 
   19898          IRTemp rMt = newTemp(Ity_I32);
   19899          assign(rMt, getIRegT(rM));
   19900 
   19901          IRTemp argR = newTemp(Ity_I32);
   19902          compute_result_and_C_after_shift_by_imm5(
   19903             dis_buf, &argR, NULL, rMt, how, imm5, rM
   19904          );
   19905 
   19906          setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   19907                          argL, argR, condT );
   19908 
   19909          DIP("%s.w r%u, %s\n", isCMN ? "cmn" : "cmp", rN, dis_buf);
   19910          goto decode_success;
   19911       }
   19912    }
   19913 
   19914    /* -------------- (T2) MOV{S}.W Rd, #constT -------------- */
   19915    /* -------------- (T2) MVN{S}.W Rd, #constT -------------- */
            /* Writes the expanded immediate (MOV) or its bitwise complement
               (MVN) to Rd.  When the S bit (INSN0 bit 4) is set, the flags
               are also updated via setFlags_D1_D2_ND with
               ARMG_CC_OP_LOGIC.  Taken only when rD is not rejected by
               isBadRegT. */
   19916    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19917        && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // MOV
   19918            || INSN0(9,5) == BITS5(0,0,0,1,1)) // MVN
   19919        && INSN0(3,0) == BITS4(1,1,1,1)
   19920        && INSN1(15,15) == 0) {
   19921       UInt rD = INSN1(11,8);
   19922       if (!isBadRegT(rD)) {
   19923          Bool   updC  = False;
   19924          UInt   bS    = INSN0(4,4);
   19925          Bool   isMVN = INSN0(5,5) == 1;
                  /* updC is an out-parameter of the expander; it is used
                     below to decide where the new C flag comes from. */
   19926          UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
   19927          IRTemp res   = newTemp(Ity_I32);
   19928          assign(res, mkU32(isMVN ? ~imm32 : imm32));
   19929          putIRegT(rD, mkexpr(res), condT);
   19930          if (bS) {
   19931             IRTemp oldV = newTemp(Ity_I32);
   19932             IRTemp oldC = newTemp(Ity_I32);
   19933             assign( oldV, mk_armg_calculate_flag_v() );
                     /* C is bit 31 of the expanded (un-complemented)
                        immediate when updC is set, else the current C. */
   19934             assign( oldC, updC
   19935                           ? mkU32((imm32 >> 31) & 1)
   19936                           : mk_armg_calculate_flag_c() );
   19937             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19938                                condT );
   19939          }
                  /* Note: the trace prints imm32 as expanded, i.e. not
                     complemented, even for MVN. */
   19940          DIP("%s%s.w r%u, #%u\n",
   19941              isMVN ? "mvn" : "mov", bS ? "s" : "", rD, imm32);
   19942          goto decode_success;
   19943       }
   19944    }
   19945 
   19946    /* -------------- (T3) MOVW Rd, #imm16 -------------- */
            /* Writes a 16-bit immediate, as a 32-bit constant, to Rd.  No
               flags are touched.  Taken only when rD is not rejected by
               isBadRegT. */
   19947    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19948        && INSN0(9,4) == BITS6(1,0,0,1,0,0)
   19949        && INSN1(15,15) == 0) {
   19950       UInt rD = INSN1(11,8);
   19951       if (!isBadRegT(rD)) {
                  /* imm16 = INSN0[3:0] : INSN0[10] : INSN1[14:12] : INSN1[7:0] */
   19952          UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
   19953                       | (INSN1(14,12) << 8) | INSN1(7,0);
   19954          putIRegT(rD, mkU32(imm16), condT);
   19955          DIP("movw r%u, #%u\n", rD, imm16);
   19956          goto decode_success;
   19957       }
   19958    }
   19959 
   19960    /* ---------------- MOVT Rd, #imm16 ---------------- */
            /* Replaces the top 16 bits of Rd with imm16 and keeps the low
               16 bits.  No flags are touched.  Taken only when rD is not
               rejected by isBadRegT. */
   19961    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19962        && INSN0(9,4) == BITS6(1,0,1,1,0,0)
   19963        && INSN1(15,15) == 0) {
   19964       UInt rD = INSN1(11,8);
   19965       if (!isBadRegT(rD)) {
                  /* Same immediate layout as the MOVW case:
                     INSN0[3:0] : INSN0[10] : INSN1[14:12] : INSN1[7:0] */
   19966          UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
   19967                       | (INSN1(14,12) << 8) | INSN1(7,0);
   19968          IRTemp res = newTemp(Ity_I32);
                  /* res = (Rd & 0xFFFF) | (imm16 << 16) */
   19969          assign(res,
   19970                 binop(Iop_Or32,
   19971                       binop(Iop_And32, getIRegT(rD), mkU32(0xFFFF)),
   19972                       mkU32(imm16 << 16)));
   19973          putIRegT(rD, mkexpr(res), condT);
   19974          DIP("movt r%u, #%u\n", rD, imm16);
   19975          goto decode_success;
   19976       }
   19977    }
   19978 
   19979    /* ---------------- LD/ST reg+/-#imm8 ---------------- */
   19980    /* Loads and stores of the form:
   19981          op  Rt, [Rn, #-imm8]      or
   19982          op  Rt, [Rn], #+/-imm8    or
   19983          op  Rt, [Rn, #+/-imm8]!
   19984       where op is one of
   19985          ldrb ldrh ldr  ldrsb ldrsh
   19986          strb strh str
   19987    */
            /* Structure of this case: the switch on INSN0(8,4) selects the
               access size, signedness and direction; the first "if (valid)"
               applies the P/U/W and register restrictions; the second emits
               the IR.  bP selects whether the transfer uses the offset
               address (1) or the unmodified base (0), bU selects add versus
               subtract of imm8, and bW requests writeback of the offset
               address to Rn. */
   19988    if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0) && INSN1(11,11) == 1) {
   19989       Bool   valid  = True;
   19990       Bool   syned  = False;
   19991       Bool   isST   = False;
   19992       IRType ty     = Ity_I8;
   19993       const HChar* nm = "???";
   19994 
   19995       switch (INSN0(8,4)) {
   19996          case BITS5(0,0,0,0,0):   // strb
   19997             nm = "strb"; isST = True; break;
   19998          case BITS5(0,0,0,0,1):   // ldrb
   19999             nm = "ldrb"; break;
   20000          case BITS5(1,0,0,0,1):   // ldrsb
   20001             nm = "ldrsb"; syned = True; break;
   20002          case BITS5(0,0,0,1,0):   // strh
   20003             nm = "strh"; ty = Ity_I16; isST = True; break;
   20004          case BITS5(0,0,0,1,1):   // ldrh
   20005             nm = "ldrh"; ty = Ity_I16; break;
   20006          case BITS5(1,0,0,1,1):   // ldrsh
   20007             nm = "ldrsh"; ty = Ity_I16; syned = True; break;
   20008          case BITS5(0,0,1,0,0):   // str
   20009             nm = "str"; ty = Ity_I32; isST = True; break;
   20010          case BITS5(0,0,1,0,1):
   20011             nm = "ldr"; ty = Ity_I32; break;  // ldr
   20012          default:
   20013             valid = False; break;
   20014       }
   20015 
   20016       UInt rN      = INSN0(3,0);
   20017       UInt rT      = INSN1(15,12);
   20018       UInt bP      = INSN1(10,10);
   20019       UInt bU      = INSN1(9,9);
   20020       UInt bW      = INSN1(8,8);
   20021       UInt imm8    = INSN1(7,0);
   20022       Bool loadsPC = False;
   20023 
   20024       if (valid) {
                  /* P=1,U=1,W=0 is rejected here; presumably that pattern
                     belongs to a different instruction form -- confirm
                     against the architecture manual. */
   20025          if (bP == 1 && bU == 1 && bW == 0)
   20026             valid = False;
                  /* Neither offset addressing nor writeback: rejected. */
   20027          if (bP == 0 && bW == 0)
   20028             valid = False;
   20029          if (rN == 15)
   20030             valid = False;
                  /* Writeback to the register being transferred: rejected. */
   20031          if (bW == 1 && rN == rT)
   20032             valid = False;
   20033          if (ty == Ity_I8 || ty == Ity_I16) {
   20034             if (isBadRegT(rT))
   20035                valid = False;
   20036          } else {
   20037             /* ty == Ity_I32 */
   20038             if (isST && rT == 15)
   20039                valid = False;
                     /* A 32-bit load into r15 is accepted and is handled
                        as a branch further down. */
   20040             if (!isST && rT == 15)
   20041                loadsPC = True;
   20042          }
   20043       }
   20044 
   20045       if (valid) {
   20046          // if it's a branch, it can't happen in the middle of an IT block
   20047          // Also, if it is a branch, make it unconditional at this point.
   20048          // Doing conditional branches in-line is too complex (for now)
   20049          if (loadsPC) {
   20050             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20051             // go uncond
   20052             mk_skip_over_T32_if_cond_is_false(condT);
   20053             condT = IRTemp_INVALID;
   20054             // now uncond
   20055          }
   20056 
   20057          IRTemp preAddr = newTemp(Ity_I32);
   20058          assign(preAddr, getIRegT(rN));
   20059 
   20060          IRTemp postAddr = newTemp(Ity_I32);
   20061          assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   20062                                 mkexpr(preAddr), mkU32(imm8)));
   20063 
                  /* Address actually accessed: base +/- imm8 when P=1,
                     the unmodified base when P=0. */
   20064          IRTemp transAddr = bP == 1 ? postAddr : preAddr;
   20065 
   20066          if (isST) {
   20067 
   20068             /* Store.  If necessary, update the base register before
   20069                the store itself, so that the common idiom of "str rX,
   20070                [sp, #-4]!" (store rX at sp-4, then do new sp = sp-4,
   20071                a.k.a "push rX") doesn't cause Memcheck to complain
   20072                that the access is below the stack pointer.  Also, not
   20073                updating sp before the store confuses Valgrind's
   20074                dynamic stack-extending logic.  So do it before the
   20075                store.  Hence we need to snarf the store data before
   20076                doing the basereg update. */
   20077 
   20078             /* get hold of the data to be stored */
   20079             IRTemp oldRt = newTemp(Ity_I32);
   20080             assign(oldRt, getIRegT(rT));
   20081 
   20082             /* Update Rn if necessary. */
   20083             if (bW == 1) {
   20084                vassert(rN != rT); // assured by validity check above
   20085                putIRegT(rN, mkexpr(postAddr), condT);
   20086             }
   20087 
   20088             /* generate the transfer */
   20089             IRExpr* data = NULL;
   20090             switch (ty) {
   20091                case Ity_I8:
   20092                   data = unop(Iop_32to8, mkexpr(oldRt));
   20093                   break;
   20094                case Ity_I16:
   20095                   data = unop(Iop_32to16, mkexpr(oldRt));
   20096                   break;
   20097                case Ity_I32:
   20098                   data = mkexpr(oldRt);
   20099                   break;
   20100                default:
   20101                   vassert(0);
   20102             }
   20103             storeGuardedLE(mkexpr(transAddr), data, condT);
   20104 
   20105          } else {
   20106 
   20107             /* Load. */
                     /* llOldRt is passed to loadGuardedLE below, alongside
                        the guard condT. */
   20108             IRTemp llOldRt = newTemp(Ity_I32);
   20109             assign(llOldRt, llGetIReg(rT));
   20110 
   20111             /* generate the transfer */
   20112             IRTemp    newRt = newTemp(Ity_I32);
   20113             IRLoadGOp widen = ILGop_INVALID;
   20114             switch (ty) {
   20115                case Ity_I8:
   20116                   widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
   20117                case Ity_I16:
   20118                   widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
   20119                case Ity_I32:
   20120                   widen = ILGop_Ident32; break;
   20121                default:
   20122                   vassert(0);
   20123             }
   20124             loadGuardedLE(newRt, widen,
   20125                           mkexpr(transAddr), mkexpr(llOldRt), condT);
   20126             if (rT == 15) {
   20127                vassert(loadsPC);
   20128                /* We'll do the write to the PC just below */
   20129             } else {
   20130                vassert(!loadsPC);
   20131                /* IRTemp_INVALID is OK here because in the case where
   20132                   condT is false at run time, we're just putting the
   20133                   old rT value back. */
   20134                putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   20135             }
   20136 
   20137             /* Update Rn if necessary. */
   20138             if (bW == 1) {
   20139                vassert(rN != rT); // assured by validity check above
   20140                putIRegT(rN, mkexpr(postAddr), condT);
   20141             }
   20142 
   20143             if (loadsPC) {
   20144                /* Presumably this is an interworking branch. */
   20145                vassert(rN != 15); // assured by validity check above
   20146                vassert(rT == 15);
   20147                vassert(condT == IRTemp_INVALID); /* due to check above */
   20148                llPutIReg(15, mkexpr(newRt));
   20149                dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
   20150                dres.whatNext    = Dis_StopHere;
   20151             }
   20152          }
   20153 
                  /* Trace output: one format per addressing form (offset,
                     pre-indexed with writeback, post-indexed). */
   20154          if (bP == 1 && bW == 0) {
   20155             DIP("%s.w r%u, [r%u, #%c%u]\n",
   20156                 nm, rT, rN, bU ? '+' : '-', imm8);
   20157          }
   20158          else if (bP == 1 && bW == 1) {
   20159             DIP("%s.w r%u, [r%u, #%c%u]!\n",
   20160                 nm, rT, rN, bU ? '+' : '-', imm8);
   20161          }
   20162          else {
   20163             vassert(bP == 0 && bW == 1);
   20164             DIP("%s.w r%u, [r%u], #%c%u\n",
   20165                 nm, rT, rN, bU ? '+' : '-', imm8);
   20166          }
   20167 
   20168          goto decode_success;
   20169       }
   20170    }
   20171 
   20172    /* ------------- LD/ST reg+(reg<<imm2) ------------- */
   20173    /* Loads and stores of the form:
   20174          op  Rt, [Rn, Rm, LSL #imm2]
   20175       where op is one of
   20176          ldrb ldrh ldr  ldrsb ldrsh
   20177          strb strh str
   20178    */
            /* The transfer address is Rn + (Rm << imm2), with imm2 taken
               from INSN1[5:4].  There is no writeback in this form.  As in
               the reg+/-#imm8 case, a 32-bit load into r15 is treated as a
               branch. */
   20179    if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)
   20180        && INSN1(11,6) == BITS6(0,0,0,0,0,0)) {
   20181       Bool   valid  = True;
   20182       Bool   syned  = False;
   20183       Bool   isST   = False;
   20184       IRType ty     = Ity_I8;
   20185       const HChar* nm = "???";
   20186 
   20187       switch (INSN0(8,4)) {
   20188          case BITS5(0,0,0,0,0):   // strb
   20189             nm = "strb"; isST = True; break;
   20190          case BITS5(0,0,0,0,1):   // ldrb
   20191             nm = "ldrb"; break;
   20192          case BITS5(1,0,0,0,1):   // ldrsb
   20193             nm = "ldrsb"; syned = True; break;
   20194          case BITS5(0,0,0,1,0):   // strh
   20195             nm = "strh"; ty = Ity_I16; isST = True; break;
   20196          case BITS5(0,0,0,1,1):   // ldrh
   20197             nm = "ldrh"; ty = Ity_I16; break;
   20198          case BITS5(1,0,0,1,1):   // ldrsh
   20199             nm = "ldrsh"; ty = Ity_I16; syned = True; break;
   20200          case BITS5(0,0,1,0,0):   // str
   20201             nm = "str"; ty = Ity_I32; isST = True; break;
   20202          case BITS5(0,0,1,0,1):
   20203             nm = "ldr"; ty = Ity_I32; break;  // ldr
   20204          default:
   20205             valid = False; break;
   20206       }
   20207 
   20208       UInt rN      = INSN0(3,0);
   20209       UInt rM      = INSN1(3,0);
   20210       UInt rT      = INSN1(15,12);
   20211       UInt imm2    = INSN1(5,4);
   20212       Bool loadsPC = False;
   20213 
               /* These checks run even if the switch above already cleared
                  'valid' (ty is then still Ity_I8); they can only clear it
                  further, never set it back to True. */
   20214       if (ty == Ity_I8 || ty == Ity_I16) {
   20215          /* all 8- and 16-bit load and store cases have the
   20216             same exclusion set. */
   20217          if (rN == 15 || isBadRegT(rT) || isBadRegT(rM))
   20218             valid = False;
   20219       } else {
   20220          vassert(ty == Ity_I32);
   20221          if (rN == 15 || isBadRegT(rM))
   20222             valid = False;
   20223          if (isST && rT == 15)
   20224             valid = False;
   20225          /* If it is a load and rT is 15, that's only allowable if we
   20226             are not in an IT block, or are the last in it.  Need to
   20227             insert a dynamic check for that. */
   20228          if (!isST && rT == 15)
   20229             loadsPC = True;
   20230       }
   20231 
   20232       if (valid) {
   20233          // if it's a branch, it can't happen in the middle of an IT block
   20234          // Also, if it is a branch, make it unconditional at this point.
   20235          // Doing conditional branches in-line is too complex (for now)
   20236          if (loadsPC) {
   20237             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20238             // go uncond
   20239             mk_skip_over_T32_if_cond_is_false(condT);
   20240             condT = IRTemp_INVALID;
   20241             // now uncond
   20242          }
   20243 
                  /* transAddr = Rn + (Rm << imm2) */
   20244          IRTemp transAddr = newTemp(Ity_I32);
   20245          assign(transAddr,
   20246                 binop( Iop_Add32,
   20247                        getIRegT(rN),
   20248                        binop(Iop_Shl32, getIRegT(rM), mkU8(imm2)) ));
   20249 
   20250          if (isST) {
   20251 
   20252             /* get hold of the data to be stored */
   20253             IRTemp oldRt = newTemp(Ity_I32);
   20254             assign(oldRt, getIRegT(rT));
   20255 
   20256             /* generate the transfer */
   20257             IRExpr* data = NULL;
   20258             switch (ty) {
   20259                case Ity_I8:
   20260                   data = unop(Iop_32to8, mkexpr(oldRt));
   20261                   break;
   20262                case Ity_I16:
   20263                   data = unop(Iop_32to16, mkexpr(oldRt));
   20264                   break;
   20265               case Ity_I32:
   20266                   data = mkexpr(oldRt);
   20267                   break;
   20268               default:
   20269                  vassert(0);
   20270             }
   20271             storeGuardedLE(mkexpr(transAddr), data, condT);
   20272 
   20273          } else {
   20274 
   20275             /* Load. */
   20276             IRTemp llOldRt = newTemp(Ity_I32);
   20277             assign(llOldRt, llGetIReg(rT));
   20278 
   20279             /* generate the transfer */
   20280             IRTemp    newRt = newTemp(Ity_I32);
   20281             IRLoadGOp widen = ILGop_INVALID;
   20282             switch (ty) {
   20283                case Ity_I8:
   20284                   widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
   20285                case Ity_I16:
   20286                   widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
   20287                case Ity_I32:
   20288                   widen = ILGop_Ident32; break;
   20289                default:
   20290                   vassert(0);
   20291             }
   20292             loadGuardedLE(newRt, widen,
   20293                           mkexpr(transAddr), mkexpr(llOldRt), condT);
   20294 
   20295             if (rT == 15) {
   20296                vassert(loadsPC);
   20297                /* We'll do the write to the PC just below */
   20298             } else {
   20299                vassert(!loadsPC);
   20300                /* IRTemp_INVALID is OK here because in the case where
   20301                   condT is false at run time, we're just putting the
   20302                   old rT value back. */
   20303                putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   20304             }
   20305 
   20306             if (loadsPC) {
   20307                /* Presumably this is an interworking branch. */
   20308                vassert(rN != 15); // assured by validity check above
   20309                vassert(rT == 15);
   20310                vassert(condT == IRTemp_INVALID); /* due to check above */
   20311                llPutIReg(15, mkexpr(newRt));
   20312                dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
   20313                dres.whatNext    = Dis_StopHere;
   20314             }
   20315          }
   20316 
   20317          DIP("%s.w r%u, [r%u, r%u, LSL #%u]\n",
   20318              nm, rT, rN, rM, imm2);
   20319 
   20320          goto decode_success;
   20321       }
   20322    }
   20323 
   20324    /* --------------- LD/ST reg+imm12 --------------- */
   20325    /* Loads and stores of the form:
   20326          op  Rt, [Rn, #+-imm12]
   20327       where op is one of
   20328          ldrb ldrh ldr  ldrsb ldrsh
   20329          strb strh str
   20330    */
            /* The transfer address is Rn +/- imm12.  Subtraction (bU == 0)
               is accepted only when Rn is r15, in which case the base is
               the value of r15 with its low two bits cleared; r15 as base
               is accepted for loads only.  There is no writeback.  A 32-bit
               load into r15 is treated as a branch, as in the other LD/ST
               cases. */
   20331    if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)) {
   20332       Bool   valid  = True;
   20333       Bool   syned  = INSN0(8,8) == 1;
   20334       Bool   isST   = False;
   20335       IRType ty     = Ity_I8;
   20336       UInt   bU     = INSN0(7,7); // 1: +imm   0: -imm
   20337                                   // -imm is only supported by literal versions
   20338       const HChar* nm = "???";
   20339 
   20340       switch (INSN0(6,4)) {
   20341          case BITS3(0,0,0):   // strb
   20342             nm = "strb"; isST = True; break;
   20343          case BITS3(0,0,1):   // ldrb
   20344             nm = syned ? "ldrsb" : "ldrb"; break;
   20345          case BITS3(0,1,0):   // strh
   20346             nm = "strh"; ty = Ity_I16; isST = True; break;
   20347          case BITS3(0,1,1):   // ldrh
   20348             nm = syned ? "ldrsh" : "ldrh"; ty = Ity_I16; break;
   20349          case BITS3(1,0,0):   // str
   20350             nm = "str"; ty = Ity_I32; isST = True; break;
   20351          case BITS3(1,0,1):
   20352             nm = "ldr"; ty = Ity_I32; break;  // ldr
   20353          default:
   20354             valid = False; break;
   20355       }
   20356 
   20357       UInt rN      = INSN0(3,0);
   20358       UInt rT      = INSN1(15,12);
   20359       UInt imm12   = INSN1(11,0);
   20360       Bool loadsPC = False;
   20361 
   20362       if (rN != 15 && bU == 0) {
   20363          // only pc supports #-imm12
   20364          valid = False;
   20365       }
   20366 
   20367       if (isST) {
                  /* A store with the sign-extend bit set is not accepted. */
   20368          if (syned) valid = False;
   20369          if (rN == 15 || rT == 15)
   20370             valid = False;
   20371       } else {
   20372          /* For a 32-bit load, rT == 15 is only allowable if we are not
   20373             in an IT block, or are the last in it.  Need to insert
   20374             a dynamic check for that.  Also, in this particular
   20375             case, rN == 15 is allowable.  In this case however, the
   20376             value obtained for rN is (apparently)
   20377             "word-align(address of current insn + 4)". */
   20378          if (rT == 15) {
   20379             if (ty == Ity_I32)
   20380                loadsPC = True;
   20381             else // Can't do it for B/H loads
   20382                valid = False;
   20383          }
   20384       }
   20385 
   20386       if (valid) {
   20387          // if it's a branch, it can't happen in the middle of an IT block
   20388          // Also, if it is a branch, make it unconditional at this point.
   20389          // Doing conditional branches in-line is too complex (for now)
   20390          if (loadsPC) {
   20391             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20392             // go uncond
   20393             mk_skip_over_T32_if_cond_is_false(condT);
   20394             condT = IRTemp_INVALID;
   20395             // now uncond
   20396          }
   20397 
   20398          IRTemp rNt = newTemp(Ity_I32);
   20399          if (rN == 15) {
   20400             vassert(!isST);
                     /* Literal form: the base is r15 with bits [1:0] cleared. */
   20401             assign(rNt, binop(Iop_And32, getIRegT(15), mkU32(~3)));
   20402          } else {
   20403             assign(rNt, getIRegT(rN));
   20404          }
   20405 
   20406          IRTemp transAddr = newTemp(Ity_I32);
   20407          assign(transAddr,
   20408                 binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   20409                       mkexpr(rNt), mkU32(imm12)));
   20410 
                  /* oldRt is used only by the store path and llOldRt only
                     by the load path; both are set up here regardless. */
   20411          IRTemp oldRt = newTemp(Ity_I32);
   20412          assign(oldRt, getIRegT(rT));
   20413 
   20414          IRTemp llOldRt = newTemp(Ity_I32);
   20415          assign(llOldRt, llGetIReg(rT));
   20416 
   20417          if (isST) {
   20418             IRExpr* data = NULL;
   20419             switch (ty) {
   20420                case Ity_I8:
   20421                   data = unop(Iop_32to8, mkexpr(oldRt));
   20422                   break;
   20423                case Ity_I16:
   20424                   data = unop(Iop_32to16, mkexpr(oldRt));
   20425                   break;
   20426               case Ity_I32:
   20427                   data = mkexpr(oldRt);
   20428                   break;
   20429               default:
   20430                  vassert(0);
   20431             }
   20432             storeGuardedLE(mkexpr(transAddr), data, condT);
   20433          } else {
   20434             IRTemp    newRt = newTemp(Ity_I32);
   20435             IRLoadGOp widen = ILGop_INVALID;
   20436             switch (ty) {
   20437                case Ity_I8:
   20438                   widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
   20439                case Ity_I16:
   20440                   widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
   20441                case Ity_I32:
   20442                   widen = ILGop_Ident32; break;
   20443                default:
   20444                   vassert(0);
   20445             }
   20446             loadGuardedLE(newRt, widen,
   20447                           mkexpr(transAddr), mkexpr(llOldRt), condT);
   20448             if (rT == 15) {
   20449                vassert(loadsPC);
   20450                /* We'll do the write to the PC just below */
   20451             } else {
   20452                vassert(!loadsPC);
   20453                /* IRTemp_INVALID is OK here because in the case where
   20454                   condT is false at run time, we're just putting the
   20455                   old rT value back. */
   20456                putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   20457             }
   20458 
   20459             if (loadsPC) {
   20460                /* Presumably this is an interworking branch. */
   20461                vassert(rT == 15);
   20462                vassert(condT == IRTemp_INVALID); /* due to check above */
   20463                llPutIReg(15, mkexpr(newRt));
   20464                dres.jk_StopHere = Ijk_Boring;
   20465                dres.whatNext    = Dis_StopHere;
   20466             }
   20467          }
   20468 
                  /* Print the offset's sign from bU: with r15 as base the
                     offset may be subtracted, so a fixed '+' would
                     misreport the instruction. */
   20469          DIP("%s.w r%u, [r%u, #%c%u]\n",
                      nm, rT, rN, bU ? '+' : '-', imm12);
   20470 
   20471          goto decode_success;
   20472       }
   20473    }
   20474 
   20475    /* -------------- LDRD/STRD reg+/-#imm8 -------------- */
   20476    /* Doubleword loads and stores of the form:
   20477          ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]    or
   20478          ldrd/strd  Rt, Rt2, [Rn], #+/-imm8    or
   20479          ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]!
   20480    */
            /* The byte offset is imm8 << 2.  Rt is transferred at the
               transfer address and Rt2 at transfer address + 4.  bP, bU and
               bW select offset addressing, add versus subtract, and
               writeback respectively. */
   20481    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,0) && INSN0(6,6) == 1) {
   20482       UInt bP   = INSN0(8,8);
   20483       UInt bU   = INSN0(7,7);
   20484       UInt bW   = INSN0(5,5);
   20485       UInt bL   = INSN0(4,4);  // 1: load  0: store
   20486       UInt rN   = INSN0(3,0);
   20487       UInt rT   = INSN1(15,12);
   20488       UInt rT2  = INSN1(11,8);
   20489       UInt imm8 = INSN1(7,0);
   20490 
   20491       Bool valid = True;
   20492       if (bP == 0 && bW == 0)                 valid = False;
   20493       if (bW == 1 && (rN == rT || rN == rT2)) valid = False;
   20494       if (isBadRegT(rT) || isBadRegT(rT2))    valid = False;
   20495       if (bL == 1 && rT == rT2)               valid = False;
   20496       /* It's OK to use PC as the base register only in the
   20497          following case: ldrd Rt, Rt2, [PC, #+/-imm8] */
   20498       if (rN == 15 && (bL == 0/*store*/
   20499                        || bW == 1/*wb*/))     valid = False;
   20500 
   20501       if (valid) {
                  /* With r15 as base, the low two bits of its value are
                     cleared before the offset is applied. */
   20502          IRTemp preAddr = newTemp(Ity_I32);
   20503          assign(preAddr, 15 == rN
   20504                            ? binop(Iop_And32, getIRegT(15), mkU32(~3U))
   20505                            : getIRegT(rN));
   20506 
   20507          IRTemp postAddr = newTemp(Ity_I32);
   20508          assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   20509                                 mkexpr(preAddr), mkU32(imm8 << 2)));
   20510 
   20511          IRTemp transAddr = bP == 1 ? postAddr : preAddr;
   20512 
   20513          /* For almost all cases, we do the writeback after the transfers.
   20514             However, that leaves the stack "uncovered" in this case:
   20515                strd    rT, rT2, [sp, #-8]!
   20516             In which case, do the writeback to SP now, instead of later.
   20517             This is bad in that it makes the insn non-restartable if the
   20518             accesses fault, but at least keeps Memcheck happy.
                     Note that the test below does not look at bP, so a
                     post-indexed store with the same base and offset takes
                     the early-writeback path too. */
   20519          Bool writeback_already_done = False;
   20520          if (bL == 0/*store*/ && bW == 1/*wb*/
   20521              && rN == 13 && rN != rT && rN != rT2
   20522              && bU == 0/*minus*/ && (imm8 << 2) == 8) {
   20523             putIRegT(rN, mkexpr(postAddr), condT);
   20524             writeback_already_done = True;
   20525          }
   20526 
   20527          if (bL == 0) {
                     /* Store: read both source registers first, then emit
                        the two guarded word stores. */
   20528             IRTemp oldRt  = newTemp(Ity_I32);
   20529             IRTemp oldRt2 = newTemp(Ity_I32);
   20530             assign(oldRt,  getIRegT(rT));
   20531             assign(oldRt2, getIRegT(rT2));
   20532             storeGuardedLE( mkexpr(transAddr),
   20533                             mkexpr(oldRt), condT );
   20534             storeGuardedLE( binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
   20535                             mkexpr(oldRt2), condT );
   20536          } else {
                     /* Load: the current register values are passed to the
                        guarded loads alongside the guard condT. */
   20537             IRTemp oldRt  = newTemp(Ity_I32);
   20538             IRTemp oldRt2 = newTemp(Ity_I32);
   20539             IRTemp newRt  = newTemp(Ity_I32);
   20540             IRTemp newRt2 = newTemp(Ity_I32);
   20541             assign(oldRt,  llGetIReg(rT));
   20542             assign(oldRt2, llGetIReg(rT2));
   20543             loadGuardedLE( newRt, ILGop_Ident32,
   20544                            mkexpr(transAddr),
   20545                            mkexpr(oldRt), condT );
   20546             loadGuardedLE( newRt2, ILGop_Ident32,
   20547                            binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
   20548                            mkexpr(oldRt2), condT );
   20549             /* Put unconditionally, since we already switched on the condT
   20550                in the guarded loads. */
   20551             putIRegT(rT,  mkexpr(newRt),  IRTemp_INVALID);
   20552             putIRegT(rT2, mkexpr(newRt2), IRTemp_INVALID);
   20553          }
   20554 
   20555          if (bW == 1 && !writeback_already_done) {
   20556             putIRegT(rN, mkexpr(postAddr), condT);
   20557          }
   20558 
   20559          const HChar* nm = bL ? "ldrd" : "strd";
   20560 
   20561          if (bP == 1 && bW == 0) {
   20562             DIP("%s.w r%u, r%u, [r%u, #%c%u]\n",
   20563                 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
   20564          }
   20565          else if (bP == 1 && bW == 1) {
   20566             DIP("%s.w r%u, r%u, [r%u, #%c%u]!\n",
   20567                 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
   20568          }
   20569          else {
   20570             vassert(bP == 0 && bW == 1);
   20571             DIP("%s.w r%u, r%u, [r%u], #%c%u\n",
   20572                 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
   20573          }
   20574 
   20575          goto decode_success;
   20576       }
   20577    }
   20578 
   20579    /* -------------- (T3) Bcond.W label -------------- */
   20580    /* This variant carries its own condition, so can't be part of an
   20581       IT block ... */
            /* Emits a conditional side exit to the branch target and then
               ends the block with r15 set to the address of the following
               instruction (current + 4).  Both addresses have bit 0 set
               (the CPSR.T marker).  Condition values ARMCondAL and
               ARMCondNV are not handled by this case. */
   20582    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   20583        && INSN1(15,14) == BITS2(1,0)
   20584        && INSN1(12,12) == 0) {
   20585       UInt cond = INSN0(9,6);
   20586       if (cond != ARMCondAL && cond != ARMCondNV) {
                  /* 21-bit offset, most significant bit first:
                     INSN0[10] : INSN1[11] : INSN1[13] : INSN0[5:0]
                     : INSN1[10:0] : 0 */
   20587          Int simm21
   20588             =   (INSN0(10,10) << (1 + 1 + 6 + 11 + 1))
   20589               | (INSN1(11,11) << (1 + 6 + 11 + 1))
   20590               | (INSN1(13,13) << (6 + 11 + 1))
   20591               | (INSN0(5,0)   << (11 + 1))
   20592               | (INSN1(10,0)  << 1);
                  /* Sign-extend from bit 20.  The left shift is done on an
                     unsigned value: shifting a signed Int so that a 1 lands
                     in the sign bit is undefined behaviour in C. */
   20593          simm21 = ((Int)((UInt)simm21 << 11)) >> 11;
   20594 
   20595          vassert(0 == (guest_R15_curr_instr_notENC & 1));
   20596          UInt dst = simm21 + guest_R15_curr_instr_notENC + 4;
   20597 
   20598          /* Not allowed in an IT block; SIGILL if so. */
   20599          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   20600 
   20601          IRTemp kondT = newTemp(Ity_I32);
   20602          assign( kondT, mk_armg_calculate_condition(cond) );
                  /* Taken path: exit to dst.  Not-taken path: continue at
                     the next instruction. */
   20603          stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
   20604                             Ijk_Boring,
   20605                             IRConst_U32(dst | 1/*CPSR.T*/),
   20606                             OFFB_R15T ));
   20607          llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 4)
   20608                               | 1 /*CPSR.T*/ ));
   20609          dres.jk_StopHere = Ijk_Boring;
   20610          dres.whatNext    = Dis_StopHere;
   20611          DIP("b%s.w 0x%x\n", nCC(cond), dst);
   20612          goto decode_success;
   20613       }
   20614    }
   20615 
   20616    /* ---------------- (T4) B.W label ---------------- */
   20617    /* ... whereas this variant doesn't carry its own condition, so it
   20618       has to be either unconditional or the conditional by virtue of
   20619       being the last in an IT block.  The upside is that there's 4
   20620       more bits available for the jump offset, so it has a 16-times
   20621       greater branch range than the T3 variant. */
            /* Ends the block with r15 set to the branch target, with bit 0
               set (the CPSR.T marker).  Any IT-block condition is dealt
               with beforehand by mk_skip_over_T32_if_cond_is_false, after
               which the branch itself is emitted unconditionally. */
   20622    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   20623        && INSN1(15,14) == BITS2(1,0)
   20624        && INSN1(12,12) == 1) {
   20625       if (1) {
   20626          UInt bS  = INSN0(10,10);
   20627          UInt bJ1 = INSN1(13,13);
   20628          UInt bJ2 = INSN1(11,11);
                  /* I1 = NOT(J1 XOR S), I2 = NOT(J2 XOR S) */
   20629          UInt bI1 = 1 ^ (bJ1 ^ bS);
   20630          UInt bI2 = 1 ^ (bJ2 ^ bS);
                  /* 25-bit offset, most significant bit first:
                     S : I1 : I2 : INSN0[9:0] : INSN1[10:0] : 0 */
   20631          Int simm25
   20632             =   (bS          << (1 + 1 + 10 + 11 + 1))
   20633               | (bI1         << (1 + 10 + 11 + 1))
   20634               | (bI2         << (10 + 11 + 1))
   20635               | (INSN0(9,0)  << (11 + 1))
   20636               | (INSN1(10,0) << 1);
                  /* Sign-extend from bit 24.  The left shift is done on an
                     unsigned value: shifting a signed Int so that a 1 lands
                     in the sign bit is undefined behaviour in C. */
   20637          simm25 = ((Int)((UInt)simm25 << 7)) >> 7;
   20638 
   20639          vassert(0 == (guest_R15_curr_instr_notENC & 1));
   20640          UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
   20641 
   20642          /* If in an IT block, must be the last insn. */
   20643          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20644 
   20645          // go uncond
   20646          mk_skip_over_T32_if_cond_is_false(condT);
   20647          condT = IRTemp_INVALID;
   20648          // now uncond
   20649 
   20650          // branch to dst
   20651          llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
   20652          dres.jk_StopHere = Ijk_Boring;
   20653          dres.whatNext    = Dis_StopHere;
   20654          DIP("b.w 0x%x\n", dst);
   20655          goto decode_success;
   20656       }
   20657    }
   20658 
   20659    /* ------------------ TBB, TBH ------------------ */
   20660    if (INSN0(15,4) == 0xE8D && INSN1(15,5) == 0x780) {
   20661       UInt rN = INSN0(3,0);
   20662       UInt rM = INSN1(3,0);
   20663       UInt bH = INSN1(4,4);
   20664       if (bH/*ATC*/ || (rN != 13 && !isBadRegT(rM))) {
   20665          /* Must be last or not-in IT block */
   20666          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20667          /* Go uncond */
   20668          mk_skip_over_T32_if_cond_is_false(condT);
   20669          condT = IRTemp_INVALID;
   20670 
   20671          IRExpr* ea
   20672              = binop(Iop_Add32,
   20673                      getIRegT(rN),
   20674                      bH ? binop(Iop_Shl32, getIRegT(rM), mkU8(1))
   20675                         : getIRegT(rM));
   20676 
   20677          IRTemp delta = newTemp(Ity_I32);
   20678          if (bH) {
   20679             assign(delta, unop(Iop_16Uto32, loadLE(Ity_I16, ea)));
   20680          } else {
   20681             assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
   20682          }
   20683 
   20684          llPutIReg(
   20685             15,
   20686             binop(Iop_Or32,
   20687                   binop(Iop_Add32,
   20688                         getIRegT(15),
   20689                         binop(Iop_Shl32, mkexpr(delta), mkU8(1))
   20690                   ),
   20691                   mkU32(1)
   20692          ));
   20693          dres.jk_StopHere = Ijk_Boring;
   20694          dres.whatNext    = Dis_StopHere;
   20695          DIP("tb%c [r%u, r%u%s]\n",
   20696              bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
   20697          goto decode_success;
   20698       }
   20699    }
   20700 
   20701    /* ------------------ UBFX ------------------ */
   20702    /* ------------------ SBFX ------------------ */
   20703    /* There's also ARM versions of same, but it doesn't seem worth the
   20704       hassle to common up the handling (it's only a couple of C
   20705       statements). */
   20706    if ((INSN0(15,4) == 0xF3C // UBFX
   20707         || INSN0(15,4) == 0xF34) // SBFX
   20708        && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
   20709       UInt rN  = INSN0(3,0);
   20710       UInt rD  = INSN1(11,8);
   20711       UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
   20712       UInt wm1 = INSN1(4,0);
   20713       UInt msb =  lsb + wm1;
   20714       if (!isBadRegT(rD) && !isBadRegT(rN) && msb <= 31) {
   20715          Bool   isU  = INSN0(15,4) == 0xF3C;
   20716          IRTemp src  = newTemp(Ity_I32);
   20717          IRTemp tmp  = newTemp(Ity_I32);
   20718          IRTemp res  = newTemp(Ity_I32);
   20719          UInt   mask = ((1 << wm1) - 1) + (1 << wm1);
   20720          vassert(msb >= 0 && msb <= 31);
   20721          vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
   20722 
   20723          assign(src, getIRegT(rN));
   20724          assign(tmp, binop(Iop_And32,
   20725                            binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
   20726                            mkU32(mask)));
   20727          assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
   20728                            binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
   20729                            mkU8(31-wm1)));
   20730 
   20731          putIRegT(rD, mkexpr(res), condT);
   20732 
   20733          DIP("%s r%u, r%u, #%u, #%u\n",
   20734              isU ? "ubfx" : "sbfx", rD, rN, lsb, wm1 + 1);
   20735          goto decode_success;
   20736       }
   20737    }
   20738 
   20739    /* ------------------ UXTB ------------------ */
   20740    /* ------------------ UXTH ------------------ */
   20741    /* ------------------ SXTB ------------------ */
   20742    /* ------------------ SXTH ------------------ */
   20743    /* ----------------- UXTB16 ----------------- */
   20744    /* ----------------- SXTB16 ----------------- */
   20745    /* FIXME: this is an exact duplicate of the ARM version.  They
   20746       should be commoned up. */
   20747    if ((INSN0(15,0) == 0xFA5F     // UXTB
   20748         || INSN0(15,0) == 0xFA1F  // UXTH
   20749         || INSN0(15,0) == 0xFA4F  // SXTB
   20750         || INSN0(15,0) == 0xFA0F  // SXTH
   20751         || INSN0(15,0) == 0xFA3F  // UXTB16
   20752         || INSN0(15,0) == 0xFA2F) // SXTB16
   20753        && INSN1(15,12) == BITS4(1,1,1,1)
   20754        && INSN1(7,6) == BITS2(1,0)) {
   20755       UInt rD = INSN1(11,8);
   20756       UInt rM = INSN1(3,0);
   20757       UInt rot = INSN1(5,4);
   20758       if (!isBadRegT(rD) && !isBadRegT(rM)) {
   20759          const HChar* nm = "???";
   20760          IRTemp srcT = newTemp(Ity_I32);
   20761          IRTemp rotT = newTemp(Ity_I32);
   20762          IRTemp dstT = newTemp(Ity_I32);
   20763          assign(srcT, getIRegT(rM));
   20764          assign(rotT, genROR32(srcT, 8 * rot));
   20765          switch (INSN0(15,0)) {
   20766             case 0xFA5F: // UXTB
   20767                nm = "uxtb";
   20768                assign(dstT, unop(Iop_8Uto32,
   20769                                  unop(Iop_32to8, mkexpr(rotT))));
   20770                break;
   20771             case 0xFA1F: // UXTH
   20772                nm = "uxth";
   20773                assign(dstT, unop(Iop_16Uto32,
   20774                                  unop(Iop_32to16, mkexpr(rotT))));
   20775                break;
   20776             case 0xFA4F: // SXTB
   20777                nm = "sxtb";
   20778                assign(dstT, unop(Iop_8Sto32,
   20779                                  unop(Iop_32to8, mkexpr(rotT))));
   20780                break;
   20781             case 0xFA0F: // SXTH
   20782                nm = "sxth";
   20783                assign(dstT, unop(Iop_16Sto32,
   20784                                  unop(Iop_32to16, mkexpr(rotT))));
   20785                break;
   20786             case 0xFA3F: // UXTB16
   20787                nm = "uxtb16";
   20788                assign(dstT, binop(Iop_And32, mkexpr(rotT),
   20789                                              mkU32(0x00FF00FF)));
   20790                break;
   20791             case 0xFA2F: { // SXTB16
   20792                nm = "sxtb16";
   20793                IRTemp lo32 = newTemp(Ity_I32);
   20794                IRTemp hi32 = newTemp(Ity_I32);
   20795                assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
   20796                assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
   20797                assign(
   20798                   dstT,
   20799                   binop(Iop_Or32,
   20800                         binop(Iop_And32,
   20801                               unop(Iop_8Sto32,
   20802                                    unop(Iop_32to8, mkexpr(lo32))),
   20803                               mkU32(0xFFFF)),
   20804                         binop(Iop_Shl32,
   20805                               unop(Iop_8Sto32,
   20806                                    unop(Iop_32to8, mkexpr(hi32))),
   20807                               mkU8(16))
   20808                ));
   20809                break;
   20810             }
   20811             default:
   20812                vassert(0);
   20813          }
   20814          putIRegT(rD, mkexpr(dstT), condT);
   20815          DIP("%s r%u, r%u, ror #%u\n", nm, rD, rM, 8 * rot);
   20816          goto decode_success;
   20817       }
   20818    }
   20819 
   20820    /* -------------- MUL.W Rd, Rn, Rm -------------- */
   20821    if (INSN0(15,4) == 0xFB0
   20822        && (INSN1(15,0) & 0xF0F0) == 0xF000) {
   20823       UInt rN = INSN0(3,0);
   20824       UInt rD = INSN1(11,8);
   20825       UInt rM = INSN1(3,0);
   20826       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   20827          IRTemp res = newTemp(Ity_I32);
   20828          assign(res, binop(Iop_Mul32, getIRegT(rN), getIRegT(rM)));
   20829          putIRegT(rD, mkexpr(res), condT);
   20830          DIP("mul.w r%u, r%u, r%u\n", rD, rN, rM);
   20831          goto decode_success;
   20832       }
   20833    }
   20834 
   20835    /* -------------- SDIV.W Rd, Rn, Rm -------------- */
   20836    if (INSN0(15,4) == 0xFB9
   20837        && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
   20838       UInt rN = INSN0(3,0);
   20839       UInt rD = INSN1(11,8);
   20840       UInt rM = INSN1(3,0);
   20841       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   20842          IRTemp res  = newTemp(Ity_I32);
   20843          IRTemp argL = newTemp(Ity_I32);
   20844          IRTemp argR = newTemp(Ity_I32);
   20845          assign(argL, getIRegT(rN));
   20846          assign(argR, getIRegT(rM));
   20847          assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
   20848          putIRegT(rD, mkexpr(res), condT);
   20849          DIP("sdiv.w r%u, r%u, r%u\n", rD, rN, rM);
   20850          goto decode_success;
   20851       }
   20852    }
   20853 
   20854    /* -------------- UDIV.W Rd, Rn, Rm -------------- */
   20855    if (INSN0(15,4) == 0xFBB
   20856        && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
   20857       UInt rN = INSN0(3,0);
   20858       UInt rD = INSN1(11,8);
   20859       UInt rM = INSN1(3,0);
   20860       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   20861          IRTemp res  = newTemp(Ity_I32);
   20862          IRTemp argL = newTemp(Ity_I32);
   20863          IRTemp argR = newTemp(Ity_I32);
   20864          assign(argL, getIRegT(rN));
   20865          assign(argR, getIRegT(rM));
   20866          assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
   20867          putIRegT(rD, mkexpr(res), condT);
   20868          DIP("udiv.w r%u, r%u, r%u\n", rD, rN, rM);
   20869          goto decode_success;
   20870       }
   20871    }
   20872 
   20873    /* ------------------ {U,S}MULL ------------------ */
   20874    if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
   20875        && INSN1(7,4) == BITS4(0,0,0,0)) {
   20876       UInt isU  = INSN0(5,5);
   20877       UInt rN   = INSN0(3,0);
   20878       UInt rDlo = INSN1(15,12);
   20879       UInt rDhi = INSN1(11,8);
   20880       UInt rM   = INSN1(3,0);
   20881       if (!isBadRegT(rDhi) && !isBadRegT(rDlo)
   20882           && !isBadRegT(rN) && !isBadRegT(rM) && rDlo != rDhi) {
   20883          IRTemp res   = newTemp(Ity_I64);
   20884          assign(res, binop(isU ? Iop_MullU32 : Iop_MullS32,
   20885                            getIRegT(rN), getIRegT(rM)));
   20886          putIRegT( rDhi, unop(Iop_64HIto32, mkexpr(res)), condT );
   20887          putIRegT( rDlo, unop(Iop_64to32, mkexpr(res)), condT );
   20888          DIP("%cmull r%u, r%u, r%u, r%u\n",
   20889              isU ? 'u' : 's', rDlo, rDhi, rN, rM);
   20890          goto decode_success;
   20891       }
   20892    }
   20893 
   20894    /* ------------------ ML{A,S} ------------------ */
   20895    if (INSN0(15,4) == 0xFB0
   20896        && (   INSN1(7,4) == BITS4(0,0,0,0)    // MLA
   20897            || INSN1(7,4) == BITS4(0,0,0,1))) { // MLS
   20898       UInt rN = INSN0(3,0);
   20899       UInt rA = INSN1(15,12);
   20900       UInt rD = INSN1(11,8);
   20901       UInt rM = INSN1(3,0);
   20902       if (!isBadRegT(rD) && !isBadRegT(rN)
   20903           && !isBadRegT(rM) && !isBadRegT(rA)) {
   20904          Bool   isMLA = INSN1(7,4) == BITS4(0,0,0,0);
   20905          IRTemp res   = newTemp(Ity_I32);
   20906          assign(res,
   20907                 binop(isMLA ? Iop_Add32 : Iop_Sub32,
   20908                       getIRegT(rA),
   20909                       binop(Iop_Mul32, getIRegT(rN), getIRegT(rM))));
   20910          putIRegT(rD, mkexpr(res), condT);
   20911          DIP("%s r%u, r%u, r%u, r%u\n",
   20912              isMLA ? "mla" : "mls", rD, rN, rM, rA);
   20913          goto decode_success;
   20914       }
   20915    }
   20916 
   20917    /* ------------------ (T3) ADR ------------------ */
   20918    if ((INSN0(15,0) == 0xF20F || INSN0(15,0) == 0xF60F)
   20919        && INSN1(15,15) == 0) {
   20920       /* rD = align4(PC) + imm32 */
   20921       UInt rD = INSN1(11,8);
   20922       if (!isBadRegT(rD)) {
   20923          UInt imm32 = (INSN0(10,10) << 11)
   20924                       | (INSN1(14,12) << 8) | INSN1(7,0);
   20925          putIRegT(rD, binop(Iop_Add32,
   20926                             binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   20927                             mkU32(imm32)),
   20928                       condT);
   20929          DIP("add r%u, pc, #%u\n", rD, imm32);
   20930          goto decode_success;
   20931       }
   20932    }
   20933 
   20934    /* ----------------- (T1) UMLAL ----------------- */
   20935    /* ----------------- (T1) SMLAL ----------------- */
   20936    if ((INSN0(15,4) == 0xFBE // UMLAL
   20937         || INSN0(15,4) == 0xFBC) // SMLAL
   20938        && INSN1(7,4) == BITS4(0,0,0,0)) {
   20939       UInt rN   = INSN0(3,0);
   20940       UInt rDlo = INSN1(15,12);
   20941       UInt rDhi = INSN1(11,8);
   20942       UInt rM   = INSN1(3,0);
   20943       if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
   20944           && !isBadRegT(rM) && rDhi != rDlo) {
   20945          Bool   isS   = INSN0(15,4) == 0xFBC;
   20946          IRTemp argL  = newTemp(Ity_I32);
   20947          IRTemp argR  = newTemp(Ity_I32);
   20948          IRTemp old   = newTemp(Ity_I64);
   20949          IRTemp res   = newTemp(Ity_I64);
   20950          IRTemp resHi = newTemp(Ity_I32);
   20951          IRTemp resLo = newTemp(Ity_I32);
   20952          IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
   20953          assign( argL, getIRegT(rM));
   20954          assign( argR, getIRegT(rN));
   20955          assign( old, binop(Iop_32HLto64, getIRegT(rDhi), getIRegT(rDlo)) );
   20956          assign( res, binop(Iop_Add64,
   20957                             mkexpr(old),
   20958                             binop(mulOp, mkexpr(argL), mkexpr(argR))) );
   20959          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   20960          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   20961          putIRegT( rDhi, mkexpr(resHi), condT );
   20962          putIRegT( rDlo, mkexpr(resLo), condT );
   20963          DIP("%cmlal r%u, r%u, r%u, r%u\n",
   20964              isS ? 's' : 'u', rDlo, rDhi, rN, rM);
   20965          goto decode_success;
   20966       }
   20967    }
   20968 
   20969    /* ------------------ (T1) UMAAL ------------------ */
   20970    if (INSN0(15,4) == 0xFBE && INSN1(7,4) == BITS4(0,1,1,0)) {
   20971       UInt rN   = INSN0(3,0);
   20972       UInt rDlo = INSN1(15,12);
   20973       UInt rDhi = INSN1(11,8);
   20974       UInt rM   = INSN1(3,0);
   20975       if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
   20976           && !isBadRegT(rM) && rDhi != rDlo) {
   20977          IRTemp argN   = newTemp(Ity_I32);
   20978          IRTemp argM   = newTemp(Ity_I32);
   20979          IRTemp argDhi = newTemp(Ity_I32);
   20980          IRTemp argDlo = newTemp(Ity_I32);
   20981          IRTemp res    = newTemp(Ity_I64);
   20982          IRTemp resHi  = newTemp(Ity_I32);
   20983          IRTemp resLo  = newTemp(Ity_I32);
   20984          assign( argN,   getIRegT(rN) );
   20985          assign( argM,   getIRegT(rM) );
   20986          assign( argDhi, getIRegT(rDhi) );
   20987          assign( argDlo, getIRegT(rDlo) );
   20988          assign( res,
   20989                  binop(Iop_Add64,
   20990                        binop(Iop_Add64,
   20991                              binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
   20992                              unop(Iop_32Uto64, mkexpr(argDhi))),
   20993                        unop(Iop_32Uto64, mkexpr(argDlo))) );
   20994          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   20995          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   20996          putIRegT( rDhi, mkexpr(resHi), condT );
   20997          putIRegT( rDlo, mkexpr(resLo), condT );
   20998          DIP("umaal r%u, r%u, r%u, r%u\n", rDlo, rDhi, rN, rM);
   20999          goto decode_success;
   21000       }
   21001    }
   21002 
   21003    /* ------------------- (T1) SMMUL{R} ------------------ */
   21004    if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
   21005        && INSN0(6,4) == BITS3(1,0,1)
   21006        && INSN1(15,12) == BITS4(1,1,1,1)
   21007        && INSN1(7,5) == BITS3(0,0,0)) {
   21008       UInt bitR = INSN1(4,4);
   21009       UInt rD = INSN1(11,8);
   21010       UInt rM = INSN1(3,0);
   21011       UInt rN = INSN0(3,0);
   21012       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   21013          IRExpr* res
   21014          = unop(Iop_64HIto32,
   21015                 binop(Iop_Add64,
   21016                       binop(Iop_MullS32, getIRegT(rN), getIRegT(rM)),
   21017                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
   21018          putIRegT(rD, res, condT);
   21019          DIP("smmul%s r%u, r%u, r%u\n",
   21020              bitR ? "r" : "", rD, rN, rM);
   21021          goto decode_success;
   21022       }
   21023    }
   21024 
   21025    /* ------------------- (T1) SMMLA{R} ------------------ */
   21026    if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
   21027        && INSN0(6,4) == BITS3(1,0,1)
   21028        && INSN1(7,5) == BITS3(0,0,0)) {
   21029       UInt bitR = INSN1(4,4);
   21030       UInt rA = INSN1(15,12);
   21031       UInt rD = INSN1(11,8);
   21032       UInt rM = INSN1(3,0);
   21033       UInt rN = INSN0(3,0);
   21034       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && (rA != 13)) {
   21035          IRExpr* res
   21036          = unop(Iop_64HIto32,
   21037                 binop(Iop_Add64,
   21038                       binop(Iop_Add64,
   21039                             binop(Iop_32HLto64, getIRegT(rA), mkU32(0)),
   21040                             binop(Iop_MullS32, getIRegT(rN), getIRegT(rM))),
   21041                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
   21042          putIRegT(rD, res, condT);
   21043          DIP("smmla%s r%u, r%u, r%u, r%u\n",
   21044              bitR ? "r" : "", rD, rN, rM, rA);
   21045          goto decode_success;
   21046       }
   21047    }
   21048 
   21049    /* ------------------ (T2) ADR ------------------ */
   21050    if ((INSN0(15,0) == 0xF2AF || INSN0(15,0) == 0xF6AF)
   21051        && INSN1(15,15) == 0) {
   21052       /* rD = align4(PC) - imm32 */
   21053       UInt rD = INSN1(11,8);
   21054       if (!isBadRegT(rD)) {
   21055          UInt imm32 = (INSN0(10,10) << 11)
   21056                       | (INSN1(14,12) << 8) | INSN1(7,0);
   21057          putIRegT(rD, binop(Iop_Sub32,
   21058                             binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   21059                             mkU32(imm32)),
   21060                       condT);
   21061          DIP("sub r%u, pc, #%u\n", rD, imm32);
   21062          goto decode_success;
   21063       }
   21064    }
   21065 
   21066    /* ------------------- (T1) BFI ------------------- */
   21067    /* ------------------- (T1) BFC ------------------- */
   21068    if (INSN0(15,4) == 0xF36 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
   21069       UInt rD  = INSN1(11,8);
   21070       UInt rN  = INSN0(3,0);
   21071       UInt msb = INSN1(4,0);
   21072       UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
   21073       if (isBadRegT(rD) || rN == 13 || msb < lsb) {
   21074          /* undecodable; fall through */
   21075       } else {
   21076          IRTemp src    = newTemp(Ity_I32);
   21077          IRTemp olddst = newTemp(Ity_I32);
   21078          IRTemp newdst = newTemp(Ity_I32);
   21079          UInt   mask = 1 << (msb - lsb);
   21080          mask = (mask - 1) + mask;
   21081          vassert(mask != 0); // guaranteed by "msb < lsb" check above
   21082          mask <<= lsb;
   21083 
   21084          assign(src, rN == 15 ? mkU32(0) : getIRegT(rN));
   21085          assign(olddst, getIRegT(rD));
   21086          assign(newdst,
   21087                 binop(Iop_Or32,
   21088                    binop(Iop_And32,
   21089                          binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
   21090                          mkU32(mask)),
   21091                    binop(Iop_And32,
   21092                          mkexpr(olddst),
   21093                          mkU32(~mask)))
   21094                );
   21095 
   21096          putIRegT(rD, mkexpr(newdst), condT);
   21097 
   21098          if (rN == 15) {
   21099             DIP("bfc r%u, #%u, #%u\n",
   21100                 rD, lsb, msb-lsb+1);
   21101          } else {
   21102             DIP("bfi r%u, r%u, #%u, #%u\n",
   21103                 rD, rN, lsb, msb-lsb+1);
   21104          }
   21105          goto decode_success;
   21106       }
   21107    }
   21108 
   21109    /* ------------------- (T1) SXTAH ------------------- */
   21110    /* ------------------- (T1) UXTAH ------------------- */
   21111    if ((INSN0(15,4) == 0xFA1      // UXTAH
   21112         || INSN0(15,4) == 0xFA0)  // SXTAH
   21113        && INSN1(15,12) == BITS4(1,1,1,1)
   21114        && INSN1(7,6) == BITS2(1,0)) {
   21115       Bool isU = INSN0(15,4) == 0xFA1;
   21116       UInt rN  = INSN0(3,0);
   21117       UInt rD  = INSN1(11,8);
   21118       UInt rM  = INSN1(3,0);
   21119       UInt rot = INSN1(5,4);
   21120       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   21121          IRTemp srcL = newTemp(Ity_I32);
   21122          IRTemp srcR = newTemp(Ity_I32);
   21123          IRTemp res  = newTemp(Ity_I32);
   21124          assign(srcR, getIRegT(rM));
   21125          assign(srcL, getIRegT(rN));
   21126          assign(res,  binop(Iop_Add32,
   21127                             mkexpr(srcL),
   21128                             unop(isU ? Iop_16Uto32 : Iop_16Sto32,
   21129                                  unop(Iop_32to16,
   21130                                       genROR32(srcR, 8 * rot)))));
   21131          putIRegT(rD, mkexpr(res), condT);
   21132          DIP("%cxtah r%u, r%u, r%u, ror #%u\n",
   21133              isU ? 'u' : 's', rD, rN, rM, rot);
   21134          goto decode_success;
   21135       }
   21136    }
   21137 
   21138    /* ------------------- (T1) SXTAB ------------------- */
   21139    /* ------------------- (T1) UXTAB ------------------- */
   21140    if ((INSN0(15,4) == 0xFA5      // UXTAB
   21141         || INSN0(15,4) == 0xFA4)  // SXTAB
   21142        && INSN1(15,12) == BITS4(1,1,1,1)
   21143        && INSN1(7,6) == BITS2(1,0)) {
   21144       Bool isU = INSN0(15,4) == 0xFA5;
   21145       UInt rN  = INSN0(3,0);
   21146       UInt rD  = INSN1(11,8);
   21147       UInt rM  = INSN1(3,0);
   21148       UInt rot = INSN1(5,4);
   21149       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   21150          IRTemp srcL = newTemp(Ity_I32);
   21151          IRTemp srcR = newTemp(Ity_I32);
   21152          IRTemp res  = newTemp(Ity_I32);
   21153          assign(srcR, getIRegT(rM));
   21154          assign(srcL, getIRegT(rN));
   21155          assign(res,  binop(Iop_Add32,
   21156                             mkexpr(srcL),
   21157                             unop(isU ? Iop_8Uto32 : Iop_8Sto32,
   21158                                  unop(Iop_32to8,
   21159                                       genROR32(srcR, 8 * rot)))));
   21160          putIRegT(rD, mkexpr(res), condT);
   21161          DIP("%cxtab r%u, r%u, r%u, ror #%u\n",
   21162              isU ? 'u' : 's', rD, rN, rM, rot);
   21163          goto decode_success;
   21164       }
   21165    }
   21166 
   21167    /* ------------------- (T1) CLZ ------------------- */
   21168    if (INSN0(15,4) == 0xFAB
   21169        && INSN1(15,12) == BITS4(1,1,1,1)
   21170        && INSN1(7,4) == BITS4(1,0,0,0)) {
   21171       UInt rM1 = INSN0(3,0);
   21172       UInt rD  = INSN1(11,8);
   21173       UInt rM2 = INSN1(3,0);
   21174       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   21175          IRTemp arg = newTemp(Ity_I32);
   21176          IRTemp res = newTemp(Ity_I32);
   21177          assign(arg, getIRegT(rM1));
   21178          assign(res, IRExpr_ITE(
   21179                         binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
   21180                         mkU32(32),
   21181                         unop(Iop_Clz32, mkexpr(arg))
   21182          ));
   21183          putIRegT(rD, mkexpr(res), condT);
   21184          DIP("clz r%u, r%u\n", rD, rM1);
   21185          goto decode_success;
   21186       }
   21187    }
   21188 
   21189    /* ------------------- (T1) RBIT ------------------- */
   21190    if (INSN0(15,4) == 0xFA9
   21191        && INSN1(15,12) == BITS4(1,1,1,1)
   21192        && INSN1(7,4) == BITS4(1,0,1,0)) {
   21193       UInt rM1 = INSN0(3,0);
   21194       UInt rD  = INSN1(11,8);
   21195       UInt rM2 = INSN1(3,0);
   21196       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   21197          IRTemp arg = newTemp(Ity_I32);
   21198          assign(arg, getIRegT(rM1));
   21199          IRTemp res = gen_BITREV(arg);
   21200          putIRegT(rD, mkexpr(res), condT);
   21201          DIP("rbit r%u, r%u\n", rD, rM1);
   21202          goto decode_success;
   21203       }
   21204    }
   21205 
   21206    /* ------------------- (T2) REV   ------------------- */
   21207    /* ------------------- (T2) REV16 ------------------- */
   21208    if (INSN0(15,4) == 0xFA9
   21209        && INSN1(15,12) == BITS4(1,1,1,1)
   21210        && (   INSN1(7,4) == BITS4(1,0,0,0)     // REV
   21211            || INSN1(7,4) == BITS4(1,0,0,1))) { // REV16
   21212       UInt rM1   = INSN0(3,0);
   21213       UInt rD    = INSN1(11,8);
   21214       UInt rM2   = INSN1(3,0);
   21215       Bool isREV = INSN1(7,4) == BITS4(1,0,0,0);
   21216       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   21217          IRTemp arg = newTemp(Ity_I32);
   21218          assign(arg, getIRegT(rM1));
   21219          IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
   21220          putIRegT(rD, mkexpr(res), condT);
   21221          DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM1);
   21222          goto decode_success;
   21223       }
   21224    }
   21225 
   21226    /* ------------------- (T2) REVSH ------------------ */
   21227    if (INSN0(15,4) == 0xFA9
   21228        && INSN1(15,12) == BITS4(1,1,1,1)
   21229        && INSN1(7,4) == BITS4(1,0,1,1)) {
   21230       UInt rM1 = INSN0(3,0);
   21231       UInt rM2 = INSN1(3,0);
   21232       UInt rD  = INSN1(11,8);
   21233       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   21234          IRTemp irt_rM  = newTemp(Ity_I32);
   21235          IRTemp irt_hi  = newTemp(Ity_I32);
   21236          IRTemp irt_low = newTemp(Ity_I32);
   21237          IRTemp irt_res = newTemp(Ity_I32);
   21238          assign(irt_rM, getIRegT(rM1));
   21239          assign(irt_hi,
   21240                 binop(Iop_Sar32,
   21241                       binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
   21242                       mkU8(16)
   21243                 )
   21244          );
   21245          assign(irt_low,
   21246                 binop(Iop_And32,
   21247                       binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
   21248                       mkU32(0xFF)
   21249                 )
   21250          );
   21251          assign(irt_res,
   21252                 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
   21253          );
   21254          putIRegT(rD, mkexpr(irt_res), condT);
   21255          DIP("revsh r%u, r%u\n", rD, rM1);
   21256          goto decode_success;
   21257       }
   21258    }
   21259 
   21260    /* -------------- (T1) MSR apsr, reg -------------- */
   21261    if (INSN0(15,4) == 0xF38
   21262        && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(9,0) == 0x000) {
   21263       UInt rN          = INSN0(3,0);
   21264       UInt write_ge    = INSN1(10,10);
   21265       UInt write_nzcvq = INSN1(11,11);
   21266       if (!isBadRegT(rN) && (write_nzcvq || write_ge)) {
   21267          IRTemp rNt = newTemp(Ity_I32);
   21268          assign(rNt, getIRegT(rN));
   21269          desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
   21270          DIP("msr cpsr_%s%s, r%u\n",
   21271              write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
   21272          goto decode_success;
   21273       }
   21274    }
   21275 
   21276    /* -------------- (T1) MRS reg, apsr -------------- */
   21277    if (INSN0(15,0) == 0xF3EF
   21278        && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(7,0) == 0x00) {
   21279       UInt rD = INSN1(11,8);
   21280       if (!isBadRegT(rD)) {
   21281          IRTemp apsr = synthesise_APSR();
   21282          putIRegT( rD, mkexpr(apsr), condT );
   21283          DIP("mrs r%u, cpsr\n", rD);
   21284          goto decode_success;
   21285       }
   21286    }
   21287 
   21288    /* ----------------- (T1) LDREX ----------------- */
   21289    if (INSN0(15,4) == 0xE85 && INSN1(11,8) == BITS4(1,1,1,1)) {
   21290       UInt rN   = INSN0(3,0);
   21291       UInt rT   = INSN1(15,12);
   21292       UInt imm8 = INSN1(7,0);
   21293       if (!isBadRegT(rT) && rN != 15) {
   21294          IRTemp res;
   21295          // go uncond
   21296          mk_skip_over_T32_if_cond_is_false( condT );
   21297          // now uncond
   21298          res = newTemp(Ity_I32);
   21299          stmt( IRStmt_LLSC(Iend_LE,
   21300                            res,
   21301                            binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
   21302                            NULL/*this is a load*/ ));
   21303          putIRegT(rT, mkexpr(res), IRTemp_INVALID);
   21304          DIP("ldrex r%u, [r%u, #+%u]\n", rT, rN, imm8 * 4);
   21305          goto decode_success;
   21306       }
   21307    }
   21308 
   21309    /* --------------- (T1) LDREX{B,H} --------------- */
   21310    if (INSN0(15,4) == 0xE8D
   21311        && (INSN1(11,0) == 0xF4F || INSN1(11,0) == 0xF5F)) {
   21312       UInt rN  = INSN0(3,0);
   21313       UInt rT  = INSN1(15,12);
   21314       Bool isH = INSN1(11,0) == 0xF5F;
   21315       if (!isBadRegT(rT) && rN != 15) {
   21316          IRTemp res;
   21317          // go uncond
   21318          mk_skip_over_T32_if_cond_is_false( condT );
   21319          // now uncond
   21320          res = newTemp(isH ? Ity_I16 : Ity_I8);
   21321          stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
   21322                            NULL/*this is a load*/ ));
   21323          putIRegT(rT, unop(isH ? Iop_16Uto32 : Iop_8Uto32, mkexpr(res)),
   21324                       IRTemp_INVALID);
   21325          DIP("ldrex%c r%u, [r%u]\n", isH ? 'h' : 'b', rT, rN);
   21326          goto decode_success;
   21327       }
   21328    }
   21329 
   21330    /* --------------- (T1) LDREXD --------------- */
   21331    if (INSN0(15,4) == 0xE8D && INSN1(7,0) == 0x7F) {
   21332       UInt rN  = INSN0(3,0);
   21333       UInt rT  = INSN1(15,12);
   21334       UInt rT2 = INSN1(11,8);
   21335       if (!isBadRegT(rT) && !isBadRegT(rT2) && rT != rT2 && rN != 15) {
   21336          IRTemp res;
   21337          // go uncond
   21338          mk_skip_over_T32_if_cond_is_false( condT );
   21339          // now uncond
   21340          res = newTemp(Ity_I64);
   21341          // FIXME: assumes little-endian guest
   21342          stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
   21343                            NULL/*this is a load*/ ));
   21344          // FIXME: assumes little-endian guest
   21345          putIRegT(rT,  unop(Iop_64to32,   mkexpr(res)), IRTemp_INVALID);
   21346          putIRegT(rT2, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID);
   21347          DIP("ldrexd r%u, r%u, [r%u]\n", rT, rT2, rN);
   21348          goto decode_success;
   21349       }
   21350    }
   21351 
   21352    /* ----------------- (T1) STREX ----------------- */
   21353    if (INSN0(15,4) == 0xE84) {
   21354       UInt rN   = INSN0(3,0);
   21355       UInt rT   = INSN1(15,12);
   21356       UInt rD   = INSN1(11,8);
   21357       UInt imm8 = INSN1(7,0);
   21358       if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
   21359           && rD != rN && rD != rT) {
   21360          IRTemp resSC1, resSC32;
   21361          // go uncond
   21362          mk_skip_over_T32_if_cond_is_false( condT );
   21363          // now uncond
   21364          /* Ok, now we're unconditional.  Do the store. */
   21365          resSC1 = newTemp(Ity_I1);
   21366          stmt( IRStmt_LLSC(Iend_LE,
   21367                            resSC1,
   21368                            binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
   21369                            getIRegT(rT)) );
   21370          /* Set rD to 1 on failure, 0 on success.  Currently we have
   21371             resSC1 == 0 on failure, 1 on success. */
   21372          resSC32 = newTemp(Ity_I32);
   21373          assign(resSC32,
   21374                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   21375          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
   21376          DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
   21377          goto decode_success;
   21378       }
   21379    }
   21380 
   21381    /* --------------- (T1) STREX{B,H} --------------- */
   21382    if (INSN0(15,4) == 0xE8C
   21383        && (INSN1(11,4) == 0xF4 || INSN1(11,4) == 0xF5)) {
   21384       UInt rN  = INSN0(3,0);
   21385       UInt rT  = INSN1(15,12);
   21386       UInt rD  = INSN1(3,0);
   21387       Bool isH = INSN1(11,4) == 0xF5;
   21388       if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
   21389           && rD != rN && rD != rT) {
   21390          IRTemp resSC1, resSC32;
   21391          // go uncond
   21392          mk_skip_over_T32_if_cond_is_false( condT );
   21393          // now uncond
   21394          /* Ok, now we're unconditional.  Do the store. */
   21395          resSC1 = newTemp(Ity_I1);
   21396          stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN),
   21397                            unop(isH ? Iop_32to16 : Iop_32to8,
   21398                                 getIRegT(rT))) );
   21399          /* Set rD to 1 on failure, 0 on success.  Currently we have
   21400             resSC1 == 0 on failure, 1 on success. */
   21401          resSC32 = newTemp(Ity_I32);
   21402          assign(resSC32,
   21403                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   21404          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
   21405          DIP("strex%c r%u, r%u, [r%u]\n", isH ? 'h' : 'b', rD, rT, rN);
   21406          goto decode_success;
   21407       }
   21408    }
   21409 
   21410    /* ---------------- (T1) STREXD ---------------- */
   21411    if (INSN0(15,4) == 0xE8C && INSN1(7,4) == BITS4(0,1,1,1)) {
   21412       UInt rN  = INSN0(3,0);
   21413       UInt rT  = INSN1(15,12);
   21414       UInt rT2 = INSN1(11,8);
   21415       UInt rD  = INSN1(3,0);
   21416       if (!isBadRegT(rD) && !isBadRegT(rT) && !isBadRegT(rT2)
   21417           && rN != 15 && rD != rN && rD != rT && rD != rT) {
   21418          IRTemp resSC1, resSC32, data;
   21419          // go uncond
   21420          mk_skip_over_T32_if_cond_is_false( condT );
   21421          // now uncond
   21422          /* Ok, now we're unconditional.  Do the store. */
   21423          resSC1 = newTemp(Ity_I1);
   21424          data = newTemp(Ity_I64);
   21425          // FIXME: assumes little-endian guest
   21426          assign(data, binop(Iop_32HLto64, getIRegT(rT2), getIRegT(rT)));
   21427          // FIXME: assumes little-endian guest
   21428          stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN), mkexpr(data)));
   21429          /* Set rD to 1 on failure, 0 on success.  Currently we have
   21430             resSC1 == 0 on failure, 1 on success. */
   21431          resSC32 = newTemp(Ity_I32);
   21432          assign(resSC32,
   21433                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   21434          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
   21435          DIP("strexd r%u, r%u, r%u, [r%u]\n", rD, rT, rT2, rN);
   21436          goto decode_success;
   21437       }
   21438    }
   21439 
   21440    /* -------------- v7 barrier insns -------------- */
   21441    if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF00) == 0x8F00) {
   21442       /* FIXME: should this be unconditional? */
   21443       /* XXX this isn't really right, is it?  The generated IR does
   21444          them unconditionally.  I guess it doesn't matter since it
   21445          doesn't do any harm to do them even when the guarding
   21446          condition is false -- it's just a performance loss. */
   21447       switch (INSN1(7,0)) {
   21448          case 0x4F: /* DSB sy */
   21449          case 0x4E: /* DSB st */
   21450          case 0x4B: /* DSB ish */
   21451          case 0x4A: /* DSB ishst */
   21452          case 0x47: /* DSB nsh */
   21453          case 0x46: /* DSB nshst */
   21454          case 0x43: /* DSB osh */
   21455          case 0x42: /* DSB oshst */
   21456             stmt( IRStmt_MBE(Imbe_Fence) );
   21457             DIP("DSB\n");
   21458             goto decode_success;
   21459          case 0x5F: /* DMB sy */
   21460          case 0x5E: /* DMB st */
   21461          case 0x5B: /* DMB ish */
   21462          case 0x5A: /* DMB ishst */
   21463          case 0x57: /* DMB nsh */
   21464          case 0x56: /* DMB nshst */
   21465          case 0x53: /* DMB osh */
   21466          case 0x52: /* DMB oshst */
   21467             stmt( IRStmt_MBE(Imbe_Fence) );
   21468             DIP("DMB\n");
   21469             goto decode_success;
   21470          case 0x6F: /* ISB */
   21471             stmt( IRStmt_MBE(Imbe_Fence) );
   21472             DIP("ISB\n");
   21473             goto decode_success;
   21474          default:
   21475             break;
   21476       }
   21477    }
   21478 
   21479    /* ---------------------- PLD{,W} ---------------------- */
   21480    if ((INSN0(15,4) & 0xFFD) == 0xF89 && INSN1(15,12) == 0xF) {
   21481       /* FIXME: should this be unconditional? */
   21482       /* PLD/PLDW immediate, encoding T1 */
   21483       UInt rN    = INSN0(3,0);
   21484       UInt bW    = INSN0(5,5);
   21485       UInt imm12 = INSN1(11,0);
   21486       DIP("pld%s [r%u, #%u]\n", bW ? "w" : "",  rN, imm12);
   21487       goto decode_success;
   21488    }
   21489 
   21490    if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,8) == 0xFC) {
   21491       /* FIXME: should this be unconditional? */
   21492       /* PLD/PLDW immediate, encoding T2 */
   21493       UInt rN    = INSN0(3,0);
   21494       UInt bW    = INSN0(5,5);
   21495       UInt imm8  = INSN1(7,0);
   21496       DIP("pld%s [r%u, #-%u]\n", bW ? "w" : "",  rN, imm8);
   21497       goto decode_success;
   21498    }
   21499 
   21500    if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,6) == 0x3C0) {
   21501       /* FIXME: should this be unconditional? */
   21502       /* PLD/PLDW register, encoding T1 */
   21503       UInt rN   = INSN0(3,0);
   21504       UInt rM   = INSN1(3,0);
   21505       UInt bW   = INSN0(5,5);
   21506       UInt imm2 = INSN1(5,4);
   21507       if (!isBadRegT(rM)) {
   21508          DIP("pld%s [r%u, r%u, lsl %d]\n", bW ? "w" : "", rN, rM, imm2);
   21509          goto decode_success;
   21510       }
   21511       /* fall through */
   21512    }
   21513 
   21514    /* -------------- read CP15 TPIDRURO register ------------- */
   21515    /* mrc     p15, 0,  r0, c13, c0, 3  up to
   21516       mrc     p15, 0, r14, c13, c0, 3
   21517    */
   21518    /* I don't know whether this is really v7-only.  But anyway, we
   21519       have to support it since arm-linux uses TPIDRURO as a thread
   21520       state register. */
   21521    if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F70)) {
   21522       /* FIXME: should this be unconditional? */
   21523       UInt rD = INSN1(15,12);
   21524       if (!isBadRegT(rD)) {
   21525          putIRegT(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32), IRTemp_INVALID);
   21526          DIP("mrc p15,0, r%u, c13, c0, 3\n", rD);
   21527          goto decode_success;
   21528       }
   21529       /* fall through */
   21530    }
   21531 
   21532    /* ------------------- CLREX ------------------ */
   21533    if (INSN0(15,0) == 0xF3BF && INSN1(15,0) == 0x8F2F) {
   21534       /* AFAICS, this simply cancels a (all?) reservations made by a
   21535          (any?) preceding LDREX(es).  Arrange to hand it through to
   21536          the back end. */
   21537       mk_skip_over_T32_if_cond_is_false( condT );
   21538       stmt( IRStmt_MBE(Imbe_CancelReservation) );
   21539       DIP("clrex\n");
   21540       goto decode_success;
   21541    }
   21542 
   21543    /* ------------------- NOP ------------------ */
   21544    if (INSN0(15,0) == 0xF3AF && INSN1(15,0) == 0x8000) {
   21545       DIP("nop\n");
   21546       goto decode_success;
   21547    }
   21548 
   21549    /* -------------- (T1) LDRT reg+#imm8 -------------- */
   21550    /* Load Register Unprivileged:
   21551       ldrt Rt, [Rn, #imm8]
   21552    */
   21553    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,1)
   21554        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21555       UInt rT    = INSN1(15,12);
   21556       UInt rN    = INSN0(3,0);
   21557       UInt imm8  = INSN1(7,0);
   21558       Bool valid = True;
   21559       if (rN == 15 || isBadRegT(rT)) valid = False;
   21560       if (valid) {
   21561          put_ITSTATE(old_itstate);
   21562          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21563          IRTemp newRt = newTemp(Ity_I32);
   21564          loadGuardedLE( newRt, ILGop_Ident32, ea, llGetIReg(rT), condT );
   21565          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21566          put_ITSTATE(new_itstate);
   21567          DIP("ldrt r%u, [r%u, #%u]\n", rT, rN, imm8);
   21568          goto decode_success;
   21569       }
   21570    }
   21571 
   21572    /* -------------- (T1) STRT reg+#imm8 -------------- */
   21573    /* Store Register Unprivileged:
   21574       strt Rt, [Rn, #imm8]
   21575    */
   21576    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,0)
   21577        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21578       UInt rT    = INSN1(15,12);
   21579       UInt rN    = INSN0(3,0);
   21580       UInt imm8  = INSN1(7,0);
   21581       Bool valid = True;
   21582       if (rN == 15 || isBadRegT(rT)) valid = False;
   21583       if (valid) {
   21584          put_ITSTATE(old_itstate);
   21585          IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21586          storeGuardedLE( address, llGetIReg(rT), condT );
   21587          put_ITSTATE(new_itstate);
   21588          DIP("strt r%u, [r%u, #%u]\n", rT, rN, imm8);
   21589          goto decode_success;
   21590       }
   21591    }
   21592 
   21593    /* -------------- (T1) STRBT reg+#imm8 -------------- */
   21594    /* Store Register Byte Unprivileged:
   21595       strbt Rt, [Rn, #imm8]
   21596    */
   21597    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,0)
   21598        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21599       UInt rT    = INSN1(15,12);
   21600       UInt rN    = INSN0(3,0);
   21601       UInt imm8  = INSN1(7,0);
   21602       Bool valid = True;
   21603       if (rN == 15 || isBadRegT(rT)) valid = False;
   21604       if (valid) {
   21605          put_ITSTATE(old_itstate);
   21606          IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21607          IRExpr* data = unop(Iop_32to8, llGetIReg(rT));
   21608          storeGuardedLE( address, data, condT );
   21609          put_ITSTATE(new_itstate);
   21610          DIP("strbt r%u, [r%u, #%u]\n", rT, rN, imm8);
   21611          goto decode_success;
   21612       }
   21613    }
   21614 
   21615    /* -------------- (T1) LDRHT reg+#imm8 -------------- */
   21616    /* Load Register Halfword Unprivileged:
   21617       ldrht Rt, [Rn, #imm8]
   21618    */
   21619    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,1)
   21620        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21621       UInt rN    = INSN0(3,0);
   21622       Bool valid = True;
   21623       if (rN == 15) {
   21624          /* In this case our instruction is LDRH (literal), in fact:
   21625             LDRH (literal) was realized earlier, so we don't want to
   21626             make it twice. */
   21627          valid = False;
   21628       }
   21629       UInt rT    = INSN1(15,12);
   21630       UInt imm8  = INSN1(7,0);
   21631       if (isBadRegT(rT)) valid = False;
   21632       if (valid) {
   21633          put_ITSTATE(old_itstate);
   21634          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21635          IRTemp newRt = newTemp(Ity_I32);
   21636          loadGuardedLE( newRt, ILGop_16Uto32, ea, llGetIReg(rT), condT );
   21637          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21638          put_ITSTATE(new_itstate);
   21639          DIP("ldrht r%u, [r%u, #%u]\n", rT, rN, imm8);
   21640          goto decode_success;
   21641       }
   21642    }
   21643 
   21644    /* -------------- (T1) LDRSHT reg+#imm8 -------------- */
   21645    /* Load Register Signed Halfword Unprivileged:
   21646       ldrsht Rt, [Rn, #imm8]
   21647    */
   21648    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(1,1)
   21649        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21650       UInt rN    = INSN0(3,0);
   21651       Bool valid = True;
   21652       if (rN == 15) {
   21653          /* In this case our instruction is LDRSH (literal), in fact:
   21654             LDRSH (literal) was realized earlier, so we don't want to
   21655             make it twice. */
   21656          valid = False;
   21657       }
   21658       UInt rT    = INSN1(15,12);
   21659       UInt imm8  = INSN1(7,0);
   21660       if (isBadRegT(rT)) valid = False;
   21661       if (valid) {
   21662          put_ITSTATE(old_itstate);
   21663          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21664          IRTemp newRt = newTemp(Ity_I32);
   21665          loadGuardedLE( newRt, ILGop_16Sto32, ea, llGetIReg(rT), condT );
   21666          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21667          put_ITSTATE(new_itstate);
   21668          DIP("ldrsht r%u, [r%u, #%u]\n", rT, rN, imm8);
   21669          goto decode_success;
   21670       }
   21671    }
   21672 
   21673    /* -------------- (T1) STRHT reg+#imm8 -------------- */
   21674    /* Store Register Halfword Unprivileged:
   21675       strht Rt, [Rn, #imm8]
   21676    */
   21677    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,0)
   21678        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21679       UInt rT    = INSN1(15,12);
   21680       UInt rN    = INSN0(3,0);
   21681       UInt imm8  = INSN1(7,0);
   21682       Bool valid = True;
   21683       if (rN == 15 || isBadRegT(rT)) valid = False;
   21684       if (valid) {
   21685          put_ITSTATE(old_itstate);
   21686          IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21687          IRExpr* data = unop(Iop_32to16, llGetIReg(rT));
   21688          storeGuardedLE( address, data, condT );
   21689          put_ITSTATE(new_itstate);
   21690          DIP("strht r%u, [r%u, #%u]\n", rT, rN, imm8);
   21691          goto decode_success;
   21692       }
   21693    }
   21694 
   21695    /* -------------- (T1) LDRBT reg+#imm8 -------------- */
   21696    /* Load Register Byte Unprivileged:
   21697       ldrbt Rt, [Rn, #imm8]
   21698    */
   21699    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,1)
   21700        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21701       UInt rN    = INSN0(3,0);
   21702       UInt rT    = INSN1(15,12);
   21703       UInt imm8  = INSN1(7,0);
   21704       Bool valid = True;
   21705       if (rN == 15 /* insn is LDRB (literal) */) valid = False;
   21706       if (isBadRegT(rT)) valid = False;
   21707       if (valid) {
   21708          put_ITSTATE(old_itstate);
   21709          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21710          IRTemp newRt = newTemp(Ity_I32);
   21711          loadGuardedLE( newRt, ILGop_8Uto32, ea, llGetIReg(rT), condT );
   21712          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21713          put_ITSTATE(new_itstate);
   21714          DIP("ldrbt r%u, [r%u, #%u]\n", rT, rN, imm8);
   21715          goto decode_success;
   21716       }
   21717    }
   21718 
   21719    /* -------------- (T1) LDRSBT reg+#imm8 -------------- */
   21720    /* Load Register Signed Byte Unprivileged:
   21721       ldrsbt Rt, [Rn, #imm8]
   21722    */
   21723    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
   21724        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21725       UInt rN    = INSN0(3,0);
   21726       Bool valid = True;
   21727       UInt rT    = INSN1(15,12);
   21728       UInt imm8  = INSN1(7,0);
   21729       if (rN == 15 /* insn is LDRSB (literal) */) valid = False;
   21730       if (isBadRegT(rT)) valid = False;
   21731       if (valid) {
   21732          put_ITSTATE(old_itstate);
   21733          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21734          IRTemp newRt = newTemp(Ity_I32);
   21735          loadGuardedLE( newRt, ILGop_8Sto32, ea, llGetIReg(rT), condT );
   21736          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21737          put_ITSTATE(new_itstate);
   21738          DIP("ldrsbt r%u, [r%u, #%u]\n", rT, rN, imm8);
   21739          goto decode_success;
   21740       }
   21741    }
   21742 
   21743    /* -------------- (T1) PLI reg+#imm12 -------------- */
   21744    /* Preload Instruction:
   21745       pli [Rn, #imm12]
   21746    */
   21747    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,1,0) && INSN0(5,4) == BITS2(0,1)
   21748        && INSN1(15,12) == BITS4(1,1,1,1)) {
   21749       UInt rN    = INSN0(3,0);
   21750       UInt imm12 = INSN1(11,0);
   21751       if (rN != 15) {
   21752          DIP("pli [r%u, #%u]\n", rN, imm12);
   21753          goto decode_success;
   21754       }
   21755    }
   21756 
   21757    /* -------------- (T2) PLI reg-#imm8 -------------- */
   21758    /* Preload Instruction:
   21759       pli [Rn, #-imm8]
   21760    */
   21761    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
   21762        && INSN1(15,8) == BITS8(1,1,1,1,1,1,0,0)) {
   21763       UInt rN   = INSN0(3,0);
   21764       UInt imm8 = INSN1(7,0);
   21765       if (rN != 15) {
   21766          DIP("pli [r%u, #-%u]\n", rN, imm8);
   21767          goto decode_success;
   21768       }
   21769    }
   21770 
   21771    /* -------------- (T3) PLI PC+/-#imm12 -------------- */
   21772    /* Preload Instruction:
   21773       pli [PC, #+/-imm12]
   21774    */
   21775    if (INSN0(15,8) == BITS8(1,1,1,1,1,0,0,1)
   21776        && INSN0(6,0) == BITS7(0,0,1,1,1,1,1)
   21777        && INSN1(15,12) == BITS4(1,1,1,1)) {
   21778       UInt imm12 = INSN1(11,0);
   21779       UInt bU    = INSN0(7,7);
   21780       DIP("pli [pc, #%c%u]\n", bU == 1 ? '+' : '-', imm12);
   21781       goto decode_success;
   21782    }
   21783 
   21784    /* ----------------------------------------------------------- */
   21785    /* -- VFP (CP 10, CP 11) instructions (in Thumb mode)       -- */
   21786    /* ----------------------------------------------------------- */
   21787 
   21788    if (INSN0(15,12) == BITS4(1,1,1,0)) {
   21789       UInt insn28 = (INSN0(11,0) << 16) | INSN1(15,0);
   21790       Bool ok_vfp = decode_CP10_CP11_instruction (
   21791                        &dres, insn28, condT, ARMCondAL/*bogus*/,
   21792                        True/*isT*/
   21793                     );
   21794       if (ok_vfp)
   21795          goto decode_success;
   21796    }
   21797 
   21798    /* ----------------------------------------------------------- */
   21799    /* -- NEON instructions (in Thumb mode)                     -- */
   21800    /* ----------------------------------------------------------- */
   21801 
   21802    if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
   21803       UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
   21804       Bool ok_neon = decode_NEON_instruction(
   21805                         &dres, insn32, condT, True/*isT*/
   21806                      );
   21807       if (ok_neon)
   21808          goto decode_success;
   21809    }
   21810 
   21811    /* ----------------------------------------------------------- */
   21812    /* -- v6 media instructions (in Thumb mode)                 -- */
   21813    /* ----------------------------------------------------------- */
   21814 
   21815    { UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
   21816      Bool ok_v6m = decode_V6MEDIA_instruction(
   21817                       &dres, insn32, condT, ARMCondAL/*bogus*/,
   21818                       True/*isT*/
   21819                    );
   21820      if (ok_v6m)
   21821         goto decode_success;
   21822    }
   21823 
   21824    /* ----------------------------------------------------------- */
   21825    /* -- Undecodable                                           -- */
   21826    /* ----------------------------------------------------------- */
   21827 
   21828    goto decode_failure;
   21829    /*NOTREACHED*/
   21830 
   21831   decode_failure:
   21832    /* All decode failures end up here. */
   21833    if (sigill_diag)
   21834       vex_printf("disInstr(thumb): unhandled instruction: "
   21835                  "0x%04x 0x%04x\n", (UInt)insn0, (UInt)insn1);
   21836 
   21837    /* Back up ITSTATE to the initial value for this instruction.
   21838       If we don't do that, any subsequent restart of the instruction
   21839       will restart with the wrong value. */
   21840    if (old_itstate != IRTemp_INVALID)
   21841       put_ITSTATE(old_itstate);
   21842 
   21843    /* Tell the dispatcher that this insn cannot be decoded, and so has
   21844       not been executed, and (is currently) the next to be executed.
   21845       R15 should be up-to-date since it made so at the start of each
   21846       insn, but nevertheless be paranoid and update it again right
   21847       now. */
   21848    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   21849    llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
   21850    dres.whatNext    = Dis_StopHere;
   21851    dres.jk_StopHere = Ijk_NoDecode;
   21852    dres.len         = 0;
   21853    return dres;
   21854 
   21855   decode_success:
   21856    /* All decode successes end up here. */
   21857    vassert(dres.len == 4 || dres.len == 2 || dres.len == 20);
   21858    switch (dres.whatNext) {
   21859       case Dis_Continue:
   21860          llPutIReg(15, mkU32(dres.len + (guest_R15_curr_instr_notENC | 1)));
   21861          break;
   21862       case Dis_ResteerU:
   21863       case Dis_ResteerC:
   21864          llPutIReg(15, mkU32(dres.continueAt));
   21865          break;
   21866       case Dis_StopHere:
   21867          break;
   21868       default:
   21869          vassert(0);
   21870    }
   21871 
   21872    DIP("\n");
   21873 
   21874    return dres;
   21875 
   21876 #  undef INSN0
   21877 #  undef INSN1
   21878 }
   21879 
   21880 #undef DIP
   21881 #undef DIS
   21882 
   21883 
   21884 /* Helper table for figuring out how many insns an IT insn
   21885    conditionalises.
   21886 
   21887    An ITxyz instruction of the format "1011 1111 firstcond mask"
   21888    conditionalises some number of instructions, as indicated by the
   21889    following table.  A value of zero indicates the instruction is
   21890    invalid in some way.
   21891 
   21892    mask = 0 means this isn't an IT instruction
   21893    fc = 15 (NV) means unpredictable
   21894 
   21895    The line fc = 14 (AL) is different from the others; there are
   21896    additional constraints in this case.
   21897 
   21898           mask(0 ..                   15)
   21899         +--------------------------------
   21900    fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21901    ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21902         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21903         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21904         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21905         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21906         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21907         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21908         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21909         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21910         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21911         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21912         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21913         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21914         | 0 4 3 0 2 0 0 0 1 0 0 0 0 0 0 0
   21915    15)  | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
   21916 
   21917    To be conservative with the analysis, let's rule out the mask = 0
   21918    case, since that isn't an IT insn at all.  But for all the other
   21919    cases where the table contains zero, that means unpredictable, so
   21920    let's say 4 to be conservative.  Hence we have a safe value for any
   21921    IT (mask,fc) pair that the CPU would actually identify as an IT
   21922    instruction.  The final table is
   21923 
   21924           mask(0 ..                   15)
   21925         +--------------------------------
   21926    fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21927    ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21928         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21929         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21930         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21931         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21932         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21933         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21934         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21935         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21936         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21937         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21938         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21939         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21940         | 0 4 3 4 2 4 4 4 1 4 4 4 4 4 4 4
   21941    15)  | 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
   21942 */
   21943 static const UChar it_length_table[256]
   21944    = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21945        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21946        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21947        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21948        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21949        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21950        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21951        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21952        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21953        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21954        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21955        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21956        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21957        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21958        0, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
   21959        0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
   21960      };
   21961 
   21962 
   21963 /*------------------------------------------------------------*/
   21964 /*--- Top-level fn                                         ---*/
   21965 /*------------------------------------------------------------*/
   21966 
   21967 /* Disassemble a single instruction into IR.  The instruction
   21968    is located in host memory at &guest_code[delta]. */
   21969 
   21970 DisResult disInstr_ARM ( IRSB*        irsb_IN,
   21971                          Bool         (*resteerOkFn) ( void*, Addr64 ),
   21972                          Bool         resteerCisOk,
   21973                          void*        callback_opaque,
   21974                          UChar*       guest_code_IN,
   21975                          Long         delta_ENCODED,
   21976                          Addr64       guest_IP_ENCODED,
   21977                          VexArch      guest_arch,
   21978                          VexArchInfo* archinfo,
   21979                          VexAbiInfo*  abiinfo,
   21980                          Bool         host_bigendian_IN,
   21981                          Bool         sigill_diag_IN )
   21982 {
   21983    DisResult dres;
   21984    Bool isThumb = (Bool)(guest_IP_ENCODED & 1);
   21985 
   21986    /* Set globals (see top of this file) */
   21987    vassert(guest_arch == VexArchARM);
   21988 
   21989    irsb              = irsb_IN;
   21990    host_is_bigendian = host_bigendian_IN;
   21991    __curr_is_Thumb   = isThumb;
   21992 
   21993    if (isThumb) {
   21994       guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED - 1;
   21995    } else {
   21996       guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED;
   21997    }
   21998 
   21999    if (isThumb) {
   22000       dres = disInstr_THUMB_WRK ( resteerOkFn,
   22001                                   resteerCisOk, callback_opaque,
   22002                                   &guest_code_IN[delta_ENCODED - 1],
   22003                                   archinfo, abiinfo, sigill_diag_IN );
   22004    } else {
   22005       dres = disInstr_ARM_WRK ( resteerOkFn,
   22006                                 resteerCisOk, callback_opaque,
   22007                                 &guest_code_IN[delta_ENCODED],
   22008                                 archinfo, abiinfo, sigill_diag_IN );
   22009    }
   22010 
   22011    return dres;
   22012 }
   22013 
   22014 /* Test program for the conversion of IRCmpF64Result values to VFP
   22015    nzcv values.  See handling of FCMPD et al above. */
   22016 /*
   22017 UInt foo ( UInt x )
   22018 {
   22019    UInt ix    = ((x >> 5) & 3) | (x & 1);
   22020    UInt termL = (((((ix ^ 1) << 30) - 1) >> 29) + 1);
   22021    UInt termR = (ix & (ix >> 1) & 1);
   22022    return termL  -  termR;
   22023 }
   22024 
   22025 void try ( char* s, UInt ir, UInt req )
   22026 {
   22027    UInt act = foo(ir);
   22028    printf("%s 0x%02x -> req %d%d%d%d act %d%d%d%d (0x%x)\n",
   22029           s, ir, (req >> 3) & 1, (req >> 2) & 1,
   22030                  (req >> 1) & 1, (req >> 0) & 1,
   22031                  (act >> 3) & 1, (act >> 2) & 1,
   22032                  (act >> 1) & 1, (act >> 0) & 1, act);
   22033 
   22034 }
   22035 
   22036 int main ( void )
   22037 {
   22038    printf("\n");
   22039    try("UN", 0x45, 0b0011);
   22040    try("LT", 0x01, 0b1000);
   22041    try("GT", 0x00, 0b0010);
   22042    try("EQ", 0x40, 0b0110);
   22043    printf("\n");
   22044    return 0;
   22045 }
   22046 */
   22047 
   22048 /* Spare code for doing reference implementations of various 64-bit
   22049    SIMD interleaves/deinterleaves/concatenation ops. */
   22050 /*
   22051 // Split a 64 bit value into 4 16 bit ones, in 32-bit IRTemps with
   22052 // the top halves guaranteed to be zero.
   22053 static void break64to16s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
   22054                            IRTemp* out0, IRTemp v64 )
   22055 {
   22056   if (out3) *out3 = newTemp(Ity_I32);
   22057   if (out2) *out2 = newTemp(Ity_I32);
   22058   if (out1) *out1 = newTemp(Ity_I32);
   22059   if (out0) *out0 = newTemp(Ity_I32);
   22060   IRTemp hi32 = newTemp(Ity_I32);
   22061   IRTemp lo32 = newTemp(Ity_I32);
   22062   assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
   22063   assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
   22064   if (out3) assign(*out3, binop(Iop_Shr32, mkexpr(hi32), mkU8(16)));
   22065   if (out2) assign(*out2, binop(Iop_And32, mkexpr(hi32), mkU32(0xFFFF)));
   22066   if (out1) assign(*out1, binop(Iop_Shr32, mkexpr(lo32), mkU8(16)));
   22067   if (out0) assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFFFF)));
   22068 }
   22069 
   22070 // Make a 64 bit value from 4 16 bit ones, each of which is in a 32 bit
   22071 // IRTemp.
   22072 static IRTemp mk64from16s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
   22073 {
   22074   IRTemp hi32 = newTemp(Ity_I32);
   22075   IRTemp lo32 = newTemp(Ity_I32);
   22076   assign(hi32,
   22077          binop(Iop_Or32,
   22078                binop(Iop_Shl32, mkexpr(in3), mkU8(16)),
   22079                binop(Iop_And32, mkexpr(in2), mkU32(0xFFFF))));
   22080   assign(lo32,
   22081          binop(Iop_Or32,
   22082                binop(Iop_Shl32, mkexpr(in1), mkU8(16)),
   22083                binop(Iop_And32, mkexpr(in0), mkU32(0xFFFF))));
   22084   IRTemp res = newTemp(Ity_I64);
   22085   assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
   22086   return res;
   22087 }
   22088 
   22089 static IRExpr* mk_InterleaveLO16x4 ( IRTemp a3210, IRTemp b3210 )
   22090 {
   22091   // returns a1 b1 a0 b0
   22092   IRTemp a1, a0, b1, b0;
   22093   break64to16s(NULL, NULL, &a1, &a0, a3210);
   22094   break64to16s(NULL, NULL, &b1, &b0, b3210);
   22095   return mkexpr(mk64from16s(a1, b1, a0, b0));
   22096 }
   22097 
   22098 static IRExpr* mk_InterleaveHI16x4 ( IRTemp a3210, IRTemp b3210 )
   22099 {
   22100   // returns a3 b3 a2 b2
   22101   IRTemp a3, a2, b3, b2;
   22102   break64to16s(&a3, &a2, NULL, NULL, a3210);
   22103   break64to16s(&b3, &b2, NULL, NULL, b3210);
   22104   return mkexpr(mk64from16s(a3, b3, a2, b2));
   22105 }
   22106 
   22107 static IRExpr* mk_CatEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   22108 {
   22109   // returns a2 a0 b2 b0
   22110   IRTemp a2, a0, b2, b0;
   22111   break64to16s(NULL, &a2, NULL, &a0, a3210);
   22112   break64to16s(NULL, &b2, NULL, &b0, b3210);
   22113   return mkexpr(mk64from16s(a2, a0, b2, b0));
   22114 }
   22115 
   22116 static IRExpr* mk_CatOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   22117 {
   22118   // returns a3 a1 b3 b1
   22119   IRTemp a3, a1, b3, b1;
   22120   break64to16s(&a3, NULL, &a1, NULL, a3210);
   22121   break64to16s(&b3, NULL, &b1, NULL, b3210);
   22122   return mkexpr(mk64from16s(a3, a1, b3, b1));
   22123 }
   22124 
   22125 static IRExpr* mk_InterleaveOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   22126 {
   22127   // returns a3 b3 a1 b1
   22128   IRTemp a3, b3, a1, b1;
   22129   break64to16s(&a3, NULL, &a1, NULL, a3210);
   22130   break64to16s(&b3, NULL, &b1, NULL, b3210);
   22131   return mkexpr(mk64from16s(a3, b3, a1, b1));
   22132 }
   22133 
   22134 static IRExpr* mk_InterleaveEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   22135 {
   22136   // returns a2 b2 a0 b0
   22137   IRTemp a2, b2, a0, b0;
   22138   break64to16s(NULL, &a2, NULL, &a0, a3210);
   22139   break64to16s(NULL, &b2, NULL, &b0, b3210);
   22140   return mkexpr(mk64from16s(a2, b2, a0, b0));
   22141 }
   22142 
   22143 static void break64to8s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
   22144                           IRTemp* out4, IRTemp* out3, IRTemp* out2,
   22145                           IRTemp* out1,IRTemp* out0, IRTemp v64 )
   22146 {
   22147   if (out7) *out7 = newTemp(Ity_I32);
   22148   if (out6) *out6 = newTemp(Ity_I32);
   22149   if (out5) *out5 = newTemp(Ity_I32);
   22150   if (out4) *out4 = newTemp(Ity_I32);
   22151   if (out3) *out3 = newTemp(Ity_I32);
   22152   if (out2) *out2 = newTemp(Ity_I32);
   22153   if (out1) *out1 = newTemp(Ity_I32);
   22154   if (out0) *out0 = newTemp(Ity_I32);
   22155   IRTemp hi32 = newTemp(Ity_I32);
   22156   IRTemp lo32 = newTemp(Ity_I32);
   22157   assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
   22158   assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
   22159   if (out7)
   22160     assign(*out7, binop(Iop_And32,
   22161                         binop(Iop_Shr32, mkexpr(hi32), mkU8(24)),
   22162                         mkU32(0xFF)));
   22163   if (out6)
   22164     assign(*out6, binop(Iop_And32,
   22165                         binop(Iop_Shr32, mkexpr(hi32), mkU8(16)),
   22166                         mkU32(0xFF)));
   22167   if (out5)
   22168     assign(*out5, binop(Iop_And32,
   22169                         binop(Iop_Shr32, mkexpr(hi32), mkU8(8)),
   22170                         mkU32(0xFF)));
   22171   if (out4)
   22172     assign(*out4, binop(Iop_And32, mkexpr(hi32), mkU32(0xFF)));
   22173   if (out3)
   22174     assign(*out3, binop(Iop_And32,
   22175                         binop(Iop_Shr32, mkexpr(lo32), mkU8(24)),
   22176                         mkU32(0xFF)));
   22177   if (out2)
   22178     assign(*out2, binop(Iop_And32,
   22179                         binop(Iop_Shr32, mkexpr(lo32), mkU8(16)),
   22180                         mkU32(0xFF)));
   22181   if (out1)
   22182     assign(*out1, binop(Iop_And32,
   22183                         binop(Iop_Shr32, mkexpr(lo32), mkU8(8)),
   22184                         mkU32(0xFF)));
   22185   if (out0)
   22186     assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFF)));
   22187 }
   22188 
   22189 static IRTemp mk64from8s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
   22190                            IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
   22191 {
   22192   IRTemp hi32 = newTemp(Ity_I32);
   22193   IRTemp lo32 = newTemp(Ity_I32);
   22194   assign(hi32,
   22195          binop(Iop_Or32,
   22196                binop(Iop_Or32,
   22197                      binop(Iop_Shl32,
   22198                            binop(Iop_And32, mkexpr(in7), mkU32(0xFF)),
   22199                            mkU8(24)),
   22200                      binop(Iop_Shl32,
   22201                            binop(Iop_And32, mkexpr(in6), mkU32(0xFF)),
   22202                            mkU8(16))),
   22203                binop(Iop_Or32,
   22204                      binop(Iop_Shl32,
   22205                            binop(Iop_And32, mkexpr(in5), mkU32(0xFF)), mkU8(8)),
   22206                      binop(Iop_And32,
   22207                            mkexpr(in4), mkU32(0xFF)))));
   22208   assign(lo32,
   22209          binop(Iop_Or32,
   22210                binop(Iop_Or32,
   22211                      binop(Iop_Shl32,
   22212                            binop(Iop_And32, mkexpr(in3), mkU32(0xFF)),
   22213                            mkU8(24)),
   22214                      binop(Iop_Shl32,
   22215                            binop(Iop_And32, mkexpr(in2), mkU32(0xFF)),
   22216                            mkU8(16))),
   22217                binop(Iop_Or32,
   22218                      binop(Iop_Shl32,
   22219                            binop(Iop_And32, mkexpr(in1), mkU32(0xFF)), mkU8(8)),
   22220                      binop(Iop_And32,
   22221                            mkexpr(in0), mkU32(0xFF)))));
   22222   IRTemp res = newTemp(Ity_I64);
   22223   assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
   22224   return res;
   22225 }
   22226 
   22227 static IRExpr* mk_InterleaveLO8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22228 {
   22229   // returns a3 b3 a2 b2 a1 b1 a0 b0
   22230   IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
   22231   break64to8s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
   22232   break64to8s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
   22233   return mkexpr(mk64from8s(a3, b3, a2, b2, a1, b1, a0, b0));
   22234 }
   22235 
   22236 static IRExpr* mk_InterleaveHI8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22237 {
   22238   // returns a7 b7 a6 b6 a5 b5 a4 b4
   22239   IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
   22240   break64to8s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
   22241   break64to8s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
   22242   return mkexpr(mk64from8s(a7, b7, a6, b6, a5, b5, a4, b4));
   22243 }
   22244 
   22245 static IRExpr* mk_CatEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22246 {
   22247   // returns a6 a4 a2 a0 b6 b4 b2 b0
   22248   IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
   22249   break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
   22250   break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
   22251   return mkexpr(mk64from8s(a6, a4, a2, a0, b6, b4, b2, b0));
   22252 }
   22253 
   22254 static IRExpr* mk_CatOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22255 {
   22256   // returns a7 a5 a3 a1 b7 b5 b3 b1
   22257   IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
   22258   break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
   22259   break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
   22260   return mkexpr(mk64from8s(a7, a5, a3, a1, b7, b5, b3, b1));
   22261 }
   22262 
   22263 static IRExpr* mk_InterleaveEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22264 {
   22265   // returns a6 b6 a4 b4 a2 b2 a0 b0
   22266   IRTemp a6, b6, a4, b4, a2, b2, a0, b0;
   22267   break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
   22268   break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
   22269   return mkexpr(mk64from8s(a6, b6, a4, b4, a2, b2, a0, b0));
   22270 }
   22271 
   22272 static IRExpr* mk_InterleaveOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22273 {
   22274   // returns a7 b7 a5 b5 a3 b3 a1 b1
   22275   IRTemp a7, b7, a5, b5, a3, b3, a1, b1;
   22276   break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
   22277   break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
   22278   return mkexpr(mk64from8s(a7, b7, a5, b5, a3, b3, a1, b1));
   22279 }
   22280 
   22281 static IRExpr* mk_InterleaveLO32x2 ( IRTemp a10, IRTemp b10 )
   22282 {
   22283   // returns a0 b0
   22284   return binop(Iop_32HLto64, unop(Iop_64to32, mkexpr(a10)),
   22285                              unop(Iop_64to32, mkexpr(b10)));
   22286 }
   22287 
   22288 static IRExpr* mk_InterleaveHI32x2 ( IRTemp a10, IRTemp b10 )
   22289 {
   22290   // returns a1 b1
   22291   return binop(Iop_32HLto64, unop(Iop_64HIto32, mkexpr(a10)),
   22292                              unop(Iop_64HIto32, mkexpr(b10)));
   22293 }
   22294 */
   22295 
   22296 /*--------------------------------------------------------------------*/
   22297 /*--- end                                         guest_arm_toIR.c ---*/
   22298 /*--------------------------------------------------------------------*/
   22299