Home | History | Annotate | Download | only in priv
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- begin                                       guest_arm_toIR.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2015 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    NEON support is
     14    Copyright (C) 2010-2015 Samsung Electronics
     15    contributed by Dmitry Zhurikhin <zhur (at) ispras.ru>
     16               and Kirill Batuzov <batuzovk (at) ispras.ru>
     17 
     18    This program is free software; you can redistribute it and/or
     19    modify it under the terms of the GNU General Public License as
     20    published by the Free Software Foundation; either version 2 of the
     21    License, or (at your option) any later version.
     22 
     23    This program is distributed in the hope that it will be useful, but
     24    WITHOUT ANY WARRANTY; without even the implied warranty of
     25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     26    General Public License for more details.
     27 
     28    You should have received a copy of the GNU General Public License
     29    along with this program; if not, write to the Free Software
     30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     31    02110-1301, USA.
     32 
     33    The GNU General Public License is contained in the file COPYING.
     34 */
     35 
     36 /* XXXX thumb to check:
     37    that all cases where putIRegT writes r15, we generate a jump.
     38 
     39    All uses of newTemp assign to an IRTemp and not a UInt
     40 
     41    For all thumb loads and stores, including VFP ones, new-ITSTATE is
     42    backed out before the memory op, and restored afterwards.  This
     43    needs to happen even after we go uncond.  (and for sure it doesn't
     44    happen for VFP loads/stores right now).
     45 
     46    VFP on thumb: check that we exclude all r13/r15 cases that we
     47    should.
     48 
     49    XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
     50    taking into account the number of insns guarded by an IT.
     51 
   remove the nasty hack, in the spechelper, of looking for Or32(...,
   0xE0) as the first arg to armg_calculate_condition, and instead
   use Slice44 as specified in comments in the spechelper.
     55 
     56    add specialisations for armg_calculate_flag_c and _v, as they
     57    are moderately often needed in Thumb code.
     58 
     59    Correctness: ITSTATE handling in Thumb SVCs is wrong.
     60 
     61    Correctness (obscure): in m_transtab, when invalidating code
     62    address ranges, invalidate up to 18 bytes after the end of the
     63    range.  This is because the ITSTATE optimisation at the top of
     64    _THUMB_WRK below analyses up to 18 bytes before the start of any
     65    given instruction, and so might depend on the invalidated area.
     66 */
     67 
     68 /* Limitations, etc
     69 
     70    - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
     71      These instructions are non-restartable in the case where the
     72      transfer(s) fault.
     73 
     74    - SWP: the restart jump back is Ijk_Boring; it should be
     75      Ijk_NoRedir but that's expensive.  See comments on casLE() in
     76      guest_x86_toIR.c.
     77 */
     78 
     79 /* "Special" instructions.
     80 
     81    This instruction decoder can decode four special instructions
     82    which mean nothing natively (are no-ops as far as regs/mem are
     83    concerned) but have meaning for supporting Valgrind.  A special
     84    instruction is flagged by a 16-byte preamble:
     85 
     86       E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
     87       (mov r12, r12, ROR #3;   mov r12, r12, ROR #13;
     88        mov r12, r12, ROR #29;  mov r12, r12, ROR #19)
     89 
   Following that, one of the following 4 is allowed
   (standard interpretation in parentheses):
     92 
     93       E18AA00A (orr r10,r10,r10)   R3 = client_request ( R4 )
     94       E18BB00B (orr r11,r11,r11)   R3 = guest_NRADDR
     95       E18CC00C (orr r12,r12,r12)   branch-and-link-to-noredir R4
     96       E1899009 (orr r9,r9,r9)      IR injection
     97 
     98    Any other bytes following the 16-byte preamble are illegal and
     99    constitute a failure in instruction decoding.  This all assumes
    100    that the preamble will never occur except in specific code
    101    fragments designed for Valgrind to catch.
    102 */
    103 
    104 /* Translates ARM(v5) code to IR. */
    105 
    106 #include "libvex_basictypes.h"
    107 #include "libvex_ir.h"
    108 #include "libvex.h"
    109 #include "libvex_guest_arm.h"
    110 
    111 #include "main_util.h"
    112 #include "main_globals.h"
    113 #include "guest_generic_bb_to_IR.h"
    114 #include "guest_arm_defs.h"
    115 
    116 
    117 /*------------------------------------------------------------*/
    118 /*--- Globals                                              ---*/
    119 /*------------------------------------------------------------*/
    120 
    121 /* These are set at the start of the translation of a instruction, so
    122    that we don't have to pass them around endlessly.  CONST means does
    123    not change during translation of the instruction.
    124 */
    125 
    126 /* CONST: what is the host's endianness?  This has to do with float vs
    127    double register accesses on VFP, but it's complex and not properly
    128    thought out. */
    129 static VexEndness host_endness;
    130 
    131 /* CONST: The guest address for the instruction currently being
    132    translated.  This is the real, "decoded" address (not subject
    133    to the CPSR.T kludge). */
    134 static Addr32 guest_R15_curr_instr_notENC;
    135 
    136 /* CONST, FOR ASSERTIONS ONLY.  Indicates whether currently processed
    137    insn is Thumb (True) or ARM (False). */
    138 static Bool __curr_is_Thumb;
    139 
    140 /* MOD: The IRSB* into which we're generating code. */
    141 static IRSB* irsb;
    142 
    143 /* These are to do with handling writes to r15.  They are initially
    144    set at the start of disInstr_ARM_WRK to indicate no update,
    145    possibly updated during the routine, and examined again at the end.
    146    If they have been set to indicate a r15 update then a jump is
    147    generated.  Note, "explicit" jumps (b, bx, etc) are generated
    148    directly, not using this mechanism -- this is intended to handle
    149    the implicit-style jumps resulting from (eg) assigning to r15 as
    150    the result of insns we wouldn't normally consider branchy. */
    151 
    152 /* MOD.  Initially False; set to True iff abovementioned handling is
    153    required. */
    154 static Bool r15written;
    155 
    156 /* MOD.  Initially IRTemp_INVALID.  If the r15 branch to be generated
    157    is conditional, this holds the gating IRTemp :: Ity_I32.  If the
    158    branch to be generated is unconditional, this remains
    159    IRTemp_INVALID. */
    160 static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
    161 
    162 /* MOD.  Initially Ijk_Boring.  If an r15 branch is to be generated,
    163    this holds the jump kind. */
    164 static IRTemp r15kind;
    165 
    166 
    167 /*------------------------------------------------------------*/
    168 /*--- Debugging output                                     ---*/
    169 /*------------------------------------------------------------*/
    170 
    171 #define DIP(format, args...)           \
    172    if (vex_traceflags & VEX_TRACE_FE)  \
    173       vex_printf(format, ## args)
    174 
    175 #define DIS(buf, format, args...)      \
    176    if (vex_traceflags & VEX_TRACE_FE)  \
    177       vex_sprintf(buf, format, ## args)
    178 
    179 #define ASSERT_IS_THUMB \
    180    do { vassert(__curr_is_Thumb); } while (0)
    181 
    182 #define ASSERT_IS_ARM \
    183    do { vassert(! __curr_is_Thumb); } while (0)
    184 
    185 
    186 /*------------------------------------------------------------*/
    187 /*--- Helper bits and pieces for deconstructing the        ---*/
    188 /*--- arm insn stream.                                     ---*/
    189 /*------------------------------------------------------------*/
    190 
    191 /* Do a little-endian load of a 32-bit word, regardless of the
    192    endianness of the underlying host. */
    193 static inline UInt getUIntLittleEndianly ( const UChar* p )
    194 {
    195    UInt w = 0;
    196    w = (w << 8) | p[3];
    197    w = (w << 8) | p[2];
    198    w = (w << 8) | p[1];
    199    w = (w << 8) | p[0];
    200    return w;
    201 }
    202 
    203 /* Do a little-endian load of a 16-bit word, regardless of the
    204    endianness of the underlying host. */
    205 static inline UShort getUShortLittleEndianly ( const UChar* p )
    206 {
    207    UShort w = 0;
    208    w = (w << 8) | p[1];
    209    w = (w << 8) | p[0];
    210    return w;
    211 }
    212 
    213 static UInt ROR32 ( UInt x, UInt sh ) {
    214    vassert(sh >= 0 && sh < 32);
    215    if (sh == 0)
    216       return x;
    217    else
    218       return (x << (32-sh)) | (x >> sh);
    219 }
    220 
    221 static Int popcount32 ( UInt x )
    222 {
    223    Int res = 0, i;
    224    for (i = 0; i < 32; i++) {
    225       res += (x & 1);
    226       x >>= 1;
    227    }
    228    return res;
    229 }
    230 
    231 static UInt setbit32 ( UInt x, Int ix, UInt b )
    232 {
    233    UInt mask = 1 << ix;
    234    x &= ~mask;
    235    x |= ((b << ix) & mask);
    236    return x;
    237 }
    238 
    239 #define BITS2(_b1,_b0) \
    240    (((_b1) << 1) | (_b0))
    241 
    242 #define BITS3(_b2,_b1,_b0)                      \
    243   (((_b2) << 2) | ((_b1) << 1) | (_b0))
    244 
    245 #define BITS4(_b3,_b2,_b1,_b0) \
    246    (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
    247 
    248 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    249    ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    250     | BITS4((_b3),(_b2),(_b1),(_b0)))
    251 
    252 #define BITS5(_b4,_b3,_b2,_b1,_b0)  \
    253    (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
    254 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
    255    (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    256 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    257    (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    258 
    259 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)      \
    260    (((_b8) << 8) \
    261     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    262 
    263 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    264    (((_b9) << 9) | ((_b8) << 8)                                \
    265     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    266 
    267 /* produces _uint[_bMax:_bMin] */
    268 #define SLICE_UInt(_uint,_bMax,_bMin) \
    269    (( ((UInt)(_uint)) >> (_bMin)) \
    270     & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
    271 
    272 
    273 /*------------------------------------------------------------*/
    274 /*--- Helper bits and pieces for creating IR fragments.    ---*/
    275 /*------------------------------------------------------------*/
    276 
    277 static IRExpr* mkU64 ( ULong i )
    278 {
    279    return IRExpr_Const(IRConst_U64(i));
    280 }
    281 
    282 static IRExpr* mkU32 ( UInt i )
    283 {
    284    return IRExpr_Const(IRConst_U32(i));
    285 }
    286 
    287 static IRExpr* mkU8 ( UInt i )
    288 {
    289    vassert(i < 256);
    290    return IRExpr_Const(IRConst_U8( (UChar)i ));
    291 }
    292 
    293 static IRExpr* mkexpr ( IRTemp tmp )
    294 {
    295    return IRExpr_RdTmp(tmp);
    296 }
    297 
    298 static IRExpr* unop ( IROp op, IRExpr* a )
    299 {
    300    return IRExpr_Unop(op, a);
    301 }
    302 
    303 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
    304 {
    305    return IRExpr_Binop(op, a1, a2);
    306 }
    307 
    308 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
    309 {
    310    return IRExpr_Triop(op, a1, a2, a3);
    311 }
    312 
    313 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
    314 {
    315    return IRExpr_Load(Iend_LE, ty, addr);
    316 }
    317 
    318 /* Add a statement to the list held by "irbb". */
    319 static void stmt ( IRStmt* st )
    320 {
    321    addStmtToIRSB( irsb, st );
    322 }
    323 
    324 static void assign ( IRTemp dst, IRExpr* e )
    325 {
    326    stmt( IRStmt_WrTmp(dst, e) );
    327 }
    328 
    329 static void storeLE ( IRExpr* addr, IRExpr* data )
    330 {
    331    stmt( IRStmt_Store(Iend_LE, addr, data) );
    332 }
    333 
    334 static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
    335 {
    336    if (guardT == IRTemp_INVALID) {
    337       /* unconditional */
    338       storeLE(addr, data);
    339    } else {
    340       stmt( IRStmt_StoreG(Iend_LE, addr, data,
    341                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
    342    }
    343 }
    344 
    345 static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
    346                             IRExpr* addr, IRExpr* alt,
    347                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
    348 {
    349    if (guardT == IRTemp_INVALID) {
    350       /* unconditional */
    351       IRExpr* loaded = NULL;
    352       switch (cvt) {
    353          case ILGop_Ident32:
    354             loaded = loadLE(Ity_I32, addr); break;
    355          case ILGop_8Uto32:
    356             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
    357          case ILGop_8Sto32:
    358             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
    359          case ILGop_16Uto32:
    360             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
    361          case ILGop_16Sto32:
    362             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
    363          default:
    364             vassert(0);
    365       }
    366       vassert(loaded != NULL);
    367       assign(dst, loaded);
    368    } else {
    369       /* Generate a guarded load into 'dst', but apply 'cvt' to the
    370          loaded data before putting the data in 'dst'.  If the load
    371          does not take place, 'alt' is placed directly in 'dst'. */
    372       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
    373                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
    374    }
    375 }
    376 
    377 /* Generate a new temporary of the given type. */
    378 static IRTemp newTemp ( IRType ty )
    379 {
    380    vassert(isPlausibleIRType(ty));
    381    return newIRTemp( irsb->tyenv, ty );
    382 }
    383 
    384 /* Produces a value in 0 .. 3, which is encoded as per the type
    385    IRRoundingMode. */
    386 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
    387 {
    388    return mkU32(Irrm_NEAREST);
    389 }
    390 
    391 /* Generate an expression for SRC rotated right by ROT. */
    392 static IRExpr* genROR32( IRTemp src, Int rot )
    393 {
    394    vassert(rot >= 0 && rot < 32);
    395    if (rot == 0)
    396       return mkexpr(src);
    397    return
    398       binop(Iop_Or32,
    399             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
    400             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
    401 }
    402 
    403 static IRExpr* mkU128 ( ULong i )
    404 {
    405    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
    406 }
    407 
    408 /* Generate a 4-aligned version of the given expression if
    409    the given condition is true.  Else return it unchanged. */
    410 static IRExpr* align4if ( IRExpr* e, Bool b )
    411 {
    412    if (b)
    413       return binop(Iop_And32, e, mkU32(~3));
    414    else
    415       return e;
    416 }
    417 
    418 
    419 /*------------------------------------------------------------*/
    420 /*--- Helpers for accessing guest registers.               ---*/
    421 /*------------------------------------------------------------*/
    422 
/* Byte offsets of fields within VexGuestARMState.  The register
   accessors below pass these to IRExpr_Get / IRStmt_Put to read and
   write guest state. */

/* Integer registers r0 .. r14, plus the r15 slot (guest_R15T). */
#define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
#define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
#define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
#define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
#define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
#define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
#define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
#define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
#define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
#define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
#define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
#define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
#define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
#define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
#define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
#define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)

/* Condition-code thunk fields and the guest_NRADDR slot. */
#define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
#define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)

/* 64-bit D registers d0 .. d31.  The Q-register accessors reuse the
   even-numbered entries (see quadGuestRegOffset). */
#define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
#define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
#define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
#define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
#define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
#define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
#define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
#define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
#define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
#define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
#define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
#define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
#define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
#define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
#define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
#define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
#define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
#define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
#define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
#define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
#define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
#define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
#define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
#define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
#define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
#define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
#define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
#define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
#define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
#define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
#define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
#define OFFB_D31      offsetof(VexGuestARMState,guest_D31)

/* Miscellaneous status/control fields. */
#define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
#define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
#define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
#define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
#define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
#define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
#define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
#define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)

/* guest_CMSTART / guest_CMLEN fields. */
#define OFFB_CMSTART  offsetof(VexGuestARMState,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARMState,guest_CMLEN)
    490 
    491 
    492 /* ---------------- Integer registers ---------------- */
    493 
    494 static Int integerGuestRegOffset ( UInt iregNo )
    495 {
    496    /* Do we care about endianness here?  We do if sub-parts of integer
    497       registers are accessed, but I don't think that ever happens on
    498       ARM. */
    499    switch (iregNo) {
    500       case 0:  return OFFB_R0;
    501       case 1:  return OFFB_R1;
    502       case 2:  return OFFB_R2;
    503       case 3:  return OFFB_R3;
    504       case 4:  return OFFB_R4;
    505       case 5:  return OFFB_R5;
    506       case 6:  return OFFB_R6;
    507       case 7:  return OFFB_R7;
    508       case 8:  return OFFB_R8;
    509       case 9:  return OFFB_R9;
    510       case 10: return OFFB_R10;
    511       case 11: return OFFB_R11;
    512       case 12: return OFFB_R12;
    513       case 13: return OFFB_R13;
    514       case 14: return OFFB_R14;
    515       case 15: return OFFB_R15T;
    516       default: vassert(0);
    517    }
    518 }
    519 
    520 /* Plain ("low level") read from a reg; no +8 offset magic for r15. */
    521 static IRExpr* llGetIReg ( UInt iregNo )
    522 {
    523    vassert(iregNo < 16);
    524    return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
    525 }
    526 
    527 /* Architected read from a reg in ARM mode.  This automagically adds 8
    528    to all reads of r15. */
    529 static IRExpr* getIRegA ( UInt iregNo )
    530 {
    531    IRExpr* e;
    532    ASSERT_IS_ARM;
    533    vassert(iregNo < 16);
    534    if (iregNo == 15) {
    535       /* If asked for r15, don't read the guest state value, as that
    536          may not be up to date in the case where loop unrolling has
    537          happened, because the first insn's write to the block is
    538          omitted; hence in the 2nd and subsequent unrollings we don't
    539          have a correct value in guest r15.  Instead produce the
    540          constant that we know would be produced at this point. */
    541       vassert(0 == (guest_R15_curr_instr_notENC & 3));
    542       e = mkU32(guest_R15_curr_instr_notENC + 8);
    543    } else {
    544       e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
    545    }
    546    return e;
    547 }
    548 
    549 /* Architected read from a reg in Thumb mode.  This automagically adds
    550    4 to all reads of r15. */
    551 static IRExpr* getIRegT ( UInt iregNo )
    552 {
    553    IRExpr* e;
    554    ASSERT_IS_THUMB;
    555    vassert(iregNo < 16);
    556    if (iregNo == 15) {
    557       /* Ditto comment in getIReg. */
    558       vassert(0 == (guest_R15_curr_instr_notENC & 1));
    559       e = mkU32(guest_R15_curr_instr_notENC + 4);
    560    } else {
    561       e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
    562    }
    563    return e;
    564 }
    565 
    566 /* Plain ("low level") write to a reg; no jump or alignment magic for
    567    r15. */
    568 static void llPutIReg ( UInt iregNo, IRExpr* e )
    569 {
    570    vassert(iregNo < 16);
    571    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
    572    stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
    573 }
    574 
    575 /* Architected write to an integer register in ARM mode.  If it is to
    576    r15, record info so at the end of this insn's translation, a branch
    577    to it can be made.  Also handles conditional writes to the
    578    register: if guardT == IRTemp_INVALID then the write is
    579    unconditional.  If writing r15, also 4-align it. */
    580 static void putIRegA ( UInt       iregNo,
    581                        IRExpr*    e,
    582                        IRTemp     guardT /* :: Ity_I32, 0 or 1 */,
    583                        IRJumpKind jk /* if a jump is generated */ )
    584 {
    585    /* if writing r15, force e to be 4-aligned. */
    586    // INTERWORKING FIXME.  this needs to be relaxed so that
    587    // puts caused by LDMxx which load r15 interwork right.
    588    // but is no aligned too relaxed?
    589    //if (iregNo == 15)
    590    //   e = binop(Iop_And32, e, mkU32(~3));
    591    ASSERT_IS_ARM;
    592    /* So, generate either an unconditional or a conditional write to
    593       the reg. */
    594    if (guardT == IRTemp_INVALID) {
    595       /* unconditional write */
    596       llPutIReg( iregNo, e );
    597    } else {
    598       llPutIReg( iregNo,
    599                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    600                              e, llGetIReg(iregNo) ));
    601    }
    602    if (iregNo == 15) {
    603       // assert against competing r15 updates.  Shouldn't
    604       // happen; should be ruled out by the instr matching
    605       // logic.
    606       vassert(r15written == False);
    607       vassert(r15guard   == IRTemp_INVALID);
    608       vassert(r15kind    == Ijk_Boring);
    609       r15written = True;
    610       r15guard   = guardT;
    611       r15kind    = jk;
    612    }
    613 }
    614 
    615 
    616 /* Architected write to an integer register in Thumb mode.  Writes to
    617    r15 are not allowed.  Handles conditional writes to the register:
    618    if guardT == IRTemp_INVALID then the write is unconditional. */
    619 static void putIRegT ( UInt       iregNo,
    620                        IRExpr*    e,
    621                        IRTemp     guardT /* :: Ity_I32, 0 or 1 */ )
    622 {
    623    /* So, generate either an unconditional or a conditional write to
    624       the reg. */
    625    ASSERT_IS_THUMB;
    626    vassert(iregNo >= 0 && iregNo <= 14);
    627    if (guardT == IRTemp_INVALID) {
    628       /* unconditional write */
    629       llPutIReg( iregNo, e );
    630    } else {
    631       llPutIReg( iregNo,
    632                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    633                              e, llGetIReg(iregNo) ));
    634    }
    635 }
    636 
    637 
    638 /* Thumb16 and Thumb32 only.
    639    Returns true if reg is 13 or 15.  Implements the BadReg
    640    predicate in the ARM ARM. */
    641 static Bool isBadRegT ( UInt r )
    642 {
    643    vassert(r <= 15);
    644    ASSERT_IS_THUMB;
    645    return r == 13 || r == 15;
    646 }
    647 
    648 
    649 /* ---------------- Double registers ---------------- */
    650 
    651 static Int doubleGuestRegOffset ( UInt dregNo )
    652 {
    653    /* Do we care about endianness here?  Probably do if we ever get
    654       into the situation of dealing with the single-precision VFP
    655       registers. */
    656    switch (dregNo) {
    657       case 0:  return OFFB_D0;
    658       case 1:  return OFFB_D1;
    659       case 2:  return OFFB_D2;
    660       case 3:  return OFFB_D3;
    661       case 4:  return OFFB_D4;
    662       case 5:  return OFFB_D5;
    663       case 6:  return OFFB_D6;
    664       case 7:  return OFFB_D7;
    665       case 8:  return OFFB_D8;
    666       case 9:  return OFFB_D9;
    667       case 10: return OFFB_D10;
    668       case 11: return OFFB_D11;
    669       case 12: return OFFB_D12;
    670       case 13: return OFFB_D13;
    671       case 14: return OFFB_D14;
    672       case 15: return OFFB_D15;
    673       case 16: return OFFB_D16;
    674       case 17: return OFFB_D17;
    675       case 18: return OFFB_D18;
    676       case 19: return OFFB_D19;
    677       case 20: return OFFB_D20;
    678       case 21: return OFFB_D21;
    679       case 22: return OFFB_D22;
    680       case 23: return OFFB_D23;
    681       case 24: return OFFB_D24;
    682       case 25: return OFFB_D25;
    683       case 26: return OFFB_D26;
    684       case 27: return OFFB_D27;
    685       case 28: return OFFB_D28;
    686       case 29: return OFFB_D29;
    687       case 30: return OFFB_D30;
    688       case 31: return OFFB_D31;
    689       default: vassert(0);
    690    }
    691 }
    692 
    693 /* Plain ("low level") read from a VFP Dreg. */
    694 static IRExpr* llGetDReg ( UInt dregNo )
    695 {
    696    vassert(dregNo < 32);
    697    return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
    698 }
    699 
    700 /* Architected read from a VFP Dreg. */
    701 static IRExpr* getDReg ( UInt dregNo ) {
    702    return llGetDReg( dregNo );
    703 }
    704 
    705 /* Plain ("low level") write to a VFP Dreg. */
    706 static void llPutDReg ( UInt dregNo, IRExpr* e )
    707 {
    708    vassert(dregNo < 32);
    709    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
    710    stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
    711 }
    712 
    713 /* Architected write to a VFP Dreg.  Handles conditional writes to the
    714    register: if guardT == IRTemp_INVALID then the write is
    715    unconditional. */
    716 static void putDReg ( UInt    dregNo,
    717                       IRExpr* e,
    718                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    719 {
    720    /* So, generate either an unconditional or a conditional write to
    721       the reg. */
    722    if (guardT == IRTemp_INVALID) {
    723       /* unconditional write */
    724       llPutDReg( dregNo, e );
    725    } else {
    726       llPutDReg( dregNo,
    727                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    728                              e, llGetDReg(dregNo) ));
    729    }
    730 }
    731 
    732 /* And now exactly the same stuff all over again, but this time
    733    taking/returning I64 rather than F64, to support 64-bit Neon
    734    ops. */
    735 
    736 /* Plain ("low level") read from a Neon Integer Dreg. */
    737 static IRExpr* llGetDRegI64 ( UInt dregNo )
    738 {
    739    vassert(dregNo < 32);
    740    return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
    741 }
    742 
    743 /* Architected read from a Neon Integer Dreg. */
    744 static IRExpr* getDRegI64 ( UInt dregNo ) {
    745    return llGetDRegI64( dregNo );
    746 }
    747 
    748 /* Plain ("low level") write to a Neon Integer Dreg. */
    749 static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
    750 {
    751    vassert(dregNo < 32);
    752    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
    753    stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
    754 }
    755 
    756 /* Architected write to a Neon Integer Dreg.  Handles conditional
    757    writes to the register: if guardT == IRTemp_INVALID then the write
    758    is unconditional. */
    759 static void putDRegI64 ( UInt    dregNo,
    760                          IRExpr* e,
    761                          IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    762 {
    763    /* So, generate either an unconditional or a conditional write to
    764       the reg. */
    765    if (guardT == IRTemp_INVALID) {
    766       /* unconditional write */
    767       llPutDRegI64( dregNo, e );
    768    } else {
    769       llPutDRegI64( dregNo,
    770                     IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    771                                 e, llGetDRegI64(dregNo) ));
    772    }
    773 }
    774 
    775 /* ---------------- Quad registers ---------------- */
    776 
    777 static Int quadGuestRegOffset ( UInt qregNo )
    778 {
    779    /* Do we care about endianness here?  Probably do if we ever get
    780       into the situation of dealing with the 64 bit Neon registers. */
    781    switch (qregNo) {
    782       case 0:  return OFFB_D0;
    783       case 1:  return OFFB_D2;
    784       case 2:  return OFFB_D4;
    785       case 3:  return OFFB_D6;
    786       case 4:  return OFFB_D8;
    787       case 5:  return OFFB_D10;
    788       case 6:  return OFFB_D12;
    789       case 7:  return OFFB_D14;
    790       case 8:  return OFFB_D16;
    791       case 9:  return OFFB_D18;
    792       case 10: return OFFB_D20;
    793       case 11: return OFFB_D22;
    794       case 12: return OFFB_D24;
    795       case 13: return OFFB_D26;
    796       case 14: return OFFB_D28;
    797       case 15: return OFFB_D30;
    798       default: vassert(0);
    799    }
    800 }
    801 
    802 /* Plain ("low level") read from a Neon Qreg. */
    803 static IRExpr* llGetQReg ( UInt qregNo )
    804 {
    805    vassert(qregNo < 16);
    806    return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
    807 }
    808 
    809 /* Architected read from a Neon Qreg. */
    810 static IRExpr* getQReg ( UInt qregNo ) {
    811    return llGetQReg( qregNo );
    812 }
    813 
    814 /* Plain ("low level") write to a Neon Qreg. */
    815 static void llPutQReg ( UInt qregNo, IRExpr* e )
    816 {
    817    vassert(qregNo < 16);
    818    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
    819    stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
    820 }
    821 
    822 /* Architected write to a Neon Qreg.  Handles conditional writes to the
    823    register: if guardT == IRTemp_INVALID then the write is
    824    unconditional. */
    825 static void putQReg ( UInt    qregNo,
    826                       IRExpr* e,
    827                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    828 {
    829    /* So, generate either an unconditional or a conditional write to
    830       the reg. */
    831    if (guardT == IRTemp_INVALID) {
    832       /* unconditional write */
    833       llPutQReg( qregNo, e );
    834    } else {
    835       llPutQReg( qregNo,
    836                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    837                              e, llGetQReg(qregNo) ));
    838    }
    839 }
    840 
    841 
    842 /* ---------------- Float registers ---------------- */
    843 
    844 static Int floatGuestRegOffset ( UInt fregNo )
    845 {
    846    /* Start with the offset of the containing double, and then correct
    847       for endianness.  Actually this is completely bogus and needs
    848       careful thought. */
    849    Int off;
    850    vassert(fregNo < 32);
    851    off = doubleGuestRegOffset(fregNo >> 1);
    852    if (host_endness == VexEndnessLE) {
    853       if (fregNo & 1)
    854          off += 4;
    855    } else {
    856       vassert(0);
    857    }
    858    return off;
    859 }
    860 
    861 /* Plain ("low level") read from a VFP Freg. */
    862 static IRExpr* llGetFReg ( UInt fregNo )
    863 {
    864    vassert(fregNo < 32);
    865    return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
    866 }
    867 
    868 /* Architected read from a VFP Freg. */
    869 static IRExpr* getFReg ( UInt fregNo ) {
    870    return llGetFReg( fregNo );
    871 }
    872 
    873 /* Plain ("low level") write to a VFP Freg. */
    874 static void llPutFReg ( UInt fregNo, IRExpr* e )
    875 {
    876    vassert(fregNo < 32);
    877    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
    878    stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
    879 }
    880 
    881 /* Architected write to a VFP Freg.  Handles conditional writes to the
    882    register: if guardT == IRTemp_INVALID then the write is
    883    unconditional. */
    884 static void putFReg ( UInt    fregNo,
    885                       IRExpr* e,
    886                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    887 {
    888    /* So, generate either an unconditional or a conditional write to
    889       the reg. */
    890    if (guardT == IRTemp_INVALID) {
    891       /* unconditional write */
    892       llPutFReg( fregNo, e );
    893    } else {
    894       llPutFReg( fregNo,
    895                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    896                              e, llGetFReg(fregNo) ));
    897    }
    898 }
    899 
    900 
    901 /* ---------------- Misc registers ---------------- */
    902 
    903 static void putMiscReg32 ( UInt    gsoffset,
    904                            IRExpr* e, /* :: Ity_I32 */
    905                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    906 {
    907    switch (gsoffset) {
    908       case OFFB_FPSCR:   break;
    909       case OFFB_QFLAG32: break;
    910       case OFFB_GEFLAG0: break;
    911       case OFFB_GEFLAG1: break;
    912       case OFFB_GEFLAG2: break;
    913       case OFFB_GEFLAG3: break;
    914       default: vassert(0); /* awaiting more cases */
    915    }
    916    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
    917 
    918    if (guardT == IRTemp_INVALID) {
    919       /* unconditional write */
    920       stmt(IRStmt_Put(gsoffset, e));
    921    } else {
    922       stmt(IRStmt_Put(
    923          gsoffset,
    924          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    925                      e, IRExpr_Get(gsoffset, Ity_I32) )
    926       ));
    927    }
    928 }
    929 
    930 static IRTemp get_ITSTATE ( void )
    931 {
    932    ASSERT_IS_THUMB;
    933    IRTemp t = newTemp(Ity_I32);
    934    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
    935    return t;
    936 }
    937 
    938 static void put_ITSTATE ( IRTemp t )
    939 {
    940    ASSERT_IS_THUMB;
    941    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
    942 }
    943 
    944 static IRTemp get_QFLAG32 ( void )
    945 {
    946    IRTemp t = newTemp(Ity_I32);
    947    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
    948    return t;
    949 }
    950 
    951 static void put_QFLAG32 ( IRTemp t, IRTemp condT )
    952 {
    953    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
    954 }
    955 
    956 /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
    957    Status Register) to indicate that overflow or saturation occurred.
    958    Nb: t must be zero to denote no saturation, and any nonzero
    959    value to indicate saturation. */
    960 static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
    961 {
    962    IRTemp old = get_QFLAG32();
    963    IRTemp nyu = newTemp(Ity_I32);
    964    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
    965    put_QFLAG32(nyu, condT);
    966 }
    967 
    968 /* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
    969    flagNo: which flag bit to set [3...0]
    970    lowbits_to_ignore:  0 = look at all 32 bits
    971                        8 = look at top 24 bits only
    972                       16 = look at top 16 bits only
    973                       31 = look at the top bit only
    974    e: input value to be evaluated.
    975    The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
    976    masked out.  If the resulting value is zero then the GE flag is
    977    set to 0; any other value sets the flag to 1. */
    978 static void put_GEFLAG32 ( Int flagNo,            /* 0, 1, 2 or 3 */
    979                            Int lowbits_to_ignore, /* 0, 8, 16 or 31   */
    980                            IRExpr* e,             /* Ity_I32 */
    981                            IRTemp condT )
    982 {
    983    vassert( flagNo >= 0 && flagNo <= 3 );
    984    vassert( lowbits_to_ignore == 0  ||
    985             lowbits_to_ignore == 8  ||
    986             lowbits_to_ignore == 16 ||
    987             lowbits_to_ignore == 31 );
    988    IRTemp masked = newTemp(Ity_I32);
    989    assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
    990 
    991    switch (flagNo) {
    992       case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
    993       case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
    994       case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
    995       case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
    996       default: vassert(0);
    997    }
    998 }
    999 
   1000 /* Return the (32-bit, zero-or-nonzero representation scheme) of
   1001    the specified GE flag. */
   1002 static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
   1003 {
   1004    switch (flagNo) {
   1005       case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
   1006       case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
   1007       case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
   1008       case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
   1009       default: vassert(0);
   1010    }
   1011 }
   1012 
   1013 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
   1014    2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
   1015    15 of the value.  All other bits are ignored. */
   1016 static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
   1017 {
   1018    IRTemp ge10 = newTemp(Ity_I32);
   1019    IRTemp ge32 = newTemp(Ity_I32);
   1020    assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
   1021    assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
   1022    put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
   1023    put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
   1024    put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
   1025    put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
   1026 }
   1027 
   1028 
   1029 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3
   1030    from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
   1031    bit 7.  All other bits are ignored. */
   1032 static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
   1033 {
   1034    IRTemp ge0 = newTemp(Ity_I32);
   1035    IRTemp ge1 = newTemp(Ity_I32);
   1036    IRTemp ge2 = newTemp(Ity_I32);
   1037    IRTemp ge3 = newTemp(Ity_I32);
   1038    assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
   1039    assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
   1040    assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
   1041    assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
   1042    put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
   1043    put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
   1044    put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
   1045    put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
   1046 }
   1047 
   1048 
   1049 /* ---------------- FPSCR stuff ---------------- */
   1050 
   1051 /* Generate IR to get hold of the rounding mode bits in FPSCR, and
   1052    convert them to IR format.  Bind the final result to the
   1053    returned temp. */
   1054 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
   1055 {
   1056    /* The ARMvfp encoding for rounding mode bits is:
   1057          00  to nearest
   1058          01  to +infinity
   1059          10  to -infinity
   1060          11  to zero
   1061       We need to convert that to the IR encoding:
   1062          00  to nearest (the default)
   1063          10  to +infinity
   1064          01  to -infinity
   1065          11  to zero
   1066       Which can be done by swapping bits 0 and 1.
   1067       The rmode bits are at 23:22 in FPSCR.
   1068    */
   1069    IRTemp armEncd = newTemp(Ity_I32);
   1070    IRTemp swapped = newTemp(Ity_I32);
   1071    /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
   1072       we don't zero out bits 24 and above, since the assignment to
   1073       'swapped' will mask them out anyway. */
   1074    assign(armEncd,
   1075           binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
   1076    /* Now swap them. */
   1077    assign(swapped,
   1078           binop(Iop_Or32,
   1079                 binop(Iop_And32,
   1080                       binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
   1081                       mkU32(2)),
   1082                 binop(Iop_And32,
   1083                       binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
   1084                       mkU32(1))
   1085          ));
   1086    return swapped;
   1087 }
   1088 
   1089 
   1090 /*------------------------------------------------------------*/
   1091 /*--- Helpers for flag handling and conditional insns      ---*/
   1092 /*------------------------------------------------------------*/
   1093 
   1094 static const HChar* name_ARMCondcode ( ARMCondcode cond )
   1095 {
   1096    switch (cond) {
   1097       case ARMCondEQ:  return "{eq}";
   1098       case ARMCondNE:  return "{ne}";
   1099       case ARMCondHS:  return "{hs}";  // or 'cs'
   1100       case ARMCondLO:  return "{lo}";  // or 'cc'
   1101       case ARMCondMI:  return "{mi}";
   1102       case ARMCondPL:  return "{pl}";
   1103       case ARMCondVS:  return "{vs}";
   1104       case ARMCondVC:  return "{vc}";
   1105       case ARMCondHI:  return "{hi}";
   1106       case ARMCondLS:  return "{ls}";
   1107       case ARMCondGE:  return "{ge}";
   1108       case ARMCondLT:  return "{lt}";
   1109       case ARMCondGT:  return "{gt}";
   1110       case ARMCondLE:  return "{le}";
   1111       case ARMCondAL:  return ""; // {al}: is the default
   1112       case ARMCondNV:  return "{nv}";
   1113       default: vpanic("name_ARMCondcode");
   1114    }
   1115 }
   1116 /* and a handy shorthand for it */
   1117 static const HChar* nCC ( ARMCondcode cond ) {
   1118    return name_ARMCondcode(cond);
   1119 }
   1120 
   1121 
   1122 /* Build IR to calculate some particular condition from stored
   1123    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   1124    Ity_I32, suitable for narrowing.  Although the return type is
   1125    Ity_I32, the returned value is either 0 or 1.  'cond' must be
   1126    :: Ity_I32 and must denote the condition to compute in
   1127    bits 7:4, and be zero everywhere else.
   1128 */
   1129 static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
   1130 {
   1131    vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
   1132    /* And 'cond' had better produce a value in which only bits 7:4 are
   1133       nonzero.  However, obviously we can't assert for that. */
   1134 
   1135    /* So what we're constructing for the first argument is
   1136       "(cond << 4) | stored-operation".
   1137       However, as per comments above, 'cond' must be supplied
   1138       pre-shifted to this function.
   1139 
   1140       This pairing scheme requires that the ARM_CC_OP_ values all fit
   1141       in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
   1142       8 bits of the first argument. */
   1143    IRExpr** args
   1144       = mkIRExprVec_4(
   1145            binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
   1146            IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1147            IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1148            IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
   1149         );
   1150    IRExpr* call
   1151       = mkIRExprCCall(
   1152            Ity_I32,
   1153            0/*regparm*/,
   1154            "armg_calculate_condition", &armg_calculate_condition,
   1155            args
   1156         );
   1157 
   1158    /* Exclude the requested condition, OP and NDEP from definedness
   1159       checking.  We're only interested in DEP1 and DEP2. */
   1160    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1161    return call;
   1162 }
   1163 
   1164 
   1165 /* Build IR to calculate some particular condition from stored
   1166    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   1167    Ity_I32, suitable for narrowing.  Although the return type is
   1168    Ity_I32, the returned value is either 0 or 1.
   1169 */
   1170 static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
   1171 {
   1172   /* First arg is "(cond << 4) | condition".  This requires that the
   1173      ARM_CC_OP_ values all fit in 4 bits.  Hence we are passing a
   1174      (COND, OP) pair in the lowest 8 bits of the first argument. */
   1175    vassert(cond >= 0 && cond <= 15);
   1176    return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
   1177 }
   1178 
   1179 
   1180 /* Build IR to calculate just the carry flag from stored
   1181    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1182    Ity_I32. */
   1183 static IRExpr* mk_armg_calculate_flag_c ( void )
   1184 {
   1185    IRExpr** args
   1186       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1187                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1188                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1189                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1190    IRExpr* call
   1191       = mkIRExprCCall(
   1192            Ity_I32,
   1193            0/*regparm*/,
   1194            "armg_calculate_flag_c", &armg_calculate_flag_c,
   1195            args
   1196         );
   1197    /* Exclude OP and NDEP from definedness checking.  We're only
   1198       interested in DEP1 and DEP2. */
   1199    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1200    return call;
   1201 }
   1202 
   1203 
   1204 /* Build IR to calculate just the overflow flag from stored
   1205    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1206    Ity_I32. */
   1207 static IRExpr* mk_armg_calculate_flag_v ( void )
   1208 {
   1209    IRExpr** args
   1210       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1211                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1212                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1213                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1214    IRExpr* call
   1215       = mkIRExprCCall(
   1216            Ity_I32,
   1217            0/*regparm*/,
   1218            "armg_calculate_flag_v", &armg_calculate_flag_v,
   1219            args
   1220         );
   1221    /* Exclude OP and NDEP from definedness checking.  We're only
   1222       interested in DEP1 and DEP2. */
   1223    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1224    return call;
   1225 }
   1226 
   1227 
   1228 /* Build IR to calculate N Z C V in bits 31:28 of the
   1229    returned word. */
   1230 static IRExpr* mk_armg_calculate_flags_nzcv ( void )
   1231 {
   1232    IRExpr** args
   1233       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1234                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1235                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1236                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1237    IRExpr* call
   1238       = mkIRExprCCall(
   1239            Ity_I32,
   1240            0/*regparm*/,
   1241            "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
   1242            args
   1243         );
   1244    /* Exclude OP and NDEP from definedness checking.  We're only
   1245       interested in DEP1 and DEP2. */
   1246    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1247    return call;
   1248 }
   1249 
   1250 static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
   1251 {
   1252    IRExpr** args1;
   1253    IRExpr** args2;
   1254    IRExpr *call1, *call2, *res;
   1255 
   1256    if (Q) {
   1257       args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
   1258                               binop(Iop_GetElem32x4, resL, mkU8(1)),
   1259                               binop(Iop_GetElem32x4, resR, mkU8(0)),
   1260                               binop(Iop_GetElem32x4, resR, mkU8(1)) );
   1261       args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
   1262                               binop(Iop_GetElem32x4, resL, mkU8(3)),
   1263                               binop(Iop_GetElem32x4, resR, mkU8(2)),
   1264                               binop(Iop_GetElem32x4, resR, mkU8(3)) );
   1265    } else {
   1266       args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
   1267                               binop(Iop_GetElem32x2, resL, mkU8(1)),
   1268                               binop(Iop_GetElem32x2, resR, mkU8(0)),
   1269                               binop(Iop_GetElem32x2, resR, mkU8(1)) );
   1270    }
   1271 
   1272    call1 = mkIRExprCCall(
   1273              Ity_I32,
   1274              0/*regparm*/,
   1275              "armg_calculate_flag_qc", &armg_calculate_flag_qc,
   1276              args1
   1277           );
   1278    if (Q) {
   1279       call2 = mkIRExprCCall(
   1280                 Ity_I32,
   1281                 0/*regparm*/,
   1282                 "armg_calculate_flag_qc", &armg_calculate_flag_qc,
   1283                 args2
   1284              );
   1285    }
   1286    if (Q) {
   1287       res = binop(Iop_Or32, call1, call2);
   1288    } else {
   1289       res = call1;
   1290    }
   1291    return res;
   1292 }
   1293 
   1294 // FIXME: this is named wrongly .. looks like a sticky set of
   1295 // QC, not a write to it.
   1296 static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
   1297                          IRTemp condT )
   1298 {
   1299    putMiscReg32 (OFFB_FPSCR,
   1300                  binop(Iop_Or32,
   1301                        IRExpr_Get(OFFB_FPSCR, Ity_I32),
   1302                        binop(Iop_Shl32,
   1303                              mk_armg_calculate_flag_qc(resL, resR, Q),
   1304                              mkU8(27))),
   1305                  condT);
   1306 }
   1307 
   1308 /* Build IR to conditionally set the flags thunk.  As with putIReg, if
   1309    guard is IRTemp_INVALID then it's unconditional, else it holds a
   1310    condition :: Ity_I32. */
   1311 static
   1312 void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
   1313                          IRTemp t_dep2, IRTemp t_ndep,
   1314                          IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1315 {
   1316    vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
   1317    vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
   1318    vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
   1319    vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
   1320    if (guardT == IRTemp_INVALID) {
   1321       /* unconditional */
   1322       stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(cc_op) ));
   1323       stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   1324       stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   1325       stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
   1326    } else {
   1327       /* conditional */
   1328       IRTemp c1 = newTemp(Ity_I1);
   1329       assign( c1, binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)) );
   1330       stmt( IRStmt_Put(
   1331                OFFB_CC_OP,
   1332                IRExpr_ITE( mkexpr(c1),
   1333                            mkU32(cc_op),
   1334                            IRExpr_Get(OFFB_CC_OP, Ity_I32) ) ));
   1335       stmt( IRStmt_Put(
   1336                OFFB_CC_DEP1,
   1337                IRExpr_ITE( mkexpr(c1),
   1338                            mkexpr(t_dep1),
   1339                            IRExpr_Get(OFFB_CC_DEP1, Ity_I32) ) ));
   1340       stmt( IRStmt_Put(
   1341                OFFB_CC_DEP2,
   1342                IRExpr_ITE( mkexpr(c1),
   1343                            mkexpr(t_dep2),
   1344                            IRExpr_Get(OFFB_CC_DEP2, Ity_I32) ) ));
   1345       stmt( IRStmt_Put(
   1346                OFFB_CC_NDEP,
   1347                IRExpr_ITE( mkexpr(c1),
   1348                            mkexpr(t_ndep),
   1349                            IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ) ));
   1350    }
   1351 }
   1352 
   1353 
   1354 /* Minor variant of the above that sets NDEP to zero (if it
   1355    sets it at all) */
   1356 static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
   1357                              IRTemp t_dep2,
   1358                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1359 {
   1360    IRTemp z32 = newTemp(Ity_I32);
   1361    assign( z32, mkU32(0) );
   1362    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
   1363 }
   1364 
   1365 
   1366 /* Minor variant of the above that sets DEP2 to zero (if it
   1367    sets it at all) */
   1368 static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
   1369                              IRTemp t_ndep,
   1370                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1371 {
   1372    IRTemp z32 = newTemp(Ity_I32);
   1373    assign( z32, mkU32(0) );
   1374    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
   1375 }
   1376 
   1377 
   1378 /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
   1379    sets them at all) */
   1380 static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
   1381                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1382 {
   1383    IRTemp z32 = newTemp(Ity_I32);
   1384    assign( z32, mkU32(0) );
   1385    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
   1386 }
   1387 
   1388 
   1389 /* ARM only */
   1390 /* Generate a side-exit to the next instruction, if the given guard
   1391    expression :: Ity_I32 is 0 (note!  the side exit is taken if the
   1392    condition is false!)  This is used to skip over conditional
   1393    instructions which we can't generate straight-line code for, either
   1394    because they are too complex or (more likely) they potentially
   1395    generate exceptions.
   1396 */
   1397 static void mk_skip_over_A32_if_cond_is_false (
   1398                IRTemp guardT /* :: Ity_I32, 0 or 1 */
   1399             )
   1400 {
   1401    ASSERT_IS_ARM;
   1402    vassert(guardT != IRTemp_INVALID);
   1403    vassert(0 == (guest_R15_curr_instr_notENC & 3));
   1404    stmt( IRStmt_Exit(
   1405             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
   1406             Ijk_Boring,
   1407             IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
   1408             OFFB_R15T
   1409        ));
   1410 }
   1411 
   1412 /* Thumb16 only */
   1413 /* ditto, but jump over a 16-bit thumb insn */
   1414 static void mk_skip_over_T16_if_cond_is_false (
   1415                IRTemp guardT /* :: Ity_I32, 0 or 1 */
   1416             )
   1417 {
   1418    ASSERT_IS_THUMB;
   1419    vassert(guardT != IRTemp_INVALID);
   1420    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   1421    stmt( IRStmt_Exit(
   1422             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
   1423             Ijk_Boring,
   1424             IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
   1425             OFFB_R15T
   1426        ));
   1427 }
   1428 
   1429 
   1430 /* Thumb32 only */
   1431 /* ditto, but jump over a 32-bit thumb insn */
   1432 static void mk_skip_over_T32_if_cond_is_false (
   1433                IRTemp guardT /* :: Ity_I32, 0 or 1 */
   1434             )
   1435 {
   1436    ASSERT_IS_THUMB;
   1437    vassert(guardT != IRTemp_INVALID);
   1438    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   1439    stmt( IRStmt_Exit(
   1440             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
   1441             Ijk_Boring,
   1442             IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
   1443             OFFB_R15T
   1444        ));
   1445 }
   1446 
   1447 
   1448 /* Thumb16 and Thumb32 only
   1449    Generate a SIGILL followed by a restart of the current instruction
   1450    if the given temp is nonzero. */
   1451 static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
   1452 {
   1453    ASSERT_IS_THUMB;
   1454    vassert(t != IRTemp_INVALID);
   1455    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   1456    stmt(
   1457       IRStmt_Exit(
   1458          binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
   1459          Ijk_NoDecode,
   1460          IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
   1461          OFFB_R15T
   1462       )
   1463    );
   1464 }
   1465 
   1466 
   1467 /* Inspect the old_itstate, and generate a SIGILL if it indicates that
   1468    we are currently in an IT block and are not the last in the block.
   1469    This also rolls back guest_ITSTATE to its old value before the exit
   1470    and restores it to its new value afterwards.  This is so that if
   1471    the exit is taken, we have an up to date version of ITSTATE
   1472    available.  Without doing that, we have no hope of making precise
   1473    exceptions work. */
   1474 static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
   1475                IRTemp old_itstate /* :: Ity_I32 */,
   1476                IRTemp new_itstate /* :: Ity_I32 */
   1477             )
   1478 {
   1479    ASSERT_IS_THUMB;
   1480    put_ITSTATE(old_itstate); // backout
   1481    IRTemp guards_for_next3 = newTemp(Ity_I32);
   1482    assign(guards_for_next3,
   1483           binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   1484    gen_SIGILL_T_if_nonzero(guards_for_next3);
   1485    put_ITSTATE(new_itstate); //restore
   1486 }
   1487 
   1488 
   1489 /* Simpler version of the above, which generates a SIGILL if
   1490    we're anywhere within an IT block. */
   1491 static void gen_SIGILL_T_if_in_ITBlock (
   1492                IRTemp old_itstate /* :: Ity_I32 */,
   1493                IRTemp new_itstate /* :: Ity_I32 */
   1494             )
   1495 {
   1496    put_ITSTATE(old_itstate); // backout
   1497    gen_SIGILL_T_if_nonzero(old_itstate);
   1498    put_ITSTATE(new_itstate); //restore
   1499 }
   1500 
   1501 
   1502 /* Generate an APSR value, from the NZCV thunk, and
   1503    from QFLAG32 and GEFLAG0 .. GEFLAG3. */
   1504 static IRTemp synthesise_APSR ( void )
   1505 {
   1506    IRTemp res1 = newTemp(Ity_I32);
   1507    // Get NZCV
   1508    assign( res1, mk_armg_calculate_flags_nzcv() );
   1509    // OR in the Q value
   1510    IRTemp res2 = newTemp(Ity_I32);
   1511    assign(
   1512       res2,
   1513       binop(Iop_Or32,
   1514             mkexpr(res1),
   1515             binop(Iop_Shl32,
   1516                   unop(Iop_1Uto32,
   1517                        binop(Iop_CmpNE32,
   1518                              mkexpr(get_QFLAG32()),
   1519                              mkU32(0))),
   1520                   mkU8(ARMG_CC_SHIFT_Q)))
   1521    );
   1522    // OR in GE0 .. GE3
   1523    IRExpr* ge0
   1524       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
   1525    IRExpr* ge1
   1526       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
   1527    IRExpr* ge2
   1528       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
   1529    IRExpr* ge3
   1530       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
   1531    IRTemp res3 = newTemp(Ity_I32);
   1532    assign(res3,
   1533           binop(Iop_Or32,
   1534                 mkexpr(res2),
   1535                 binop(Iop_Or32,
   1536                       binop(Iop_Or32,
   1537                             binop(Iop_Shl32, ge0, mkU8(16)),
   1538                             binop(Iop_Shl32, ge1, mkU8(17))),
   1539                       binop(Iop_Or32,
   1540                             binop(Iop_Shl32, ge2, mkU8(18)),
   1541                             binop(Iop_Shl32, ge3, mkU8(19))) )));
   1542    return res3;
   1543 }
   1544 
   1545 
   1546 /* and the inverse transformation: given an APSR value,
   1547    set the NZCV thunk, the Q flag, and the GE flags. */
   1548 static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
   1549                                 IRTemp apsrT, IRTemp condT )
   1550 {
   1551    vassert(write_nzcvq || write_ge);
   1552    if (write_nzcvq) {
   1553       // Do NZCV
   1554       IRTemp immT = newTemp(Ity_I32);
   1555       assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
   1556       setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
   1557       // Do Q
   1558       IRTemp qnewT = newTemp(Ity_I32);
   1559       assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
   1560       put_QFLAG32(qnewT, condT);
   1561    }
   1562    if (write_ge) {
   1563       // Do GE3..0
   1564       put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
   1565                    condT);
   1566       put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
   1567                    condT);
   1568       put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
   1569                    condT);
   1570       put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
   1571                    condT);
   1572    }
   1573 }
   1574 
   1575 
   1576 /*------------------------------------------------------------*/
   1577 /*--- Helpers for saturation                               ---*/
   1578 /*------------------------------------------------------------*/
   1579 
   1580 /* FIXME: absolutely the only diff. between (a) armUnsignedSatQ and
   1581    (b) armSignedSatQ is that in (a) the floor is set to 0, whereas in
   1582    (b) the floor is computed from the value of imm5.  These two functions
   1583    should be commoned up. */
   1584 
   1585 /* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
   1586    Optionally return flag resQ saying whether saturation occurred.
   1587    See definition in manual, section A2.2.1, page 41
   1588    (bits(N), boolean) UnsignedSatQ( integer i, integer N )
   1589    {
   1590      if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
   1591      elsif ( i < 0 )    { result = 0; saturated = TRUE; }
   1592      else               { result = i; saturated = FALSE; }
   1593      return ( result<N-1:0>, saturated );
   1594    }
   1595 */
   1596 static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
   1597                              IRTemp* resQ, /* OUT - Ity_I32  */
   1598                              IRTemp regT,  /* value to clamp - Ity_I32 */
   1599                              UInt imm5 )   /* saturation ceiling */
   1600 {
   1601    UInt ceil  = (1 << imm5) - 1;    // (2^imm5)-1
   1602    UInt floor = 0;
   1603 
   1604    IRTemp nd0 = newTemp(Ity_I32);
   1605    IRTemp nd1 = newTemp(Ity_I32);
   1606    IRTemp nd2 = newTemp(Ity_I1);
   1607    IRTemp nd3 = newTemp(Ity_I32);
   1608    IRTemp nd4 = newTemp(Ity_I32);
   1609    IRTemp nd5 = newTemp(Ity_I1);
   1610    IRTemp nd6 = newTemp(Ity_I32);
   1611 
   1612    assign( nd0, mkexpr(regT) );
   1613    assign( nd1, mkU32(ceil) );
   1614    assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
   1615    assign( nd3, IRExpr_ITE(mkexpr(nd2), mkexpr(nd1), mkexpr(nd0)) );
   1616    assign( nd4, mkU32(floor) );
   1617    assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
   1618    assign( nd6, IRExpr_ITE(mkexpr(nd5), mkexpr(nd4), mkexpr(nd3)) );
   1619    assign( *res, mkexpr(nd6) );
   1620 
   1621    /* if saturation occurred, then resQ is set to some nonzero value
   1622       if sat did not occur, resQ is guaranteed to be zero. */
   1623    if (resQ) {
   1624       assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   1625    }
   1626 }
   1627 
   1628 
   1629 /* SignedSatQ(): 'clamp' each value so it lies between  -2^(N-1) <= x <= 2^(N-1) - 1
   1630    Optionally return flag resQ saying whether saturation occurred.
   1631    - see definition in manual, section A2.2.1, page 41
   1632    (bits(N), boolean ) SignedSatQ( integer i, integer N )
   1633    {
   1634      if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
   1635      elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = TRUE;  }
   1636      else                      { result = i;           saturated = FALSE; }
   1637      return ( result[N-1:0], saturated );
   1638    }
   1639 */
   1640 static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
   1641                            UInt imm5,      /* saturation ceiling */
   1642                            IRTemp* res,    /* OUT - Ity_I32 */
   1643                            IRTemp* resQ )  /* OUT - Ity_I32  */
   1644 {
   1645    Int ceil  =  (1 << (imm5-1)) - 1;  //  (2^(imm5-1))-1
   1646    Int floor = -(1 << (imm5-1));      // -(2^(imm5-1))
   1647 
   1648    IRTemp nd0 = newTemp(Ity_I32);
   1649    IRTemp nd1 = newTemp(Ity_I32);
   1650    IRTemp nd2 = newTemp(Ity_I1);
   1651    IRTemp nd3 = newTemp(Ity_I32);
   1652    IRTemp nd4 = newTemp(Ity_I32);
   1653    IRTemp nd5 = newTemp(Ity_I1);
   1654    IRTemp nd6 = newTemp(Ity_I32);
   1655 
   1656    assign( nd0, mkexpr(regT) );
   1657    assign( nd1, mkU32(ceil) );
   1658    assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
   1659    assign( nd3, IRExpr_ITE( mkexpr(nd2), mkexpr(nd1), mkexpr(nd0) ) );
   1660    assign( nd4, mkU32(floor) );
   1661    assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
   1662    assign( nd6, IRExpr_ITE( mkexpr(nd5), mkexpr(nd4), mkexpr(nd3) ) );
   1663    assign( *res, mkexpr(nd6) );
   1664 
   1665    /* if saturation occurred, then resQ is set to some nonzero value
   1666       if sat did not occur, resQ is guaranteed to be zero. */
   1667    if (resQ) {
   1668      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   1669    }
   1670 }
   1671 
   1672 
   1673 /* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
   1674    overflow occurred for 32-bit addition.  Needs both args and the
   1675    result.  HD p27. */
   1676 static
   1677 IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
   1678                                       IRTemp argL, IRTemp argR )
   1679 {
   1680    IRTemp res = newTemp(Ity_I32);
   1681    assign(res, resE);
   1682    return
   1683       binop( Iop_Shr32,
   1684              binop( Iop_And32,
   1685                     binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
   1686                     binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
   1687              mkU8(31) );
   1688 }
   1689 
   1690 /* Similarly .. also from HD p27 .. */
   1691 static
   1692 IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
   1693                                       IRTemp argL, IRTemp argR )
   1694 {
   1695    IRTemp res = newTemp(Ity_I32);
   1696    assign(res, resE);
   1697    return
   1698       binop( Iop_Shr32,
   1699              binop( Iop_And32,
   1700                     binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
   1701                     binop( Iop_Xor32, mkexpr(res),  mkexpr(argL) )),
   1702              mkU8(31) );
   1703 }
   1704 
   1705 
   1706 /*------------------------------------------------------------*/
   1707 /*--- Larger helpers                                       ---*/
   1708 /*------------------------------------------------------------*/
   1709 
   1710 /* Compute both the result and new C flag value for a LSL by an imm5
   1711    or by a register operand.  May generate reads of the old C value
   1712    (hence only safe to use before any writes to guest state happen).
   1713    Are factored out so can be used by both ARM and Thumb.
   1714 
   1715    Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
   1716    "res" (the result)  is a.k.a. "shop", shifter operand
   1717    "newC" (the new C)  is a.k.a. "shco", shifter carry out
   1718 
   1719    The calling convention for res and newC is a bit funny.  They could
   1720    be passed by value, but instead are passed by ref.
   1721 
   1722    The C (shco) value computed must be zero in bits 31:1, as the IR
   1723    optimisations for flag handling (guest_arm_spechelper) rely on
   1724    that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
   1725    for it.  Same applies to all these functions that compute shco
   1726    after a shift or rotate, not just this one.
   1727 */
   1728 
   1729 static void compute_result_and_C_after_LSL_by_imm5 (
   1730                /*OUT*/HChar* buf,
   1731                IRTemp* res,
   1732                IRTemp* newC,
   1733                IRTemp rMt, UInt shift_amt, /* operands */
   1734                UInt rM      /* only for debug printing */
   1735             )
   1736 {
   1737    if (shift_amt == 0) {
   1738       if (newC) {
   1739          assign( *newC, mk_armg_calculate_flag_c() );
   1740       }
   1741       assign( *res, mkexpr(rMt) );
   1742       DIS(buf, "r%u", rM);
   1743    } else {
   1744       vassert(shift_amt >= 1 && shift_amt <= 31);
   1745       if (newC) {
   1746          assign( *newC,
   1747                  binop(Iop_And32,
   1748                        binop(Iop_Shr32, mkexpr(rMt),
   1749                                         mkU8(32 - shift_amt)),
   1750                        mkU32(1)));
   1751       }
   1752       assign( *res,
   1753               binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
   1754       DIS(buf, "r%u, LSL #%u", rM, shift_amt);
   1755    }
   1756 }
   1757 
   1758 
   1759 static void compute_result_and_C_after_LSL_by_reg (
   1760                /*OUT*/HChar* buf,
   1761                IRTemp* res,
   1762                IRTemp* newC,
   1763                IRTemp rMt, IRTemp rSt,  /* operands */
   1764                UInt rM,    UInt rS      /* only for debug printing */
   1765             )
   1766 {
   1767    // shift left in range 0 .. 255
   1768    // amt  = rS & 255
   1769    // res  = amt < 32 ?  Rm << amt  : 0
   1770    // newC = amt == 0     ? oldC  :
   1771    //        amt in 1..32 ?  Rm[32-amt]  : 0
   1772    IRTemp amtT = newTemp(Ity_I32);
   1773    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   1774    if (newC) {
   1775       /* mux0X(amt == 0,
   1776                mux0X(amt < 32,
   1777                      0,
   1778                      Rm[(32-amt) & 31]),
   1779                oldC)
   1780       */
   1781       /* About the best you can do is pray that iropt is able
   1782          to nuke most or all of the following junk. */
   1783       IRTemp oldC = newTemp(Ity_I32);
   1784       assign(oldC, mk_armg_calculate_flag_c() );
   1785       assign(
   1786          *newC,
   1787          IRExpr_ITE(
   1788             binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
   1789             mkexpr(oldC),
   1790             IRExpr_ITE(
   1791                binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
   1792                binop(Iop_And32,
   1793                      binop(Iop_Shr32,
   1794                            mkexpr(rMt),
   1795                            unop(Iop_32to8,
   1796                                 binop(Iop_And32,
   1797                                       binop(Iop_Sub32,
   1798                                             mkU32(32),
   1799                                             mkexpr(amtT)),
   1800                                       mkU32(31)
   1801                                 )
   1802                            )
   1803                      ),
   1804                      mkU32(1)
   1805                      ),
   1806                mkU32(0)
   1807             )
   1808          )
   1809       );
   1810    }
   1811    // (Rm << (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
   1812    // Lhs of the & limits the shift to 31 bits, so as to
   1813    // give known IR semantics.  Rhs of the & is all 1s for
   1814    // Rs <= 31 and all 0s for Rs >= 32.
   1815    assign(
   1816       *res,
   1817       binop(
   1818          Iop_And32,
   1819          binop(Iop_Shl32,
   1820                mkexpr(rMt),
   1821                unop(Iop_32to8,
   1822                     binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
   1823          binop(Iop_Sar32,
   1824                binop(Iop_Sub32,
   1825                      mkexpr(amtT),
   1826                      mkU32(32)),
   1827                mkU8(31))));
   1828     DIS(buf, "r%u, LSL r%u", rM, rS);
   1829 }
   1830 
   1831 
   1832 static void compute_result_and_C_after_LSR_by_imm5 (
   1833                /*OUT*/HChar* buf,
   1834                IRTemp* res,
   1835                IRTemp* newC,
   1836                IRTemp rMt, UInt shift_amt, /* operands */
   1837                UInt rM      /* only for debug printing */
   1838             )
   1839 {
   1840    if (shift_amt == 0) {
   1841       // conceptually a 32-bit shift, however:
   1842       // res  = 0
   1843       // newC = Rm[31]
   1844       if (newC) {
   1845          assign( *newC,
   1846                  binop(Iop_And32,
   1847                        binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
   1848                        mkU32(1)));
   1849       }
   1850       assign( *res, mkU32(0) );
   1851       DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
   1852    } else {
   1853       // shift in range 1..31
   1854       // res  = Rm >>u shift_amt
   1855       // newC = Rm[shift_amt - 1]
   1856       vassert(shift_amt >= 1 && shift_amt <= 31);
   1857       if (newC) {
   1858          assign( *newC,
   1859                  binop(Iop_And32,
   1860                        binop(Iop_Shr32, mkexpr(rMt),
   1861                                         mkU8(shift_amt - 1)),
   1862                        mkU32(1)));
   1863       }
   1864       assign( *res,
   1865               binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
   1866       DIS(buf, "r%u, LSR #%u", rM, shift_amt);
   1867    }
   1868 }
   1869 
   1870 
   1871 static void compute_result_and_C_after_LSR_by_reg (
   1872                /*OUT*/HChar* buf,
   1873                IRTemp* res,
   1874                IRTemp* newC,
   1875                IRTemp rMt, IRTemp rSt,  /* operands */
   1876                UInt rM,    UInt rS      /* only for debug printing */
   1877             )
   1878 {
   1879    // shift right in range 0 .. 255
   1880    // amt = rS & 255
   1881    // res  = amt < 32 ?  Rm >>u amt  : 0
   1882    // newC = amt == 0     ? oldC  :
   1883    //        amt in 1..32 ?  Rm[amt-1]  : 0
   1884    IRTemp amtT = newTemp(Ity_I32);
   1885    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   1886    if (newC) {
   1887       /* mux0X(amt == 0,
   1888                mux0X(amt < 32,
   1889                      0,
   1890                      Rm[(amt-1) & 31]),
   1891                oldC)
   1892       */
   1893       IRTemp oldC = newTemp(Ity_I32);
   1894       assign(oldC, mk_armg_calculate_flag_c() );
   1895       assign(
   1896          *newC,
   1897          IRExpr_ITE(
   1898             binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
   1899             mkexpr(oldC),
   1900             IRExpr_ITE(
   1901                binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
   1902                binop(Iop_And32,
   1903                      binop(Iop_Shr32,
   1904                            mkexpr(rMt),
   1905                            unop(Iop_32to8,
   1906                                 binop(Iop_And32,
   1907                                       binop(Iop_Sub32,
   1908                                             mkexpr(amtT),
   1909                                             mkU32(1)),
   1910                                       mkU32(31)
   1911                                 )
   1912                            )
   1913                      ),
   1914                      mkU32(1)
   1915                      ),
   1916                mkU32(0)
   1917             )
   1918          )
   1919       );
   1920    }
   1921    // (Rm >>u (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
   1922    // Lhs of the & limits the shift to 31 bits, so as to
   1923    // give known IR semantics.  Rhs of the & is all 1s for
   1924    // Rs <= 31 and all 0s for Rs >= 32.
   1925    assign(
   1926       *res,
   1927       binop(
   1928          Iop_And32,
   1929          binop(Iop_Shr32,
   1930                mkexpr(rMt),
   1931                unop(Iop_32to8,
   1932                     binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
   1933          binop(Iop_Sar32,
   1934                binop(Iop_Sub32,
   1935                      mkexpr(amtT),
   1936                      mkU32(32)),
   1937                mkU8(31))));
   1938     DIS(buf, "r%u, LSR r%u", rM, rS);
   1939 }
   1940 
   1941 
   1942 static void compute_result_and_C_after_ASR_by_imm5 (
   1943                /*OUT*/HChar* buf,
   1944                IRTemp* res,
   1945                IRTemp* newC,
   1946                IRTemp rMt, UInt shift_amt, /* operands */
   1947                UInt rM      /* only for debug printing */
   1948             )
   1949 {
   1950    if (shift_amt == 0) {
   1951       // conceptually a 32-bit shift, however:
   1952       // res  = Rm >>s 31
   1953       // newC = Rm[31]
   1954       if (newC) {
   1955          assign( *newC,
   1956                  binop(Iop_And32,
   1957                        binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
   1958                        mkU32(1)));
   1959       }
   1960       assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
   1961       DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
   1962    } else {
   1963       // shift in range 1..31
   1964       // res = Rm >>s shift_amt
   1965       // newC = Rm[shift_amt - 1]
   1966       vassert(shift_amt >= 1 && shift_amt <= 31);
   1967       if (newC) {
   1968          assign( *newC,
   1969                  binop(Iop_And32,
   1970                        binop(Iop_Shr32, mkexpr(rMt),
   1971                                         mkU8(shift_amt - 1)),
   1972                        mkU32(1)));
   1973       }
   1974       assign( *res,
   1975               binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
   1976       DIS(buf, "r%u, ASR #%u", rM, shift_amt);
   1977    }
   1978 }
   1979 
   1980 
   1981 static void compute_result_and_C_after_ASR_by_reg (
   1982                /*OUT*/HChar* buf,
   1983                IRTemp* res,
   1984                IRTemp* newC,
   1985                IRTemp rMt, IRTemp rSt,  /* operands */
   1986                UInt rM,    UInt rS      /* only for debug printing */
   1987             )
   1988 {
   1989    // arithmetic shift right in range 0 .. 255
   1990    // amt = rS & 255
   1991    // res  = amt < 32 ?  Rm >>s amt  : Rm >>s 31
   1992    // newC = amt == 0     ? oldC  :
   1993    //        amt in 1..32 ?  Rm[amt-1]  : Rm[31]
   1994    IRTemp amtT = newTemp(Ity_I32);
   1995    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   1996    if (newC) {
   1997       /* mux0X(amt == 0,
   1998                mux0X(amt < 32,
   1999                      Rm[31],
   2000                      Rm[(amt-1) & 31])
   2001                oldC)
   2002       */
   2003       IRTemp oldC = newTemp(Ity_I32);
   2004       assign(oldC, mk_armg_calculate_flag_c() );
   2005       assign(
   2006          *newC,
   2007          IRExpr_ITE(
   2008             binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
   2009             mkexpr(oldC),
   2010             IRExpr_ITE(
   2011                binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
   2012                binop(Iop_And32,
   2013                      binop(Iop_Shr32,
   2014                            mkexpr(rMt),
   2015                            unop(Iop_32to8,
   2016                                 binop(Iop_And32,
   2017                                       binop(Iop_Sub32,
   2018                                             mkexpr(amtT),
   2019                                             mkU32(1)),
   2020                                       mkU32(31)
   2021                                 )
   2022                            )
   2023                      ),
   2024                      mkU32(1)
   2025                      ),
   2026                binop(Iop_And32,
   2027                      binop(Iop_Shr32,
   2028                            mkexpr(rMt),
   2029                            mkU8(31)
   2030                      ),
   2031                      mkU32(1)
   2032                )
   2033             )
   2034          )
   2035       );
   2036    }
   2037    // (Rm >>s (amt <u 32 ? amt : 31))
   2038    assign(
   2039       *res,
   2040       binop(
   2041          Iop_Sar32,
   2042          mkexpr(rMt),
   2043          unop(
   2044             Iop_32to8,
   2045             IRExpr_ITE(
   2046                binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32)),
   2047                mkexpr(amtT),
   2048                mkU32(31)))));
   2049     DIS(buf, "r%u, ASR r%u", rM, rS);
   2050 }
   2051 
   2052 
   2053 static void compute_result_and_C_after_ROR_by_reg (
   2054                /*OUT*/HChar* buf,
   2055                IRTemp* res,
   2056                IRTemp* newC,
   2057                IRTemp rMt, IRTemp rSt,  /* operands */
   2058                UInt rM,    UInt rS      /* only for debug printing */
   2059             )
   2060 {
   2061    // rotate right in range 0 .. 255
   2062    // amt = rS & 255
   2063    // shop =  Rm `ror` (amt & 31)
   2064    // shco =  amt == 0 ? oldC : Rm[(amt-1) & 31]
   2065    IRTemp amtT = newTemp(Ity_I32);
   2066    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   2067    IRTemp amt5T = newTemp(Ity_I32);
   2068    assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
   2069    IRTemp oldC = newTemp(Ity_I32);
   2070    assign(oldC, mk_armg_calculate_flag_c() );
   2071    if (newC) {
   2072       assign(
   2073          *newC,
   2074          IRExpr_ITE(
   2075             binop(Iop_CmpNE32, mkexpr(amtT), mkU32(0)),
   2076             binop(Iop_And32,
   2077                   binop(Iop_Shr32,
   2078                         mkexpr(rMt),
   2079                         unop(Iop_32to8,
   2080                              binop(Iop_And32,
   2081                                    binop(Iop_Sub32,
   2082                                          mkexpr(amtT),
   2083                                          mkU32(1)
   2084                                    ),
   2085                                    mkU32(31)
   2086                              )
   2087                         )
   2088                   ),
   2089                   mkU32(1)
   2090             ),
   2091             mkexpr(oldC)
   2092          )
   2093       );
   2094    }
   2095    assign(
   2096       *res,
   2097       IRExpr_ITE(
   2098          binop(Iop_CmpNE32, mkexpr(amt5T), mkU32(0)),
   2099          binop(Iop_Or32,
   2100                binop(Iop_Shr32,
   2101                      mkexpr(rMt),
   2102                      unop(Iop_32to8, mkexpr(amt5T))
   2103                ),
   2104                binop(Iop_Shl32,
   2105                      mkexpr(rMt),
   2106                      unop(Iop_32to8,
   2107                           binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
   2108                      )
   2109                )
   2110                ),
   2111          mkexpr(rMt)
   2112       )
   2113    );
   2114    DIS(buf, "r%u, ROR r#%u", rM, rS);
   2115 }
   2116 
   2117 
   2118 /* Generate an expression corresponding to the immediate-shift case of
   2119    a shifter operand.  This is used both for ARM and Thumb2.
   2120 
   2121    Bind it to a temporary, and return that via *res.  If newC is
   2122    non-NULL, also compute a value for the shifter's carry out (in the
   2123    LSB of a word), bind it to a temporary, and return that via *newC.
   2124 
   2125    Generates GETs from the guest state and is therefore not safe to
   2126    use once we start doing PUTs to it, for any given instruction.
   2127 
   2128    'how' is encoded thusly:
   2129       00b LSL,  01b LSR,  10b ASR,  11b ROR
   2130    Most but not all ARM and Thumb integer insns use this encoding.
   2131    Be careful to ensure the right value is passed here.
   2132 */
   2133 static void compute_result_and_C_after_shift_by_imm5 (
   2134                /*OUT*/HChar* buf,
   2135                /*OUT*/IRTemp* res,
   2136                /*OUT*/IRTemp* newC,
   2137                IRTemp  rMt,       /* reg to shift */
   2138                UInt    how,       /* what kind of shift */
   2139                UInt    shift_amt, /* shift amount (0..31) */
   2140                UInt    rM         /* only for debug printing */
   2141             )
   2142 {
   2143    vassert(shift_amt < 32);
   2144    vassert(how < 4);
   2145 
   2146    switch (how) {
   2147 
   2148       case 0:
   2149          compute_result_and_C_after_LSL_by_imm5(
   2150             buf, res, newC, rMt, shift_amt, rM
   2151          );
   2152          break;
   2153 
   2154       case 1:
   2155          compute_result_and_C_after_LSR_by_imm5(
   2156             buf, res, newC, rMt, shift_amt, rM
   2157          );
   2158          break;
   2159 
   2160       case 2:
   2161          compute_result_and_C_after_ASR_by_imm5(
   2162             buf, res, newC, rMt, shift_amt, rM
   2163          );
   2164          break;
   2165 
   2166       case 3:
   2167          if (shift_amt == 0) {
   2168             IRTemp oldcT = newTemp(Ity_I32);
   2169             // rotate right 1 bit through carry (?)
   2170             // RRX -- described at ARM ARM A5-17
   2171             // res  = (oldC << 31) | (Rm >>u 1)
   2172             // newC = Rm[0]
   2173             if (newC) {
   2174                assign( *newC,
   2175                        binop(Iop_And32, mkexpr(rMt), mkU32(1)));
   2176             }
   2177             assign( oldcT, mk_armg_calculate_flag_c() );
   2178             assign( *res,
   2179                     binop(Iop_Or32,
   2180                           binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
   2181                           binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
   2182             DIS(buf, "r%u, RRX", rM);
   2183          } else {
   2184             // rotate right in range 1..31
   2185             // res  = Rm `ror` shift_amt
   2186             // newC = Rm[shift_amt - 1]
   2187             vassert(shift_amt >= 1 && shift_amt <= 31);
   2188             if (newC) {
   2189                assign( *newC,
   2190                        binop(Iop_And32,
   2191                              binop(Iop_Shr32, mkexpr(rMt),
   2192                                               mkU8(shift_amt - 1)),
   2193                              mkU32(1)));
   2194             }
   2195             assign( *res,
   2196                     binop(Iop_Or32,
   2197                           binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
   2198                           binop(Iop_Shl32, mkexpr(rMt),
   2199                                            mkU8(32-shift_amt))));
   2200             DIS(buf, "r%u, ROR #%u", rM, shift_amt);
   2201          }
   2202          break;
   2203 
   2204       default:
   2205          /*NOTREACHED*/
   2206          vassert(0);
   2207    }
   2208 }
   2209 
   2210 
   2211 /* Generate an expression corresponding to the register-shift case of
   2212    a shifter operand.  This is used both for ARM and Thumb2.
   2213 
   2214    Bind it to a temporary, and return that via *res.  If newC is
   2215    non-NULL, also compute a value for the shifter's carry out (in the
   2216    LSB of a word), bind it to a temporary, and return that via *newC.
   2217 
   2218    Generates GETs from the guest state and is therefore not safe to
   2219    use once we start doing PUTs to it, for any given instruction.
   2220 
   2221    'how' is encoded thusly:
   2222       00b LSL,  01b LSR,  10b ASR,  11b ROR
   2223    Most but not all ARM and Thumb integer insns use this encoding.
   2224    Be careful to ensure the right value is passed here.
   2225 */
   2226 static void compute_result_and_C_after_shift_by_reg (
   2227                /*OUT*/HChar*  buf,
   2228                /*OUT*/IRTemp* res,
   2229                /*OUT*/IRTemp* newC,
   2230                IRTemp  rMt,       /* reg to shift */
   2231                UInt    how,       /* what kind of shift */
   2232                IRTemp  rSt,       /* shift amount */
   2233                UInt    rM,        /* only for debug printing */
   2234                UInt    rS         /* only for debug printing */
   2235             )
   2236 {
   2237    vassert(how < 4);
   2238    switch (how) {
   2239       case 0: { /* LSL */
   2240          compute_result_and_C_after_LSL_by_reg(
   2241             buf, res, newC, rMt, rSt, rM, rS
   2242          );
   2243          break;
   2244       }
   2245       case 1: { /* LSR */
   2246          compute_result_and_C_after_LSR_by_reg(
   2247             buf, res, newC, rMt, rSt, rM, rS
   2248          );
   2249          break;
   2250       }
   2251       case 2: { /* ASR */
   2252          compute_result_and_C_after_ASR_by_reg(
   2253             buf, res, newC, rMt, rSt, rM, rS
   2254          );
   2255          break;
   2256       }
   2257       case 3: { /* ROR */
   2258          compute_result_and_C_after_ROR_by_reg(
   2259              buf, res, newC, rMt, rSt, rM, rS
   2260          );
   2261          break;
   2262       }
   2263       default:
   2264          /*NOTREACHED*/
   2265          vassert(0);
   2266    }
   2267 }
   2268 
   2269 
   2270 /* Generate an expression corresponding to a shifter_operand, bind it
   2271    to a temporary, and return that via *shop.  If shco is non-NULL,
   2272    also compute a value for the shifter's carry out (in the LSB of a
   2273    word), bind it to a temporary, and return that via *shco.
   2274 
   2275    If for some reason we can't come up with a shifter operand (missing
   2276    case?  not really a shifter operand?) return False.
   2277 
   2278    Generates GETs from the guest state and is therefore not safe to
   2279    use once we start doing PUTs to it, for any given instruction.
   2280 
   2281    For ARM insns only; not for Thumb.
   2282 */
   2283 static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
   2284                                  /*OUT*/IRTemp* shop,
   2285                                  /*OUT*/IRTemp* shco,
   2286                                  /*OUT*/HChar* buf )
   2287 {
   2288    UInt insn_4 = (insn_11_0 >> 4) & 1;
   2289    UInt insn_7 = (insn_11_0 >> 7) & 1;
   2290    vassert(insn_25 <= 0x1);
   2291    vassert(insn_11_0 <= 0xFFF);
   2292 
   2293    vassert(shop && *shop == IRTemp_INVALID);
   2294    *shop = newTemp(Ity_I32);
   2295 
   2296    if (shco) {
   2297       vassert(*shco == IRTemp_INVALID);
   2298       *shco = newTemp(Ity_I32);
   2299    }
   2300 
   2301    /* 32-bit immediate */
   2302 
   2303    if (insn_25 == 1) {
   2304       /* immediate: (7:0) rotated right by 2 * (11:8) */
   2305       UInt imm = (insn_11_0 >> 0) & 0xFF;
   2306       UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
   2307       vassert(rot <= 30);
   2308       imm = ROR32(imm, rot);
   2309       if (shco) {
   2310          if (rot == 0) {
   2311             assign( *shco, mk_armg_calculate_flag_c() );
   2312          } else {
   2313             assign( *shco, mkU32( (imm >> 31) & 1 ) );
   2314          }
   2315       }
   2316       DIS(buf, "#0x%x", imm);
   2317       assign( *shop, mkU32(imm) );
   2318       return True;
   2319    }
   2320 
   2321    /* Shift/rotate by immediate */
   2322 
   2323    if (insn_25 == 0 && insn_4 == 0) {
   2324       /* Rm (3:0) shifted (6:5) by immediate (11:7) */
   2325       UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
   2326       UInt rM        = (insn_11_0 >> 0) & 0xF;
   2327       UInt how       = (insn_11_0 >> 5) & 3;
   2328       /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
   2329       IRTemp rMt = newTemp(Ity_I32);
   2330       assign(rMt, getIRegA(rM));
   2331 
   2332       vassert(shift_amt <= 31);
   2333 
   2334       compute_result_and_C_after_shift_by_imm5(
   2335          buf, shop, shco, rMt, how, shift_amt, rM
   2336       );
   2337       return True;
   2338    }
   2339 
   2340    /* Shift/rotate by register */
   2341    if (insn_25 == 0 && insn_4 == 1) {
   2342       /* Rm (3:0) shifted (6:5) by Rs (11:8) */
   2343       UInt rM  = (insn_11_0 >> 0) & 0xF;
   2344       UInt rS  = (insn_11_0 >> 8) & 0xF;
   2345       UInt how = (insn_11_0 >> 5) & 3;
   2346       /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
   2347       IRTemp rMt = newTemp(Ity_I32);
   2348       IRTemp rSt = newTemp(Ity_I32);
   2349 
   2350       if (insn_7 == 1)
   2351          return False; /* not really a shifter operand */
   2352 
   2353       assign(rMt, getIRegA(rM));
   2354       assign(rSt, getIRegA(rS));
   2355 
   2356       compute_result_and_C_after_shift_by_reg(
   2357          buf, shop, shco, rMt, how, rSt, rM, rS
   2358       );
   2359       return True;
   2360    }
   2361 
   2362    vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
   2363    return False;
   2364 }
   2365 
   2366 
   2367 /* ARM only */
   2368 static
   2369 IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
   2370                                     /*OUT*/HChar* buf )
   2371 {
   2372    vassert(rN < 16);
   2373    vassert(bU < 2);
   2374    vassert(imm12 < 0x1000);
   2375    HChar opChar = bU == 1 ? '+' : '-';
   2376    DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
   2377    return
   2378       binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
   2379              getIRegA(rN),
   2380              mkU32(imm12) );
   2381 }
   2382 
   2383 
   2384 /* ARM only.
   2385    NB: This is "DecodeImmShift" in newer versions of the the ARM ARM.
   2386 */
   2387 static
   2388 IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
   2389                                           UInt sh2, UInt imm5,
   2390                                           /*OUT*/HChar* buf )
   2391 {
   2392    vassert(rN < 16);
   2393    vassert(bU < 2);
   2394    vassert(rM < 16);
   2395    vassert(sh2 < 4);
   2396    vassert(imm5 < 32);
   2397    HChar   opChar = bU == 1 ? '+' : '-';
   2398    IRExpr* index  = NULL;
   2399    switch (sh2) {
   2400       case 0: /* LSL */
   2401          /* imm5 can be in the range 0 .. 31 inclusive. */
   2402          index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
   2403          DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
   2404          break;
   2405       case 1: /* LSR */
   2406          if (imm5 == 0) {
   2407             index = mkU32(0);
   2408             vassert(0); // ATC
   2409          } else {
   2410             index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
   2411          }
   2412          DIS(buf, "[r%u, %cr%u, LSR #%u]",
   2413                   rN, opChar, rM, imm5 == 0 ? 32 : imm5);
   2414          break;
   2415       case 2: /* ASR */
   2416          /* Doesn't this just mean that the behaviour with imm5 == 0
   2417             is the same as if it had been 31 ? */
   2418          if (imm5 == 0) {
   2419             index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
   2420             vassert(0); // ATC
   2421          } else {
   2422             index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
   2423          }
   2424          DIS(buf, "[r%u, %cr%u, ASR #%u]",
   2425                   rN, opChar, rM, imm5 == 0 ? 32 : imm5);
   2426          break;
   2427       case 3: /* ROR or RRX */
   2428          if (imm5 == 0) {
   2429             IRTemp rmT    = newTemp(Ity_I32);
   2430             IRTemp cflagT = newTemp(Ity_I32);
   2431             assign(rmT, getIRegA(rM));
   2432             assign(cflagT, mk_armg_calculate_flag_c());
   2433             index = binop(Iop_Or32,
   2434                           binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
   2435                           binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
   2436             DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
   2437          } else {
   2438             IRTemp rmT = newTemp(Ity_I32);
   2439             assign(rmT, getIRegA(rM));
   2440             vassert(imm5 >= 1 && imm5 <= 31);
   2441             index = binop(Iop_Or32,
   2442                           binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
   2443                           binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
   2444             DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
   2445          }
   2446          break;
   2447       default:
   2448          vassert(0);
   2449    }
   2450    vassert(index);
   2451    return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   2452                 getIRegA(rN), index);
   2453 }
   2454 
   2455 
   2456 /* ARM only */
   2457 static
   2458 IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
   2459                                    /*OUT*/HChar* buf )
   2460 {
   2461    vassert(rN < 16);
   2462    vassert(bU < 2);
   2463    vassert(imm8 < 0x100);
   2464    HChar opChar = bU == 1 ? '+' : '-';
   2465    DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
   2466    return
   2467       binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
   2468              getIRegA(rN),
   2469              mkU32(imm8) );
   2470 }
   2471 
   2472 
   2473 /* ARM only */
   2474 static
   2475 IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
   2476                                   /*OUT*/HChar* buf )
   2477 {
   2478    vassert(rN < 16);
   2479    vassert(bU < 2);
   2480    vassert(rM < 16);
   2481    HChar   opChar = bU == 1 ? '+' : '-';
   2482    IRExpr* index  = getIRegA(rM);
   2483    DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
   2484    return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   2485                 getIRegA(rN), index);
   2486 }
   2487 
   2488 
   2489 /* irRes :: Ity_I32 holds a floating point comparison result encoded
   2490    as an IRCmpF64Result.  Generate code to convert it to an
   2491    ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
   2492    Assign a new temp to hold that value, and return the temp. */
   2493 static
   2494 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
   2495 {
   2496    IRTemp ix       = newTemp(Ity_I32);
   2497    IRTemp termL    = newTemp(Ity_I32);
   2498    IRTemp termR    = newTemp(Ity_I32);
   2499    IRTemp nzcv     = newTemp(Ity_I32);
   2500 
   2501    /* This is where the fun starts.  We have to convert 'irRes' from
   2502       an IR-convention return result (IRCmpF64Result) to an
   2503       ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
   2504       4 bits of 'nzcv'. */
   2505    /* Map compare result from IR to ARM(nzcv) */
   2506    /*
   2507       FP cmp result | IR   | ARM(nzcv)
   2508       --------------------------------
   2509       UN              0x45   0011
   2510       LT              0x01   1000
   2511       GT              0x00   0010
   2512       EQ              0x40   0110
   2513    */
   2514    /* Now since you're probably wondering WTF ..
   2515 
   2516       ix fishes the useful bits out of the IR value, bits 6 and 0, and
   2517       places them side by side, giving a number which is 0, 1, 2 or 3.
   2518 
   2519       termL is a sequence cooked up by GNU superopt.  It converts ix
   2520          into an almost correct value NZCV value (incredibly), except
   2521          for the case of UN, where it produces 0100 instead of the
   2522          required 0011.
   2523 
   2524       termR is therefore a correction term, also computed from ix.  It
   2525          is 1 in the UN case and 0 for LT, GT and UN.  Hence, to get
   2526          the final correct value, we subtract termR from termL.
   2527 
   2528       Don't take my word for it.  There's a test program at the bottom
   2529       of this file, to try this out with.
   2530    */
   2531    assign(
   2532       ix,
   2533       binop(Iop_Or32,
   2534             binop(Iop_And32,
   2535                   binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
   2536                   mkU32(3)),
   2537             binop(Iop_And32, mkexpr(irRes), mkU32(1))));
   2538 
   2539    assign(
   2540       termL,
   2541       binop(Iop_Add32,
   2542             binop(Iop_Shr32,
   2543                   binop(Iop_Sub32,
   2544                         binop(Iop_Shl32,
   2545                               binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
   2546                               mkU8(30)),
   2547                         mkU32(1)),
   2548                   mkU8(29)),
   2549             mkU32(1)));
   2550 
   2551    assign(
   2552       termR,
   2553       binop(Iop_And32,
   2554             binop(Iop_And32,
   2555                   mkexpr(ix),
   2556                   binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
   2557             mkU32(1)));
   2558 
   2559    assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
   2560    return nzcv;
   2561 }
   2562 
   2563 
   2564 /* Thumb32 only.  This is "ThumbExpandImm" in the ARM ARM.  If
   2565    updatesC is non-NULL, a boolean is written to it indicating whether
   2566    or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
   2567 */
   2568 static UInt thumbExpandImm ( Bool* updatesC,
   2569                              UInt imm1, UInt imm3, UInt imm8 )
   2570 {
   2571    vassert(imm1 < (1<<1));
   2572    vassert(imm3 < (1<<3));
   2573    vassert(imm8 < (1<<8));
   2574    UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
   2575    UInt abcdefgh = imm8;
   2576    UInt lbcdefgh = imm8 | 0x80;
   2577    if (updatesC) {
   2578       *updatesC = i_imm3_a >= 8;
   2579    }
   2580    switch (i_imm3_a) {
   2581       case 0: case 1:
   2582          return abcdefgh;
   2583       case 2: case 3:
   2584          return (abcdefgh << 16) | abcdefgh;
   2585       case 4: case 5:
   2586          return (abcdefgh << 24) | (abcdefgh << 8);
   2587       case 6: case 7:
   2588          return (abcdefgh << 24) | (abcdefgh << 16)
   2589                 | (abcdefgh << 8) | abcdefgh;
   2590       case 8 ... 31:
   2591          return lbcdefgh << (32 - i_imm3_a);
   2592       default:
   2593          break;
   2594    }
   2595    /*NOTREACHED*/vassert(0);
   2596 }
   2597 
   2598 
   2599 /* Version of thumbExpandImm where we simply feed it the
   2600    instruction halfwords (the lowest addressed one is I0). */
   2601 static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
   2602                                         UShort i0s, UShort i1s )
   2603 {
   2604    UInt i0    = (UInt)i0s;
   2605    UInt i1    = (UInt)i1s;
   2606    UInt imm1  = SLICE_UInt(i0,10,10);
   2607    UInt imm3  = SLICE_UInt(i1,14,12);
   2608    UInt imm8  = SLICE_UInt(i1,7,0);
   2609    return thumbExpandImm(updatesC, imm1, imm3, imm8);
   2610 }
   2611 
   2612 
   2613 /* Thumb16 only.  Given the firstcond and mask fields from an IT
   2614    instruction, compute the 32-bit ITSTATE value implied, as described
   2615    in libvex_guest_arm.h.  This is not the ARM ARM representation.
   2616    Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
   2617    disassembly printing.  Returns False if firstcond or mask
   2618    denote something invalid.
   2619 
   2620    The number and conditions for the instructions to be
   2621    conditionalised depend on firstcond and mask:
   2622 
   2623    mask      cond 1    cond 2      cond 3      cond 4
   2624 
   2625    1000      fc[3:0]
   2626    x100      fc[3:0]   fc[3:1]:x
   2627    xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
   2628    xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z
   2629 
   2630    The condition fields are assembled in *itstate backwards (cond 4 at
   2631    the top, cond 1 at the bottom).  Conditions are << 4'd and then
   2632    ^0xE'd, and those fields that correspond to instructions in the IT
   2633    block are tagged with a 1 bit.
   2634 */
   2635 static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
   2636                               /*OUT*/HChar* ch1,
   2637                               /*OUT*/HChar* ch2,
   2638                               /*OUT*/HChar* ch3,
   2639                               UInt firstcond, UInt mask )
   2640 {
   2641    vassert(firstcond <= 0xF);
   2642    vassert(mask <= 0xF);
   2643    *itstate = 0;
   2644    *ch1 = *ch2 = *ch3 = '.';
   2645    if (mask == 0)
   2646       return False; /* the logic below actually ensures this anyway,
   2647                        but clearer to make it explicit. */
   2648    if (firstcond == 0xF)
   2649       return False; /* NV is not allowed */
   2650    if (firstcond == 0xE && popcount32(mask) != 1)
   2651       return False; /* if firstcond is AL then all the rest must be too */
   2652 
   2653    UInt m3 = (mask >> 3) & 1;
   2654    UInt m2 = (mask >> 2) & 1;
   2655    UInt m1 = (mask >> 1) & 1;
   2656    UInt m0 = (mask >> 0) & 1;
   2657 
   2658    UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
   2659    UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;
   2660 
   2661    if (m3 == 1 && (m2|m1|m0) == 0) {
   2662       *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
   2663       *itstate ^= 0xE0E0E0E0;
   2664       return True;
   2665    }
   2666 
   2667    if (m2 == 1 && (m1|m0) == 0) {
   2668       *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
   2669       *itstate ^= 0xE0E0E0E0;
   2670       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
   2671       return True;
   2672    }
   2673 
   2674    if (m1 == 1 && m0 == 0) {
   2675       *itstate = (ni << 24)
   2676                  | (setbit32(fc, 4, m2) << 16)
   2677                  | (setbit32(fc, 4, m3) << 8) | fc;
   2678       *itstate ^= 0xE0E0E0E0;
   2679       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
   2680       *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
   2681       return True;
   2682    }
   2683 
   2684    if (m0 == 1) {
   2685       *itstate = (setbit32(fc, 4, m1) << 24)
   2686                  | (setbit32(fc, 4, m2) << 16)
   2687                  | (setbit32(fc, 4, m3) << 8) | fc;
   2688       *itstate ^= 0xE0E0E0E0;
   2689       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
   2690       *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
   2691       *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
   2692       return True;
   2693    }
   2694 
   2695    return False;
   2696 }
   2697 
   2698 
   2699 /* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
   2700    Chapter 7 Section 1. */
   2701 static IRTemp gen_BITREV ( IRTemp x0 )
   2702 {
   2703    IRTemp x1 = newTemp(Ity_I32);
   2704    IRTemp x2 = newTemp(Ity_I32);
   2705    IRTemp x3 = newTemp(Ity_I32);
   2706    IRTemp x4 = newTemp(Ity_I32);
   2707    IRTemp x5 = newTemp(Ity_I32);
   2708    UInt   c1 = 0x55555555;
   2709    UInt   c2 = 0x33333333;
   2710    UInt   c3 = 0x0F0F0F0F;
   2711    UInt   c4 = 0x00FF00FF;
   2712    UInt   c5 = 0x0000FFFF;
   2713    assign(x1,
   2714           binop(Iop_Or32,
   2715                 binop(Iop_Shl32,
   2716                       binop(Iop_And32, mkexpr(x0), mkU32(c1)),
   2717                       mkU8(1)),
   2718                 binop(Iop_Shr32,
   2719                       binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
   2720                       mkU8(1))
   2721    ));
   2722    assign(x2,
   2723           binop(Iop_Or32,
   2724                 binop(Iop_Shl32,
   2725                       binop(Iop_And32, mkexpr(x1), mkU32(c2)),
   2726                       mkU8(2)),
   2727                 binop(Iop_Shr32,
   2728                       binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
   2729                       mkU8(2))
   2730    ));
   2731    assign(x3,
   2732           binop(Iop_Or32,
   2733                 binop(Iop_Shl32,
   2734                       binop(Iop_And32, mkexpr(x2), mkU32(c3)),
   2735                       mkU8(4)),
   2736                 binop(Iop_Shr32,
   2737                       binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
   2738                       mkU8(4))
   2739    ));
   2740    assign(x4,
   2741           binop(Iop_Or32,
   2742                 binop(Iop_Shl32,
   2743                       binop(Iop_And32, mkexpr(x3), mkU32(c4)),
   2744                       mkU8(8)),
   2745                 binop(Iop_Shr32,
   2746                       binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
   2747                       mkU8(8))
   2748    ));
   2749    assign(x5,
   2750           binop(Iop_Or32,
   2751                 binop(Iop_Shl32,
   2752                       binop(Iop_And32, mkexpr(x4), mkU32(c5)),
   2753                       mkU8(16)),
   2754                 binop(Iop_Shr32,
   2755                       binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
   2756                       mkU8(16))
   2757    ));
   2758    return x5;
   2759 }
   2760 
   2761 
   2762 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   2763    0:1:2:3 (aka byte-swap). */
   2764 static IRTemp gen_REV ( IRTemp arg )
   2765 {
   2766    IRTemp res = newTemp(Ity_I32);
   2767    assign(res,
   2768           binop(Iop_Or32,
   2769                 binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
   2770           binop(Iop_Or32,
   2771                 binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
   2772                                  mkU32(0x00FF0000)),
   2773           binop(Iop_Or32,
   2774                 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
   2775                                        mkU32(0x0000FF00)),
   2776                 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
   2777                                        mkU32(0x000000FF) )
   2778    ))));
   2779    return res;
   2780 }
   2781 
   2782 
   2783 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   2784    2:3:0:1 (swap within lo and hi halves). */
   2785 static IRTemp gen_REV16 ( IRTemp arg )
   2786 {
   2787    IRTemp res = newTemp(Ity_I32);
   2788    assign(res,
   2789           binop(Iop_Or32,
   2790                 binop(Iop_And32,
   2791                       binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
   2792                       mkU32(0xFF00FF00)),
   2793                 binop(Iop_And32,
   2794                       binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
   2795                       mkU32(0x00FF00FF))));
   2796    return res;
   2797 }
   2798 
   2799 
   2800 /*------------------------------------------------------------*/
   2801 /*--- Advanced SIMD (NEON) instructions                    ---*/
   2802 /*------------------------------------------------------------*/
   2803 
   2804 /*------------------------------------------------------------*/
   2805 /*--- NEON data processing                                 ---*/
   2806 /*------------------------------------------------------------*/
   2807 
   2808 /* For all NEON DP ops, we use the normal scheme to handle conditional
   2809    writes to registers -- pass in condT and hand that on to the
   2810    put*Reg functions.  In ARM mode condT is always IRTemp_INVALID
   2811    since NEON is unconditional for ARM.  In Thumb mode condT is
   2812    derived from the ITSTATE shift register in the normal way. */
   2813 
   2814 static
   2815 UInt get_neon_d_regno(UInt theInstr)
   2816 {
   2817    UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   2818    if (theInstr & 0x40) {
   2819       if (x & 1) {
   2820          x = x + 0x100;
   2821       } else {
   2822          x = x >> 1;
   2823       }
   2824    }
   2825    return x;
   2826 }
   2827 
   2828 static
   2829 UInt get_neon_n_regno(UInt theInstr)
   2830 {
   2831    UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
   2832    if (theInstr & 0x40) {
   2833       if (x & 1) {
   2834          x = x + 0x100;
   2835       } else {
   2836          x = x >> 1;
   2837       }
   2838    }
   2839    return x;
   2840 }
   2841 
   2842 static
   2843 UInt get_neon_m_regno(UInt theInstr)
   2844 {
   2845    UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   2846    if (theInstr & 0x40) {
   2847       if (x & 1) {
   2848          x = x + 0x100;
   2849       } else {
   2850          x = x >> 1;
   2851       }
   2852    }
   2853    return x;
   2854 }
   2855 
   2856 static
   2857 Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
   2858 {
   2859    UInt dreg = get_neon_d_regno(theInstr);
   2860    UInt mreg = get_neon_m_regno(theInstr);
   2861    UInt nreg = get_neon_n_regno(theInstr);
   2862    UInt imm4 = (theInstr >> 8) & 0xf;
   2863    UInt Q = (theInstr >> 6) & 1;
   2864    HChar reg_t = Q ? 'q' : 'd';
   2865 
   2866    if (Q) {
   2867       putQReg(dreg, triop(Iop_SliceV128, /*hiV128*/getQReg(mreg),
   2868                           /*loV128*/getQReg(nreg), mkU8(imm4)), condT);
   2869    } else {
   2870       putDRegI64(dreg, triop(Iop_Slice64, /*hiI64*/getDRegI64(mreg),
   2871                              /*loI64*/getDRegI64(nreg), mkU8(imm4)), condT);
   2872    }
   2873    DIP("vext.8 %c%u, %c%u, %c%u, #%u\n", reg_t, dreg, reg_t, nreg,
   2874                                          reg_t, mreg, imm4);
   2875    return True;
   2876 }
   2877 
   2878 /* Generate specific vector FP binary ops, possibly with a fake
   2879    rounding mode as required by the primop. */
   2880 static
   2881 IRExpr* binop_w_fake_RM ( IROp op, IRExpr* argL, IRExpr* argR )
   2882 {
   2883    switch (op) {
   2884       case Iop_Add32Fx4:
   2885       case Iop_Sub32Fx4:
   2886       case Iop_Mul32Fx4:
   2887          return triop(op, get_FAKE_roundingmode(), argL, argR );
   2888       case Iop_Add32x4: case Iop_Add16x8:
   2889       case Iop_Sub32x4: case Iop_Sub16x8:
   2890       case Iop_Mul32x4: case Iop_Mul16x8:
   2891       case Iop_Mul32x2: case Iop_Mul16x4:
   2892       case Iop_Add32Fx2:
   2893       case Iop_Sub32Fx2:
   2894       case Iop_Mul32Fx2:
   2895       case Iop_PwAdd32Fx2:
   2896          return binop(op, argL, argR);
   2897       default:
   2898         ppIROp(op);
   2899         vassert(0);
   2900    }
   2901 }
   2902 
   2903 /* VTBL, VTBX */
   2904 static
   2905 Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
   2906 {
   2907    UInt op = (theInstr >> 6) & 1;
   2908    UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
   2909    UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
   2910    UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
   2911    UInt len = (theInstr >> 8) & 3;
   2912    Int i;
   2913    IROp cmp;
   2914    ULong imm;
   2915    IRTemp arg_l;
   2916    IRTemp old_mask, new_mask, cur_mask;
   2917    IRTemp old_res, new_res;
   2918    IRTemp old_arg, new_arg;
   2919 
   2920    if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
   2921       return False;
   2922    if (nreg + len > 31)
   2923       return False;
   2924 
   2925    cmp = Iop_CmpGT8Ux8;
   2926 
   2927    old_mask = newTemp(Ity_I64);
   2928    old_res = newTemp(Ity_I64);
   2929    old_arg = newTemp(Ity_I64);
   2930    assign(old_mask, mkU64(0));
   2931    assign(old_res, mkU64(0));
   2932    assign(old_arg, getDRegI64(mreg));
   2933    imm = 8;
   2934    imm = (imm <<  8) | imm;
   2935    imm = (imm << 16) | imm;
   2936    imm = (imm << 32) | imm;
   2937 
   2938    for (i = 0; i <= len; i++) {
   2939       arg_l = newTemp(Ity_I64);
   2940       new_mask = newTemp(Ity_I64);
   2941       cur_mask = newTemp(Ity_I64);
   2942       new_res = newTemp(Ity_I64);
   2943       new_arg = newTemp(Ity_I64);
   2944       assign(arg_l, getDRegI64(nreg+i));
   2945       assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
   2946       assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
   2947       assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
   2948       assign(new_res, binop(Iop_Or64,
   2949                             mkexpr(old_res),
   2950                             binop(Iop_And64,
   2951                                   binop(Iop_Perm8x8,
   2952                                         mkexpr(arg_l),
   2953                                         binop(Iop_And64,
   2954                                               mkexpr(old_arg),
   2955                                               mkexpr(cur_mask))),
   2956                                   mkexpr(cur_mask))));
   2957 
   2958       old_arg = new_arg;
   2959       old_mask = new_mask;
   2960       old_res = new_res;
   2961    }
   2962    if (op) {
   2963       new_res = newTemp(Ity_I64);
   2964       assign(new_res, binop(Iop_Or64,
   2965                             binop(Iop_And64,
   2966                                   getDRegI64(dreg),
   2967                                   unop(Iop_Not64, mkexpr(old_mask))),
   2968                             mkexpr(old_res)));
   2969       old_res = new_res;
   2970    }
   2971 
   2972    putDRegI64(dreg, mkexpr(old_res), condT);
   2973    DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
   2974    if (len > 0) {
   2975       DIP("d%u-d%u", nreg, nreg + len);
   2976    } else {
   2977       DIP("d%u", nreg);
   2978    }
   2979    DIP("}, d%u\n", mreg);
   2980    return True;
   2981 }
   2982 
   2983 /* VDUP (scalar)  */
   2984 static
   2985 Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
   2986 {
   2987    UInt Q = (theInstr >> 6) & 1;
   2988    UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   2989    UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   2990    UInt imm4 = (theInstr >> 16) & 0xF;
   2991    UInt index;
   2992    UInt size;
   2993    IRTemp arg_m;
   2994    IRTemp res;
   2995    IROp op, op2;
   2996 
   2997    if ((imm4 == 0) || (imm4 == 8))
   2998       return False;
   2999    if ((Q == 1) && ((dreg & 1) == 1))
   3000       return False;
   3001    if (Q)
   3002       dreg >>= 1;
   3003    arg_m = newTemp(Ity_I64);
   3004    assign(arg_m, getDRegI64(mreg));
   3005    if (Q)
   3006       res = newTemp(Ity_V128);
   3007    else
   3008       res = newTemp(Ity_I64);
   3009    if ((imm4 & 1) == 1) {
   3010       op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
   3011       op2 = Iop_GetElem8x8;
   3012       index = imm4 >> 1;
   3013       size = 8;
   3014    } else if ((imm4 & 3) == 2) {
   3015       op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
   3016       op2 = Iop_GetElem16x4;
   3017       index = imm4 >> 2;
   3018       size = 16;
   3019    } else if ((imm4 & 7) == 4) {
   3020       op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
   3021       op2 = Iop_GetElem32x2;
   3022       index = imm4 >> 3;
   3023       size = 32;
   3024    } else {
   3025       return False; // can this ever happen?
   3026    }
   3027    assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
   3028    if (Q) {
   3029       putQReg(dreg, mkexpr(res), condT);
   3030    } else {
   3031       putDRegI64(dreg, mkexpr(res), condT);
   3032    }
   3033    DIP("vdup.%u %c%u, d%u[%u]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
   3034    return True;
   3035 }
   3036 
   3037 /* A7.4.1 Three registers of the same length */
   3038 static
   3039 Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
   3040 {
   3041    UInt Q = (theInstr >> 6) & 1;
   3042    UInt dreg = get_neon_d_regno(theInstr);
   3043    UInt nreg = get_neon_n_regno(theInstr);
   3044    UInt mreg = get_neon_m_regno(theInstr);
   3045    UInt A = (theInstr >> 8) & 0xF;
   3046    UInt B = (theInstr >> 4) & 1;
   3047    UInt C = (theInstr >> 20) & 0x3;
   3048    UInt U = (theInstr >> 24) & 1;
   3049    UInt size = C;
   3050 
   3051    IRTemp arg_n;
   3052    IRTemp arg_m;
   3053    IRTemp res;
   3054 
   3055    if (Q) {
   3056       arg_n = newTemp(Ity_V128);
   3057       arg_m = newTemp(Ity_V128);
   3058       res = newTemp(Ity_V128);
   3059       assign(arg_n, getQReg(nreg));
   3060       assign(arg_m, getQReg(mreg));
   3061    } else {
   3062       arg_n = newTemp(Ity_I64);
   3063       arg_m = newTemp(Ity_I64);
   3064       res = newTemp(Ity_I64);
   3065       assign(arg_n, getDRegI64(nreg));
   3066       assign(arg_m, getDRegI64(mreg));
   3067    }
   3068 
   3069    switch(A) {
   3070       case 0:
   3071          if (B == 0) {
   3072             /* VHADD */
   3073             ULong imm = 0;
   3074             IRExpr *imm_val;
   3075             IROp addOp;
   3076             IROp andOp;
   3077             IROp shOp;
   3078             HChar regType = Q ? 'q' : 'd';
   3079 
   3080             if (size == 3)
   3081                return False;
   3082             switch(size) {
   3083                case 0: imm = 0x101010101010101LL; break;
   3084                case 1: imm = 0x1000100010001LL; break;
   3085                case 2: imm = 0x100000001LL; break;
   3086                default: vassert(0);
   3087             }
   3088             if (Q) {
   3089                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   3090                andOp = Iop_AndV128;
   3091             } else {
   3092                imm_val = mkU64(imm);
   3093                andOp = Iop_And64;
   3094             }
   3095             if (U) {
   3096                switch(size) {
   3097                   case 0:
   3098                      addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
   3099                      shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3100                      break;
   3101                   case 1:
   3102                      addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
   3103                      shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3104                      break;
   3105                   case 2:
   3106                      addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
   3107                      shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3108                      break;
   3109                   default:
   3110                      vassert(0);
   3111                }
   3112             } else {
   3113                switch(size) {
   3114                   case 0:
   3115                      addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
   3116                      shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
   3117                      break;
   3118                   case 1:
   3119                      addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
   3120                      shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
   3121                      break;
   3122                   case 2:
   3123                      addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
   3124                      shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
   3125                      break;
   3126                   default:
   3127                      vassert(0);
   3128                }
   3129             }
   3130             assign(res,
   3131                    binop(addOp,
   3132                          binop(addOp,
   3133                                binop(shOp, mkexpr(arg_m), mkU8(1)),
   3134                                binop(shOp, mkexpr(arg_n), mkU8(1))),
   3135                          binop(shOp,
   3136                                binop(addOp,
   3137                                      binop(andOp, mkexpr(arg_m), imm_val),
   3138                                      binop(andOp, mkexpr(arg_n), imm_val)),
   3139                                mkU8(1))));
   3140             DIP("vhadd.%c%d %c%u, %c%u, %c%u\n",
   3141                 U ? 'u' : 's', 8 << size, regType,
   3142                 dreg, regType, nreg, regType, mreg);
   3143          } else {
   3144             /* VQADD */
   3145             IROp op, op2;
   3146             IRTemp tmp;
   3147             HChar reg_t = Q ? 'q' : 'd';
   3148             if (Q) {
   3149                switch (size) {
   3150                   case 0:
   3151                      op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
   3152                      op2 = Iop_Add8x16;
   3153                      break;
   3154                   case 1:
   3155                      op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
   3156                      op2 = Iop_Add16x8;
   3157                      break;
   3158                   case 2:
   3159                      op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
   3160                      op2 = Iop_Add32x4;
   3161                      break;
   3162                   case 3:
   3163                      op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
   3164                      op2 = Iop_Add64x2;
   3165                      break;
   3166                   default:
   3167                      vassert(0);
   3168                }
   3169             } else {
   3170                switch (size) {
   3171                   case 0:
   3172                      op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
   3173                      op2 = Iop_Add8x8;
   3174                      break;
   3175                   case 1:
   3176                      op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
   3177                      op2 = Iop_Add16x4;
   3178                      break;
   3179                   case 2:
   3180                      op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
   3181                      op2 = Iop_Add32x2;
   3182                      break;
   3183                   case 3:
   3184                      op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
   3185                      op2 = Iop_Add64;
   3186                      break;
   3187                   default:
   3188                      vassert(0);
   3189                }
   3190             }
   3191             if (Q) {
   3192                tmp = newTemp(Ity_V128);
   3193             } else {
   3194                tmp = newTemp(Ity_I64);
   3195             }
   3196             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   3197             assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
   3198             setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
   3199             DIP("vqadd.%c%d %c%u %c%u, %c%u\n",
   3200                 U ? 'u' : 's',
   3201                 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3202          }
   3203          break;
   3204       case 1:
   3205          if (B == 0) {
   3206             /* VRHADD */
   3207             /* VRHADD C, A, B ::=
   3208                  C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
   3209             IROp shift_op, add_op;
   3210             IRTemp cc;
   3211             ULong one = 1;
   3212             HChar reg_t = Q ? 'q' : 'd';
   3213             switch (size) {
   3214                case 0: one = (one <<  8) | one; /* fall through */
   3215                case 1: one = (one << 16) | one; /* fall through */
   3216                case 2: one = (one << 32) | one; break;
   3217                case 3: return False;
   3218                default: vassert(0);
   3219             }
   3220             if (Q) {
   3221                switch (size) {
   3222                   case 0:
   3223                      shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
   3224                      add_op = Iop_Add8x16;
   3225                      break;
   3226                   case 1:
   3227                      shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   3228                      add_op = Iop_Add16x8;
   3229                      break;
   3230                   case 2:
   3231                      shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   3232                      add_op = Iop_Add32x4;
   3233                      break;
   3234                   case 3:
   3235                      return False;
   3236                   default:
   3237                      vassert(0);
   3238                }
   3239             } else {
   3240                switch (size) {
   3241                   case 0:
   3242                      shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
   3243                      add_op = Iop_Add8x8;
   3244                      break;
   3245                   case 1:
   3246                      shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
   3247                      add_op = Iop_Add16x4;
   3248                      break;
   3249                   case 2:
   3250                      shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
   3251                      add_op = Iop_Add32x2;
   3252                      break;
   3253                   case 3:
   3254                      return False;
   3255                   default:
   3256                      vassert(0);
   3257                }
   3258             }
   3259             if (Q) {
   3260                cc = newTemp(Ity_V128);
   3261                assign(cc, binop(shift_op,
   3262                                 binop(add_op,
   3263                                       binop(add_op,
   3264                                             binop(Iop_AndV128,
   3265                                                   mkexpr(arg_n),
   3266                                                   binop(Iop_64HLtoV128,
   3267                                                         mkU64(one),
   3268                                                         mkU64(one))),
   3269                                             binop(Iop_AndV128,
   3270                                                   mkexpr(arg_m),
   3271                                                   binop(Iop_64HLtoV128,
   3272                                                         mkU64(one),
   3273                                                         mkU64(one)))),
   3274                                       binop(Iop_64HLtoV128,
   3275                                             mkU64(one),
   3276                                             mkU64(one))),
   3277                                 mkU8(1)));
   3278                assign(res, binop(add_op,
   3279                                  binop(add_op,
   3280                                        binop(shift_op,
   3281                                              mkexpr(arg_n),
   3282                                              mkU8(1)),
   3283                                        binop(shift_op,
   3284                                              mkexpr(arg_m),
   3285                                              mkU8(1))),
   3286                                  mkexpr(cc)));
   3287             } else {
   3288                cc = newTemp(Ity_I64);
   3289                assign(cc, binop(shift_op,
   3290                                 binop(add_op,
   3291                                       binop(add_op,
   3292                                             binop(Iop_And64,
   3293                                                   mkexpr(arg_n),
   3294                                                   mkU64(one)),
   3295                                             binop(Iop_And64,
   3296                                                   mkexpr(arg_m),
   3297                                                   mkU64(one))),
   3298                                       mkU64(one)),
   3299                                 mkU8(1)));
   3300                assign(res, binop(add_op,
   3301                                  binop(add_op,
   3302                                        binop(shift_op,
   3303                                              mkexpr(arg_n),
   3304                                              mkU8(1)),
   3305                                        binop(shift_op,
   3306                                              mkexpr(arg_m),
   3307                                              mkU8(1))),
   3308                                  mkexpr(cc)));
   3309             }
   3310             DIP("vrhadd.%c%d %c%u, %c%u, %c%u\n",
   3311                 U ? 'u' : 's',
   3312                 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3313          } else {
   3314             if (U == 0)  {
   3315                switch(C) {
   3316                   case 0: {
   3317                      /* VAND  */
   3318                      HChar reg_t = Q ? 'q' : 'd';
   3319                      if (Q) {
   3320                         assign(res, binop(Iop_AndV128, mkexpr(arg_n),
   3321                                                        mkexpr(arg_m)));
   3322                      } else {
   3323                         assign(res, binop(Iop_And64, mkexpr(arg_n),
   3324                                                      mkexpr(arg_m)));
   3325                      }
   3326                      DIP("vand %c%u, %c%u, %c%u\n",
   3327                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3328                      break;
   3329                   }
   3330                   case 1: {
   3331                      /* VBIC  */
   3332                      HChar reg_t = Q ? 'q' : 'd';
   3333                      if (Q) {
   3334                         assign(res, binop(Iop_AndV128,mkexpr(arg_n),
   3335                                unop(Iop_NotV128, mkexpr(arg_m))));
   3336                      } else {
   3337                         assign(res, binop(Iop_And64, mkexpr(arg_n),
   3338                                unop(Iop_Not64, mkexpr(arg_m))));
   3339                      }
   3340                      DIP("vbic %c%u, %c%u, %c%u\n",
   3341                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3342                      break;
   3343                   }
   3344                   case 2:
   3345                      if ( nreg != mreg) {
   3346                         /* VORR  */
   3347                         HChar reg_t = Q ? 'q' : 'd';
   3348                         if (Q) {
   3349                            assign(res, binop(Iop_OrV128, mkexpr(arg_n),
   3350                                                          mkexpr(arg_m)));
   3351                         } else {
   3352                            assign(res, binop(Iop_Or64, mkexpr(arg_n),
   3353                                                        mkexpr(arg_m)));
   3354                         }
   3355                         DIP("vorr %c%u, %c%u, %c%u\n",
   3356                             reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3357                      } else {
   3358                         /* VMOV  */
   3359                         HChar reg_t = Q ? 'q' : 'd';
   3360                         assign(res, mkexpr(arg_m));
   3361                         DIP("vmov %c%u, %c%u\n", reg_t, dreg, reg_t, mreg);
   3362                      }
   3363                      break;
   3364                   case 3:{
   3365                      /* VORN  */
   3366                      HChar reg_t = Q ? 'q' : 'd';
   3367                      if (Q) {
   3368                         assign(res, binop(Iop_OrV128,mkexpr(arg_n),
   3369                                unop(Iop_NotV128, mkexpr(arg_m))));
   3370                      } else {
   3371                         assign(res, binop(Iop_Or64, mkexpr(arg_n),
   3372                                unop(Iop_Not64, mkexpr(arg_m))));
   3373                      }
   3374                      DIP("vorn %c%u, %c%u, %c%u\n",
   3375                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3376                      break;
   3377                   }
   3378                }
   3379             } else {
   3380                switch(C) {
   3381                   case 0:
   3382                      /* VEOR (XOR)  */
   3383                      if (Q) {
   3384                         assign(res, binop(Iop_XorV128, mkexpr(arg_n),
   3385                                                        mkexpr(arg_m)));
   3386                      } else {
   3387                         assign(res, binop(Iop_Xor64, mkexpr(arg_n),
   3388                                                      mkexpr(arg_m)));
   3389                      }
   3390                      DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   3391                            Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3392                      break;
   3393                   case 1:
   3394                      /* VBSL  */
   3395                      if (Q) {
   3396                         IRTemp reg_d = newTemp(Ity_V128);
   3397                         assign(reg_d, getQReg(dreg));
   3398                         assign(res,
   3399                                binop(Iop_OrV128,
   3400                                      binop(Iop_AndV128, mkexpr(arg_n),
   3401                                                         mkexpr(reg_d)),
   3402                                      binop(Iop_AndV128,
   3403                                            mkexpr(arg_m),
   3404                                            unop(Iop_NotV128,
   3405                                                  mkexpr(reg_d)) ) ) );
   3406                      } else {
   3407                         IRTemp reg_d = newTemp(Ity_I64);
   3408                         assign(reg_d, getDRegI64(dreg));
   3409                         assign(res,
   3410                                binop(Iop_Or64,
   3411                                      binop(Iop_And64, mkexpr(arg_n),
   3412                                                       mkexpr(reg_d)),
   3413                                      binop(Iop_And64,
   3414                                            mkexpr(arg_m),
   3415                                            unop(Iop_Not64, mkexpr(reg_d)))));
   3416                      }
   3417                      DIP("vbsl %c%u, %c%u, %c%u\n",
   3418                          Q ? 'q' : 'd', dreg,
   3419                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3420                      break;
   3421                   case 2:
   3422                      /* VBIT  */
   3423                      if (Q) {
   3424                         IRTemp reg_d = newTemp(Ity_V128);
   3425                         assign(reg_d, getQReg(dreg));
   3426                         assign(res,
   3427                                binop(Iop_OrV128,
   3428                                      binop(Iop_AndV128, mkexpr(arg_n),
   3429                                                         mkexpr(arg_m)),
   3430                                      binop(Iop_AndV128,
   3431                                            mkexpr(reg_d),
   3432                                            unop(Iop_NotV128, mkexpr(arg_m)))));
   3433                      } else {
   3434                         IRTemp reg_d = newTemp(Ity_I64);
   3435                         assign(reg_d, getDRegI64(dreg));
   3436                         assign(res,
   3437                                binop(Iop_Or64,
   3438                                      binop(Iop_And64, mkexpr(arg_n),
   3439                                                       mkexpr(arg_m)),
   3440                                      binop(Iop_And64,
   3441                                            mkexpr(reg_d),
   3442                                            unop(Iop_Not64, mkexpr(arg_m)))));
   3443                      }
   3444                      DIP("vbit %c%u, %c%u, %c%u\n",
   3445                          Q ? 'q' : 'd', dreg,
   3446                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3447                      break;
   3448                   case 3:
   3449                      /* VBIF  */
   3450                      if (Q) {
   3451                         IRTemp reg_d = newTemp(Ity_V128);
   3452                         assign(reg_d, getQReg(dreg));
   3453                         assign(res,
   3454                                binop(Iop_OrV128,
   3455                                      binop(Iop_AndV128, mkexpr(reg_d),
   3456                                                         mkexpr(arg_m)),
   3457                                      binop(Iop_AndV128,
   3458                                            mkexpr(arg_n),
   3459                                            unop(Iop_NotV128, mkexpr(arg_m)))));
   3460                      } else {
   3461                         IRTemp reg_d = newTemp(Ity_I64);
   3462                         assign(reg_d, getDRegI64(dreg));
   3463                         assign(res,
   3464                                binop(Iop_Or64,
   3465                                      binop(Iop_And64, mkexpr(reg_d),
   3466                                                       mkexpr(arg_m)),
   3467                                      binop(Iop_And64,
   3468                                            mkexpr(arg_n),
   3469                                            unop(Iop_Not64, mkexpr(arg_m)))));
   3470                      }
   3471                      DIP("vbif %c%u, %c%u, %c%u\n",
   3472                          Q ? 'q' : 'd', dreg,
   3473                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3474                      break;
   3475                }
   3476             }
   3477          }
   3478          break;
   3479       case 2:
   3480          if (B == 0) {
   3481             /* VHSUB */
   3482             /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1)   */
   3483             ULong imm = 0;
   3484             IRExpr *imm_val;
   3485             IROp subOp;
   3486             IROp notOp;
   3487             IROp andOp;
   3488             IROp shOp;
   3489             if (size == 3)
   3490                return False;
   3491             switch(size) {
   3492                case 0: imm = 0x101010101010101LL; break;
   3493                case 1: imm = 0x1000100010001LL; break;
   3494                case 2: imm = 0x100000001LL; break;
   3495                default: vassert(0);
   3496             }
   3497             if (Q) {
   3498                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   3499                andOp = Iop_AndV128;
   3500                notOp = Iop_NotV128;
   3501             } else {
   3502                imm_val = mkU64(imm);
   3503                andOp = Iop_And64;
   3504                notOp = Iop_Not64;
   3505             }
   3506             if (U) {
   3507                switch(size) {
   3508                   case 0:
   3509                      subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3510                      shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3511                      break;
   3512                   case 1:
   3513                      subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3514                      shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3515                      break;
   3516                   case 2:
   3517                      subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3518                      shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3519                      break;
   3520                   default:
   3521                      vassert(0);
   3522                }
   3523             } else {
   3524                switch(size) {
   3525                   case 0:
   3526                      subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3527                      shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
   3528                      break;
   3529                   case 1:
   3530                      subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3531                      shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
   3532                      break;
   3533                   case 2:
   3534                      subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3535                      shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
   3536                      break;
   3537                   default:
   3538                      vassert(0);
   3539                }
   3540             }
   3541             assign(res,
   3542                    binop(subOp,
   3543                          binop(subOp,
   3544                                binop(shOp, mkexpr(arg_n), mkU8(1)),
   3545                                binop(shOp, mkexpr(arg_m), mkU8(1))),
   3546                          binop(andOp,
   3547                                binop(andOp,
   3548                                      unop(notOp, mkexpr(arg_n)),
   3549                                      mkexpr(arg_m)),
   3550                                imm_val)));
   3551             DIP("vhsub.%c%d %c%u, %c%u, %c%u\n",
   3552                 U ? 'u' : 's', 8 << size,
   3553                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3554                 mreg);
   3555          } else {
   3556             /* VQSUB */
   3557             IROp op, op2;
   3558             IRTemp tmp;
   3559             if (Q) {
   3560                switch (size) {
   3561                   case 0:
   3562                      op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
   3563                      op2 = Iop_Sub8x16;
   3564                      break;
   3565                   case 1:
   3566                      op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
   3567                      op2 = Iop_Sub16x8;
   3568                      break;
   3569                   case 2:
   3570                      op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
   3571                      op2 = Iop_Sub32x4;
   3572                      break;
   3573                   case 3:
   3574                      op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
   3575                      op2 = Iop_Sub64x2;
   3576                      break;
   3577                   default:
   3578                      vassert(0);
   3579                }
   3580             } else {
   3581                switch (size) {
   3582                   case 0:
   3583                      op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
   3584                      op2 = Iop_Sub8x8;
   3585                      break;
   3586                   case 1:
   3587                      op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
   3588                      op2 = Iop_Sub16x4;
   3589                      break;
   3590                   case 2:
   3591                      op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
   3592                      op2 = Iop_Sub32x2;
   3593                      break;
   3594                   case 3:
   3595                      op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
   3596                      op2 = Iop_Sub64;
   3597                      break;
   3598                   default:
   3599                      vassert(0);
   3600                }
   3601             }
   3602             if (Q)
   3603                tmp = newTemp(Ity_V128);
   3604             else
   3605                tmp = newTemp(Ity_I64);
   3606             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   3607             assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
   3608             setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
   3609             DIP("vqsub.%c%d %c%u, %c%u, %c%u\n",
   3610                 U ? 'u' : 's', 8 << size,
   3611                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3612                 mreg);
   3613          }
   3614          break;
   3615       case 3: {
   3616             IROp op;
   3617             if (Q) {
   3618                switch (size) {
   3619                   case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
   3620                   case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
   3621                   case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
   3622                   case 3: return False;
   3623                   default: vassert(0);
   3624                }
   3625             } else {
   3626                switch (size) {
   3627                   case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
   3628                   case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
   3629                   case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
   3630                   case 3: return False;
   3631                   default: vassert(0);
   3632                }
   3633             }
   3634             if (B == 0) {
   3635                /* VCGT  */
   3636                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   3637                DIP("vcgt.%c%d %c%u, %c%u, %c%u\n",
   3638                    U ? 'u' : 's', 8 << size,
   3639                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3640                    mreg);
   3641             } else {
   3642                /* VCGE  */
   3643                /* VCGE res, argn, argm
   3644                     is equal to
   3645                   VCGT tmp, argm, argn
   3646                   VNOT res, tmp */
   3647                assign(res,
   3648                       unop(Q ? Iop_NotV128 : Iop_Not64,
   3649                            binop(op, mkexpr(arg_m), mkexpr(arg_n))));
   3650                DIP("vcge.%c%d %c%u, %c%u, %c%u\n",
   3651                    U ? 'u' : 's', 8 << size,
   3652                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3653                    mreg);
   3654             }
   3655          }
   3656          break;
   3657       case 4:
   3658          if (B == 0) {
   3659             /* VSHL */
   3660             IROp op = Iop_INVALID, sub_op = Iop_INVALID;
   3661             IRTemp tmp = IRTemp_INVALID;
   3662             if (U) {
   3663                switch (size) {
   3664                   case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
   3665                   case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
   3666                   case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
   3667                   case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
   3668                   default: vassert(0);
   3669                }
   3670             } else {
   3671                tmp = newTemp(Q ? Ity_V128 : Ity_I64);
   3672                switch (size) {
   3673                   case 0:
   3674                      op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   3675                      sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3676                      break;
   3677                   case 1:
   3678                      op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   3679                      sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3680                      break;
   3681                   case 2:
   3682                      op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   3683                      sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3684                      break;
   3685                   case 3:
   3686                      op = Q ? Iop_Sar64x2 : Iop_Sar64;
   3687                      sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
   3688                      break;
   3689                   default:
   3690                      vassert(0);
   3691                }
   3692             }
   3693             if (U) {
   3694                if (!Q && (size == 3))
   3695                   assign(res, binop(op, mkexpr(arg_m),
   3696                                         unop(Iop_64to8, mkexpr(arg_n))));
   3697                else
   3698                   assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
   3699             } else {
   3700                if (Q)
   3701                   assign(tmp, binop(sub_op,
   3702                                     binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
   3703                                     mkexpr(arg_n)));
   3704                else
   3705                   assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
   3706                if (!Q && (size == 3))
   3707                   assign(res, binop(op, mkexpr(arg_m),
   3708                                         unop(Iop_64to8, mkexpr(tmp))));
   3709                else
   3710                   assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
   3711             }
   3712             DIP("vshl.%c%d %c%u, %c%u, %c%u\n",
   3713                 U ? 'u' : 's', 8 << size,
   3714                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3715                 nreg);
   3716          } else {
   3717             /* VQSHL */
   3718             IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
   3719             IRTemp tmp, shval, mask, old_shval;
   3720             UInt i;
   3721             ULong esize;
   3722             cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
   3723             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3724             if (U) {
   3725                switch (size) {
   3726                   case 0:
   3727                      op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
   3728                      op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
   3729                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3730                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3731                      break;
   3732                   case 1:
   3733                      op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
   3734                      op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
   3735                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3736                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3737                      break;
   3738                   case 2:
   3739                      op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
   3740                      op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
   3741                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3742                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3743                      break;
   3744                   case 3:
   3745                      op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
   3746                      op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
   3747                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3748                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3749                      break;
   3750                   default:
   3751                      vassert(0);
   3752                }
   3753             } else {
   3754                switch (size) {
   3755                   case 0:
   3756                      op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
   3757                      op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   3758                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3759                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3760                      break;
   3761                   case 1:
   3762                      op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
   3763                      op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   3764                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3765                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3766                      break;
   3767                   case 2:
   3768                      op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
   3769                      op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   3770                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3771                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3772                      break;
   3773                   case 3:
   3774                      op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
   3775                      op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
   3776                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3777                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3778                      break;
   3779                   default:
   3780                      vassert(0);
   3781                }
   3782             }
   3783             if (Q) {
   3784                tmp = newTemp(Ity_V128);
   3785                shval = newTemp(Ity_V128);
   3786                mask = newTemp(Ity_V128);
   3787             } else {
   3788                tmp = newTemp(Ity_I64);
   3789                shval = newTemp(Ity_I64);
   3790                mask = newTemp(Ity_I64);
   3791             }
   3792             assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
   3793             /* Only least significant byte from second argument is used.
   3794                Copy this byte to the whole vector element. */
   3795             assign(shval, binop(op_shrn,
   3796                                 binop(op_shln,
   3797                                        mkexpr(arg_n),
   3798                                        mkU8((8 << size) - 8)),
   3799                                 mkU8((8 << size) - 8)));
   3800             for(i = 0; i < size; i++) {
   3801                old_shval = shval;
   3802                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   3803                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   3804                                    mkexpr(old_shval),
   3805                                    binop(op_shln,
   3806                                          mkexpr(old_shval),
   3807                                          mkU8(8 << i))));
   3808             }
   3809             /* If shift is greater or equal to the element size and
   3810                element is non-zero, then QC flag should be set. */
   3811             esize = (8 << size) - 1;
   3812             esize = (esize <<  8) | esize;
   3813             esize = (esize << 16) | esize;
   3814             esize = (esize << 32) | esize;
   3815             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   3816                              binop(cmp_gt, mkexpr(shval),
   3817                                            Q ? mkU128(esize) : mkU64(esize)),
   3818                              unop(cmp_neq, mkexpr(arg_m))),
   3819                        Q ? mkU128(0) : mkU64(0),
   3820                        Q, condT);
   3821             /* Otherwise the QC flag should be set if the shift value is
   3822                positive and the result, shifted right by the same value, is
   3823                not equal to the left argument. */
   3824             assign(mask, binop(cmp_gt, mkexpr(shval),
   3825                                        Q ? mkU128(0) : mkU64(0)));
   3826             if (!Q && size == 3)
   3827                assign(tmp, binop(op_rev, mkexpr(res),
   3828                                          unop(Iop_64to8, mkexpr(arg_n))));
   3829             else
   3830                assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
   3831             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   3832                              mkexpr(tmp), mkexpr(mask)),
   3833                        binop(Q ? Iop_AndV128 : Iop_And64,
   3834                              mkexpr(arg_m), mkexpr(mask)),
   3835                        Q, condT);
   3836             DIP("vqshl.%c%d %c%u, %c%u, %c%u\n",
   3837                 U ? 'u' : 's', 8 << size,
   3838                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3839                 nreg);
   3840          }
   3841          break;
   3842       case 5:
   3843          if (B == 0) {
   3844             /* VRSHL */
   3845             IROp op, op_shrn, op_shln, cmp_gt, op_add;
   3846             IRTemp shval, old_shval, imm_val, round;
   3847             UInt i;
   3848             ULong imm;
   3849             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3850             imm = 1L;
   3851             switch (size) {
   3852                case 0: imm = (imm <<  8) | imm; /* fall through */
   3853                case 1: imm = (imm << 16) | imm; /* fall through */
   3854                case 2: imm = (imm << 32) | imm; /* fall through */
   3855                case 3: break;
   3856                default: vassert(0);
   3857             }
   3858             imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
   3859             round = newTemp(Q ? Ity_V128 : Ity_I64);
   3860             assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
   3861             if (U) {
   3862                switch (size) {
   3863                   case 0:
   3864                      op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
   3865                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   3866                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3867                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3868                      break;
   3869                   case 1:
   3870                      op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
   3871                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   3872                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3873                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3874                      break;
   3875                   case 2:
   3876                      op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
   3877                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   3878                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3879                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3880                      break;
   3881                   case 3:
   3882                      op = Q ? Iop_Shl64x2 : Iop_Shl64;
   3883                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   3884                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3885                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3886                      break;
   3887                   default:
   3888                      vassert(0);
   3889                }
   3890             } else {
   3891                switch (size) {
   3892                   case 0:
   3893                      op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
   3894                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   3895                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3896                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3897                      break;
   3898                   case 1:
   3899                      op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
   3900                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   3901                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3902                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3903                      break;
   3904                   case 2:
   3905                      op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
   3906                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   3907                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3908                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3909                      break;
   3910                   case 3:
   3911                      op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
   3912                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   3913                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3914                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3915                      break;
   3916                   default:
   3917                      vassert(0);
   3918                }
   3919             }
   3920             if (Q) {
   3921                shval = newTemp(Ity_V128);
   3922             } else {
   3923                shval = newTemp(Ity_I64);
   3924             }
   3925             /* Only least significant byte from second argument is used.
   3926                Copy this byte to the whole vector element. */
   3927             assign(shval, binop(op_shrn,
   3928                                 binop(op_shln,
   3929                                        mkexpr(arg_n),
   3930                                        mkU8((8 << size) - 8)),
   3931                                 mkU8((8 << size) - 8)));
   3932             for (i = 0; i < size; i++) {
   3933                old_shval = shval;
   3934                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   3935                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   3936                                    mkexpr(old_shval),
   3937                                    binop(op_shln,
   3938                                          mkexpr(old_shval),
   3939                                          mkU8(8 << i))));
   3940             }
   3941             /* Compute the result */
   3942             if (!Q && size == 3 && U) {
   3943                assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   3944                                    binop(op,
   3945                                          mkexpr(arg_m),
   3946                                          unop(Iop_64to8,
   3947                                               binop(op_add,
   3948                                                     mkexpr(arg_n),
   3949                                                     mkexpr(imm_val)))),
   3950                                    binop(Q ? Iop_AndV128 : Iop_And64,
   3951                                          mkexpr(imm_val),
   3952                                          binop(cmp_gt,
   3953                                                Q ? mkU128(0) : mkU64(0),
   3954                                                mkexpr(arg_n)))));
   3955                assign(res, binop(op_add,
   3956                                  binop(op,
   3957                                        mkexpr(arg_m),
   3958                                        unop(Iop_64to8, mkexpr(arg_n))),
   3959                                  mkexpr(round)));
   3960             } else {
   3961                assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   3962                                    binop(op,
   3963                                          mkexpr(arg_m),
   3964                                          binop(op_add,
   3965                                                mkexpr(arg_n),
   3966                                                mkexpr(imm_val))),
   3967                                    binop(Q ? Iop_AndV128 : Iop_And64,
   3968                                          mkexpr(imm_val),
   3969                                          binop(cmp_gt,
   3970                                                Q ? mkU128(0) : mkU64(0),
   3971                                                mkexpr(arg_n)))));
   3972                assign(res, binop(op_add,
   3973                                  binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   3974                                  mkexpr(round)));
   3975             }
   3976             DIP("vrshl.%c%d %c%u, %c%u, %c%u\n",
   3977                 U ? 'u' : 's', 8 << size,
   3978                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3979                 nreg);
   3980          } else {
   3981             /* VQRSHL */
   3982             IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
   3983             IRTemp tmp, shval, mask, old_shval, imm_val, round;
   3984             UInt i;
   3985             ULong esize, imm;
   3986             cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
   3987             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3988             imm = 1L;
   3989             switch (size) {
   3990                case 0: imm = (imm <<  8) | imm; /* fall through */
   3991                case 1: imm = (imm << 16) | imm; /* fall through */
   3992                case 2: imm = (imm << 32) | imm; /* fall through */
   3993                case 3: break;
   3994                default: vassert(0);
   3995             }
   3996             imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
   3997             round = newTemp(Q ? Ity_V128 : Ity_I64);
   3998             assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
   3999             if (U) {
   4000                switch (size) {
   4001                   case 0:
   4002                      op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
   4003                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   4004                      op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
   4005                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   4006                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   4007                      break;
   4008                   case 1:
   4009                      op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
   4010                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   4011                      op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
   4012                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   4013                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   4014                      break;
   4015                   case 2:
   4016                      op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
   4017                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   4018                      op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
   4019                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   4020                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   4021                      break;
   4022                   case 3:
   4023                      op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
   4024                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   4025                      op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
   4026                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   4027                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   4028                      break;
   4029                   default:
   4030                      vassert(0);
   4031                }
   4032             } else {
   4033                switch (size) {
   4034                   case 0:
   4035                      op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
   4036                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   4037                      op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   4038                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   4039                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   4040                      break;
   4041                   case 1:
   4042                      op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
   4043                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   4044                      op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   4045                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   4046                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   4047                      break;
   4048                   case 2:
   4049                      op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
   4050                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   4051                      op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   4052                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   4053                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   4054                      break;
   4055                   case 3:
   4056                      op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
   4057                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   4058                      op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
   4059                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   4060                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   4061                      break;
   4062                   default:
   4063                      vassert(0);
   4064                }
   4065             }
   4066             if (Q) {
   4067                tmp = newTemp(Ity_V128);
   4068                shval = newTemp(Ity_V128);
   4069                mask = newTemp(Ity_V128);
   4070             } else {
   4071                tmp = newTemp(Ity_I64);
   4072                shval = newTemp(Ity_I64);
   4073                mask = newTemp(Ity_I64);
   4074             }
   4075             /* Only least significant byte from second argument is used.
   4076                Copy this byte to the whole vector element. */
   4077             assign(shval, binop(op_shrn,
   4078                                 binop(op_shln,
   4079                                        mkexpr(arg_n),
   4080                                        mkU8((8 << size) - 8)),
   4081                                 mkU8((8 << size) - 8)));
   4082             for (i = 0; i < size; i++) {
   4083                old_shval = shval;
   4084                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   4085                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   4086                                    mkexpr(old_shval),
   4087                                    binop(op_shln,
   4088                                          mkexpr(old_shval),
   4089                                          mkU8(8 << i))));
   4090             }
   4091             /* Compute the result */
   4092             assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   4093                                 binop(op,
   4094                                       mkexpr(arg_m),
   4095                                       binop(op_add,
   4096                                             mkexpr(arg_n),
   4097                                             mkexpr(imm_val))),
   4098                                 binop(Q ? Iop_AndV128 : Iop_And64,
   4099                                       mkexpr(imm_val),
   4100                                       binop(cmp_gt,
   4101                                             Q ? mkU128(0) : mkU64(0),
   4102                                             mkexpr(arg_n)))));
   4103             assign(res, binop(op_add,
   4104                               binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   4105                               mkexpr(round)));
   4106             /* If shift is greater or equal to the element size and element is
   4107                non-zero, then QC flag should be set. */
   4108             esize = (8 << size) - 1;
   4109             esize = (esize <<  8) | esize;
   4110             esize = (esize << 16) | esize;
   4111             esize = (esize << 32) | esize;
   4112             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4113                              binop(cmp_gt, mkexpr(shval),
   4114                                            Q ? mkU128(esize) : mkU64(esize)),
   4115                              unop(cmp_neq, mkexpr(arg_m))),
   4116                        Q ? mkU128(0) : mkU64(0),
   4117                        Q, condT);
   4118             /* Otherwise the QC flag should be set if the shift value is
   4119                positive and the result, shifted right by the same value, is
   4120                not equal to the left argument. */
   4121             assign(mask, binop(cmp_gt, mkexpr(shval),
   4122                                Q ? mkU128(0) : mkU64(0)));
   4123             if (!Q && size == 3)
   4124                assign(tmp, binop(op_rev, mkexpr(res),
   4125                                          unop(Iop_64to8, mkexpr(arg_n))));
   4126             else
   4127                assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
   4128             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4129                              mkexpr(tmp), mkexpr(mask)),
   4130                        binop(Q ? Iop_AndV128 : Iop_And64,
   4131                              mkexpr(arg_m), mkexpr(mask)),
   4132                        Q, condT);
   4133             DIP("vqrshl.%c%d %c%u, %c%u, %c%u\n",
   4134                 U ? 'u' : 's', 8 << size,
   4135                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   4136                 nreg);
   4137          }
   4138          break;
   4139       case 6:
   4140          /* VMAX, VMIN  */
   4141          if (B == 0) {
   4142             /* VMAX */
   4143             IROp op;
   4144             if (U == 0) {
   4145                switch (size) {
   4146                   case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
   4147                   case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
   4148                   case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
   4149                   case 3: return False;
   4150                   default: vassert(0);
   4151                }
   4152             } else {
   4153                switch (size) {
   4154                   case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
   4155                   case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
   4156                   case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
   4157                   case 3: return False;
   4158                   default: vassert(0);
   4159                }
   4160             }
   4161             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4162             DIP("vmax.%c%d %c%u, %c%u, %c%u\n",
   4163                 U ? 'u' : 's', 8 << size,
   4164                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4165                 mreg);
   4166          } else {
   4167             /* VMIN */
   4168             IROp op;
   4169             if (U == 0) {
   4170                switch (size) {
   4171                   case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
   4172                   case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
   4173                   case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
   4174                   case 3: return False;
   4175                   default: vassert(0);
   4176                }
   4177             } else {
   4178                switch (size) {
   4179                   case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
   4180                   case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
   4181                   case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
   4182                   case 3: return False;
   4183                   default: vassert(0);
   4184                }
   4185             }
   4186             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4187             DIP("vmin.%c%d %c%u, %c%u, %c%u\n",
   4188                 U ? 'u' : 's', 8 << size,
   4189                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4190                 mreg);
   4191          }
   4192          break;
   4193       case 7:
   4194          if (B == 0) {
   4195             /* VABD */
   4196             IROp op_cmp, op_sub;
   4197             IRTemp cond;
   4198             if ((theInstr >> 23) & 1) {
   4199                vpanic("VABDL should not be in dis_neon_data_3same\n");
   4200             }
   4201             if (Q) {
   4202                switch (size) {
   4203                   case 0:
   4204                      op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
   4205                      op_sub = Iop_Sub8x16;
   4206                      break;
   4207                   case 1:
   4208                      op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
   4209                      op_sub = Iop_Sub16x8;
   4210                      break;
   4211                   case 2:
   4212                      op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
   4213                      op_sub = Iop_Sub32x4;
   4214                      break;
   4215                   case 3:
   4216                      return False;
   4217                   default:
   4218                      vassert(0);
   4219                }
   4220             } else {
   4221                switch (size) {
   4222                   case 0:
   4223                      op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4224                      op_sub = Iop_Sub8x8;
   4225                      break;
   4226                   case 1:
   4227                      op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   4228                      op_sub = Iop_Sub16x4;
   4229                      break;
   4230                   case 2:
   4231                      op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   4232                      op_sub = Iop_Sub32x2;
   4233                      break;
   4234                   case 3:
   4235                      return False;
   4236                   default:
   4237                      vassert(0);
   4238                }
   4239             }
   4240             if (Q) {
   4241                cond = newTemp(Ity_V128);
   4242             } else {
   4243                cond = newTemp(Ity_I64);
   4244             }
   4245             assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
   4246             assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
   4247                               binop(Q ? Iop_AndV128 : Iop_And64,
   4248                                     binop(op_sub, mkexpr(arg_n),
   4249                                                   mkexpr(arg_m)),
   4250                                     mkexpr(cond)),
   4251                               binop(Q ? Iop_AndV128 : Iop_And64,
   4252                                     binop(op_sub, mkexpr(arg_m),
   4253                                                   mkexpr(arg_n)),
   4254                                     unop(Q ? Iop_NotV128 : Iop_Not64,
   4255                                          mkexpr(cond)))));
   4256             DIP("vabd.%c%d %c%u, %c%u, %c%u\n",
   4257                 U ? 'u' : 's', 8 << size,
   4258                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4259                 mreg);
   4260          } else {
   4261             /* VABA */
   4262             IROp op_cmp, op_sub, op_add;
   4263             IRTemp cond, acc, tmp;
   4264             if ((theInstr >> 23) & 1) {
   4265                vpanic("VABAL should not be in dis_neon_data_3same");
   4266             }
   4267             if (Q) {
   4268                switch (size) {
   4269                   case 0:
   4270                      op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
   4271                      op_sub = Iop_Sub8x16;
   4272                      op_add = Iop_Add8x16;
   4273                      break;
   4274                   case 1:
   4275                      op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
   4276                      op_sub = Iop_Sub16x8;
   4277                      op_add = Iop_Add16x8;
   4278                      break;
   4279                   case 2:
   4280                      op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
   4281                      op_sub = Iop_Sub32x4;
   4282                      op_add = Iop_Add32x4;
   4283                      break;
   4284                   case 3:
   4285                      return False;
   4286                   default:
   4287                      vassert(0);
   4288                }
   4289             } else {
   4290                switch (size) {
   4291                   case 0:
   4292                      op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4293                      op_sub = Iop_Sub8x8;
   4294                      op_add = Iop_Add8x8;
   4295                      break;
   4296                   case 1:
   4297                      op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   4298                      op_sub = Iop_Sub16x4;
   4299                      op_add = Iop_Add16x4;
   4300                      break;
   4301                   case 2:
   4302                      op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   4303                      op_sub = Iop_Sub32x2;
   4304                      op_add = Iop_Add32x2;
   4305                      break;
   4306                   case 3:
   4307                      return False;
   4308                   default:
   4309                      vassert(0);
   4310                }
   4311             }
   4312             if (Q) {
   4313                cond = newTemp(Ity_V128);
   4314                acc = newTemp(Ity_V128);
   4315                tmp = newTemp(Ity_V128);
   4316                assign(acc, getQReg(dreg));
   4317             } else {
   4318                cond = newTemp(Ity_I64);
   4319                acc = newTemp(Ity_I64);
   4320                tmp = newTemp(Ity_I64);
   4321                assign(acc, getDRegI64(dreg));
   4322             }
   4323             assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
   4324             assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
   4325                               binop(Q ? Iop_AndV128 : Iop_And64,
   4326                                     binop(op_sub, mkexpr(arg_n),
   4327                                                   mkexpr(arg_m)),
   4328                                     mkexpr(cond)),
   4329                               binop(Q ? Iop_AndV128 : Iop_And64,
   4330                                     binop(op_sub, mkexpr(arg_m),
   4331                                                   mkexpr(arg_n)),
   4332                                     unop(Q ? Iop_NotV128 : Iop_Not64,
   4333                                          mkexpr(cond)))));
   4334             assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
   4335             DIP("vaba.%c%d %c%u, %c%u, %c%u\n",
   4336                 U ? 'u' : 's', 8 << size,
   4337                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4338                 mreg);
   4339          }
   4340          break;
   4341       case 8:
   4342          if (B == 0) {
   4343             IROp op;
   4344             if (U == 0) {
   4345                /* VADD  */
   4346                switch (size) {
   4347                   case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
   4348                   case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
   4349                   case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
   4350                   case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
   4351                   default: vassert(0);
   4352                }
   4353                DIP("vadd.i%d %c%u, %c%u, %c%u\n",
   4354                    8 << size, Q ? 'q' : 'd',
   4355                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4356             } else {
   4357                /* VSUB  */
   4358                switch (size) {
   4359                   case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
   4360                   case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
   4361                   case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
   4362                   case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
   4363                   default: vassert(0);
   4364                }
   4365                DIP("vsub.i%d %c%u, %c%u, %c%u\n",
   4366                    8 << size, Q ? 'q' : 'd',
   4367                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4368             }
   4369             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4370          } else {
   4371             IROp op;
   4372             switch (size) {
   4373                case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
   4374                case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
   4375                case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
   4376                case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
   4377                default: vassert(0);
   4378             }
   4379             if (U == 0) {
   4380                /* VTST  */
   4381                assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
   4382                                           mkexpr(arg_n),
   4383                                           mkexpr(arg_m))));
   4384                DIP("vtst.%d %c%u, %c%u, %c%u\n",
   4385                    8 << size, Q ? 'q' : 'd',
   4386                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4387             } else {
   4388                /* VCEQ  */
   4389                assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   4390                                 unop(op,
   4391                                      binop(Q ? Iop_XorV128 : Iop_Xor64,
   4392                                            mkexpr(arg_n),
   4393                                            mkexpr(arg_m)))));
   4394                DIP("vceq.i%d %c%u, %c%u, %c%u\n",
   4395                    8 << size, Q ? 'q' : 'd',
   4396                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4397             }
   4398          }
   4399          break;
   4400       case 9:
   4401          if (B == 0) {
   4402             /* VMLA, VMLS (integer) */
   4403             IROp op, op2;
   4404             UInt P = (theInstr >> 24) & 1;
   4405             if (P) {
   4406                switch (size) {
   4407                   case 0:
   4408                      op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
   4409                      op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   4410                      break;
   4411                   case 1:
   4412                      op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   4413                      op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   4414                      break;
   4415                   case 2:
   4416                      op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   4417                      op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   4418                      break;
   4419                   case 3:
   4420                      return False;
   4421                   default:
   4422                      vassert(0);
   4423                }
   4424             } else {
   4425                switch (size) {
   4426                   case 0:
   4427                      op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
   4428                      op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
   4429                      break;
   4430                   case 1:
   4431                      op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   4432                      op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
   4433                      break;
   4434                   case 2:
   4435                      op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   4436                      op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
   4437                      break;
   4438                   case 3:
   4439                      return False;
   4440                   default:
   4441                      vassert(0);
   4442                }
   4443             }
   4444             assign(res, binop(op2,
   4445                               Q ? getQReg(dreg) : getDRegI64(dreg),
   4446                               binop(op, mkexpr(arg_n), mkexpr(arg_m))));
   4447             DIP("vml%c.i%d %c%u, %c%u, %c%u\n",
   4448                 P ? 's' : 'a', 8 << size,
   4449                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4450                 mreg);
   4451          } else {
   4452             /* VMUL */
   4453             IROp op;
   4454             UInt P = (theInstr >> 24) & 1;
   4455             if (P) {
   4456                switch (size) {
   4457                   case 0:
   4458                      op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
   4459                      break;
   4460                   case 1: case 2: case 3: return False;
   4461                   default: vassert(0);
   4462                }
   4463             } else {
   4464                switch (size) {
   4465                   case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
   4466                   case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
   4467                   case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
   4468                   case 3: return False;
   4469                   default: vassert(0);
   4470                }
   4471             }
   4472             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4473             DIP("vmul.%c%d %c%u, %c%u, %c%u\n",
   4474                 P ? 'p' : 'i', 8 << size,
   4475                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4476                 mreg);
   4477          }
   4478          break;
   4479       case 10: {
   4480          /* VPMAX, VPMIN  */
   4481          UInt P = (theInstr >> 4) & 1;
   4482          IROp op;
   4483          if (Q)
   4484             return False;
   4485          if (P) {
   4486             switch (size) {
   4487                case 0: op = U ? Iop_PwMin8Ux8  : Iop_PwMin8Sx8; break;
   4488                case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
   4489                case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
   4490                case 3: return False;
   4491                default: vassert(0);
   4492             }
   4493          } else {
   4494             switch (size) {
   4495                case 0: op = U ? Iop_PwMax8Ux8  : Iop_PwMax8Sx8; break;
   4496                case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
   4497                case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
   4498                case 3: return False;
   4499                default: vassert(0);
   4500             }
   4501          }
   4502          assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4503          DIP("vp%s.%c%d %c%u, %c%u, %c%u\n",
   4504              P ? "min" : "max", U ? 'u' : 's',
   4505              8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
   4506              Q ? 'q' : 'd', mreg);
   4507          break;
   4508       }
   4509       case 11:
   4510          if (B == 0) {
   4511             if (U == 0) {
   4512                /* VQDMULH  */
   4513                IROp op ,op2;
   4514                ULong imm;
   4515                switch (size) {
   4516                   case 0: case 3:
   4517                      return False;
   4518                   case 1:
   4519                      op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
   4520                      op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   4521                      imm = 1LL << 15;
   4522                      imm = (imm << 16) | imm;
   4523                      imm = (imm << 32) | imm;
   4524                      break;
   4525                   case 2:
   4526                      op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
   4527                      op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   4528                      imm = 1LL << 31;
   4529                      imm = (imm << 32) | imm;
   4530                      break;
   4531                   default:
   4532                      vassert(0);
   4533                }
   4534                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4535                setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4536                                 binop(op2, mkexpr(arg_n),
   4537                                            Q ? mkU128(imm) : mkU64(imm)),
   4538                                 binop(op2, mkexpr(arg_m),
   4539                                            Q ? mkU128(imm) : mkU64(imm))),
   4540                           Q ? mkU128(0) : mkU64(0),
   4541                           Q, condT);
   4542                DIP("vqdmulh.s%d %c%u, %c%u, %c%u\n",
   4543                    8 << size, Q ? 'q' : 'd',
   4544                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4545             } else {
   4546                /* VQRDMULH */
   4547                IROp op ,op2;
   4548                ULong imm;
   4549                switch(size) {
   4550                   case 0: case 3:
   4551                      return False;
   4552                   case 1:
   4553                      imm = 1LL << 15;
   4554                      imm = (imm << 16) | imm;
   4555                      imm = (imm << 32) | imm;
   4556                      op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
   4557                      op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   4558                      break;
   4559                   case 2:
   4560                      imm = 1LL << 31;
   4561                      imm = (imm << 32) | imm;
   4562                      op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
   4563                      op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   4564                      break;
   4565                   default:
   4566                      vassert(0);
   4567                }
   4568                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4569                setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4570                                 binop(op2, mkexpr(arg_n),
   4571                                            Q ? mkU128(imm) : mkU64(imm)),
   4572                                 binop(op2, mkexpr(arg_m),
   4573                                            Q ? mkU128(imm) : mkU64(imm))),
   4574                           Q ? mkU128(0) : mkU64(0),
   4575                           Q, condT);
   4576                DIP("vqrdmulh.s%d %c%u, %c%u, %c%u\n",
   4577                    8 << size, Q ? 'q' : 'd',
   4578                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4579             }
   4580          } else {
   4581             if (U == 0) {
   4582                /* VPADD */
   4583                IROp op;
   4584                if (Q)
   4585                   return False;
   4586                switch (size) {
   4587                   case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8;  break;
   4588                   case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
   4589                   case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
   4590                   case 3: return False;
   4591                   default: vassert(0);
   4592                }
   4593                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4594                DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
   4595                    8 << size, Q ? 'q' : 'd',
   4596                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4597             }
   4598          }
   4599          break;
   4600       /* Starting from here these are FP SIMD cases */
   4601       case 13:
   4602          if (B == 0) {
   4603             IROp op;
   4604             if (U == 0) {
   4605                if ((C >> 1) == 0) {
   4606                   /* VADD  */
   4607                   op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2 ;
   4608                   DIP("vadd.f32 %c%u, %c%u, %c%u\n",
   4609                       Q ? 'q' : 'd', dreg,
   4610                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4611                } else {
   4612                   /* VSUB  */
   4613                   op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2 ;
   4614                   DIP("vsub.f32 %c%u, %c%u, %c%u\n",
   4615                       Q ? 'q' : 'd', dreg,
   4616                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4617                }
   4618             } else {
   4619                if ((C >> 1) == 0) {
   4620                   /* VPADD */
   4621                   if (Q)
   4622                      return False;
   4623                   op = Iop_PwAdd32Fx2;
   4624                   DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
   4625                } else {
   4626                   /* VABD  */
   4627                   if (Q) {
   4628                      assign(res, unop(Iop_Abs32Fx4,
   4629                                       triop(Iop_Sub32Fx4,
   4630                                             get_FAKE_roundingmode(),
   4631                                             mkexpr(arg_n),
   4632                                             mkexpr(arg_m))));
   4633                   } else {
   4634                      assign(res, unop(Iop_Abs32Fx2,
   4635                                       binop(Iop_Sub32Fx2,
   4636                                             mkexpr(arg_n),
   4637                                             mkexpr(arg_m))));
   4638                   }
   4639                   DIP("vabd.f32 %c%u, %c%u, %c%u\n",
   4640                       Q ? 'q' : 'd', dreg,
   4641                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4642                   break;
   4643                }
   4644             }
   4645             assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   4646          } else {
   4647             if (U == 0) {
   4648                /* VMLA, VMLS  */
   4649                IROp op, op2;
   4650                UInt P = (theInstr >> 21) & 1;
   4651                if (P) {
   4652                   switch (size & 1) {
   4653                      case 0:
   4654                         op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   4655                         op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
   4656                         break;
   4657                      case 1: return False;
   4658                      default: vassert(0);
   4659                   }
   4660                } else {
   4661                   switch (size & 1) {
   4662                      case 0:
   4663                         op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   4664                         op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
   4665                         break;
   4666                      case 1: return False;
   4667                      default: vassert(0);
   4668                   }
   4669                }
   4670                assign(res, binop_w_fake_RM(
   4671                               op2,
   4672                               Q ? getQReg(dreg) : getDRegI64(dreg),
   4673                               binop_w_fake_RM(op, mkexpr(arg_n),
   4674                                                   mkexpr(arg_m))));
   4675 
   4676                DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
   4677                    P ? 's' : 'a', Q ? 'q' : 'd',
   4678                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4679             } else {
   4680                /* VMUL  */
   4681                IROp op;
   4682                if ((C >> 1) != 0)
   4683                   return False;
   4684                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ;
   4685                assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   4686                DIP("vmul.f32 %c%u, %c%u, %c%u\n",
   4687                    Q ? 'q' : 'd', dreg,
   4688                    Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4689             }
   4690          }
   4691          break;
   4692       case 14:
   4693          if (B == 0) {
   4694             if (U == 0) {
   4695                if ((C >> 1) == 0) {
   4696                   /* VCEQ  */
   4697                   IROp op;
   4698                   if ((theInstr >> 20) & 1)
   4699                      return False;
   4700                   op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2;
   4701                   assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4702                   DIP("vceq.f32 %c%u, %c%u, %c%u\n",
   4703                       Q ? 'q' : 'd', dreg,
   4704                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4705                } else {
   4706                   return False;
   4707                }
   4708             } else {
   4709                if ((C >> 1) == 0) {
   4710                   /* VCGE  */
   4711                   IROp op;
   4712                   if ((theInstr >> 20) & 1)
   4713                      return False;
   4714                   op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
   4715                   assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4716                   DIP("vcge.f32 %c%u, %c%u, %c%u\n",
   4717                       Q ? 'q' : 'd', dreg,
   4718                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4719                } else {
   4720                   /* VCGT  */
   4721                   IROp op;
   4722                   if ((theInstr >> 20) & 1)
   4723                      return False;
   4724                   op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
   4725                   assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4726                   DIP("vcgt.f32 %c%u, %c%u, %c%u\n",
   4727                       Q ? 'q' : 'd', dreg,
   4728                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4729                }
   4730             }
   4731          } else {
   4732             if (U == 1) {
   4733                /* VACGE, VACGT */
   4734                UInt op_bit = (theInstr >> 21) & 1;
   4735                IROp op, op2;
   4736                op2 = Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2;
   4737                if (op_bit) {
   4738                   op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
   4739                   assign(res, binop(op,
   4740                                     unop(op2, mkexpr(arg_n)),
   4741                                     unop(op2, mkexpr(arg_m))));
   4742                } else {
   4743                   op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
   4744                   assign(res, binop(op,
   4745                                     unop(op2, mkexpr(arg_n)),
   4746                                     unop(op2, mkexpr(arg_m))));
   4747                }
   4748                DIP("vacg%c.f32 %c%u, %c%u, %c%u\n", op_bit ? 't' : 'e',
   4749                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
   4750                    Q ? 'q' : 'd', mreg);
   4751             }
   4752          }
   4753          break;
   4754       case 15:
   4755          if (B == 0) {
   4756             if (U == 0) {
   4757                /* VMAX, VMIN  */
   4758                IROp op;
   4759                if ((theInstr >> 20) & 1)
   4760                   return False;
   4761                if ((theInstr >> 21) & 1) {
   4762                   op = Q ? Iop_Min32Fx4 : Iop_Min32Fx2;
   4763                   DIP("vmin.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4764                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4765                } else {
   4766                   op = Q ? Iop_Max32Fx4 : Iop_Max32Fx2;
   4767                   DIP("vmax.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4768                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4769                }
   4770                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4771             } else {
   4772                /* VPMAX, VPMIN   */
   4773                IROp op;
   4774                if (Q)
   4775                   return False;
   4776                if ((theInstr >> 20) & 1)
   4777                   return False;
   4778                if ((theInstr >> 21) & 1) {
   4779                   op = Iop_PwMin32Fx2;
   4780                   DIP("vpmin.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
   4781                } else {
   4782                   op = Iop_PwMax32Fx2;
   4783                   DIP("vpmax.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
   4784                }
   4785                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4786             }
   4787          } else {
   4788             if (U == 0) {
   4789                if ((C >> 1) == 0) {
   4790                   /* VRECPS */
   4791                   if ((theInstr >> 20) & 1)
   4792                      return False;
   4793                   assign(res, binop(Q ? Iop_RecipStep32Fx4
   4794                                       : Iop_RecipStep32Fx2,
   4795                                     mkexpr(arg_n),
   4796                                     mkexpr(arg_m)));
   4797                   DIP("vrecps.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4798                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4799                } else {
   4800                   /* VRSQRTS  */
   4801                   if ((theInstr >> 20) & 1)
   4802                      return False;
   4803                   assign(res, binop(Q ? Iop_RSqrtStep32Fx4
   4804                                       : Iop_RSqrtStep32Fx2,
   4805                                     mkexpr(arg_n),
   4806                                     mkexpr(arg_m)));
   4807                   DIP("vrsqrts.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4808                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4809                }
   4810             }
   4811          }
   4812          break;
   4813    }
   4814 
   4815    if (Q) {
   4816       putQReg(dreg, mkexpr(res), condT);
   4817    } else {
   4818       putDRegI64(dreg, mkexpr(res), condT);
   4819    }
   4820 
   4821    return True;
   4822 }
   4823 
   4824 /* A7.4.2 Three registers of different lengths */
   4825 static
   4826 Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
   4827 {
   4828    UInt A = (theInstr >> 8) & 0xf;
   4829    UInt B = (theInstr >> 20) & 3;
   4830    UInt U = (theInstr >> 24) & 1;
   4831    UInt P = (theInstr >> 9) & 1;
   4832    UInt mreg = get_neon_m_regno(theInstr);
   4833    UInt nreg = get_neon_n_regno(theInstr);
   4834    UInt dreg = get_neon_d_regno(theInstr);
   4835    UInt size = B;
   4836    ULong imm;
   4837    IRTemp res, arg_m, arg_n, cond, tmp;
   4838    IROp cvt, cvt2, cmp, op, op2, sh, add;
   4839    switch (A) {
   4840       case 0: case 1: case 2: case 3:
   4841          /* VADDL, VADDW, VSUBL, VSUBW */
   4842          if (dreg & 1)
   4843             return False;
   4844          dreg >>= 1;
   4845          size = B;
   4846          switch (size) {
   4847             case 0:
   4848                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   4849                op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
   4850                break;
   4851             case 1:
   4852                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   4853                op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
   4854                break;
   4855             case 2:
   4856                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   4857                op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
   4858                break;
   4859             case 3:
   4860                return False;
   4861             default:
   4862                vassert(0);
   4863          }
   4864          arg_n = newTemp(Ity_V128);
   4865          arg_m = newTemp(Ity_V128);
   4866          if (A & 1) {
   4867             if (nreg & 1)
   4868                return False;
   4869             nreg >>= 1;
   4870             assign(arg_n, getQReg(nreg));
   4871          } else {
   4872             assign(arg_n, unop(cvt, getDRegI64(nreg)));
   4873          }
   4874          assign(arg_m, unop(cvt, getDRegI64(mreg)));
   4875          putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   4876                        condT);
   4877          DIP("v%s%c.%c%d q%u, %c%u, d%u\n", (A & 2) ? "sub" : "add",
   4878              (A & 1) ? 'w' : 'l', U ? 'u' : 's', 8 << size, dreg,
   4879              (A & 1) ? 'q' : 'd', nreg, mreg);
   4880          return True;
   4881       case 4:
   4882          /* VADDHN, VRADDHN */
   4883          if (mreg & 1)
   4884             return False;
   4885          mreg >>= 1;
   4886          if (nreg & 1)
   4887             return False;
   4888          nreg >>= 1;
   4889          size = B;
   4890          switch (size) {
   4891             case 0:
   4892                op = Iop_Add16x8;
   4893                cvt = Iop_NarrowUn16to8x8;
   4894                sh = Iop_ShrN16x8;
   4895                imm = 1U << 7;
   4896                imm = (imm << 16) | imm;
   4897                imm = (imm << 32) | imm;
   4898                break;
   4899             case 1:
   4900                op = Iop_Add32x4;
   4901                cvt = Iop_NarrowUn32to16x4;
   4902                sh = Iop_ShrN32x4;
   4903                imm = 1U << 15;
   4904                imm = (imm << 32) | imm;
   4905                break;
   4906             case 2:
   4907                op = Iop_Add64x2;
   4908                cvt = Iop_NarrowUn64to32x2;
   4909                sh = Iop_ShrN64x2;
   4910                imm = 1U << 31;
   4911                break;
   4912             case 3:
   4913                return False;
   4914             default:
   4915                vassert(0);
   4916          }
   4917          tmp = newTemp(Ity_V128);
   4918          res = newTemp(Ity_V128);
   4919          assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
   4920          if (U) {
   4921             /* VRADDHN */
   4922             assign(res, binop(op, mkexpr(tmp),
   4923                      binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
   4924          } else {
   4925             assign(res, mkexpr(tmp));
   4926          }
   4927          putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
   4928                     condT);
   4929          DIP("v%saddhn.i%d d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
   4930              nreg, mreg);
   4931          return True;
   4932       case 5:
   4933          /* VABAL */
   4934          if (!((theInstr >> 23) & 1)) {
   4935             vpanic("VABA should not be in dis_neon_data_3diff\n");
   4936          }
   4937          if (dreg & 1)
   4938             return False;
   4939          dreg >>= 1;
   4940          switch (size) {
   4941             case 0:
   4942                cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4943                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   4944                cvt2 = Iop_Widen8Sto16x8;
   4945                op = Iop_Sub16x8;
   4946                op2 = Iop_Add16x8;
   4947                break;
   4948             case 1:
   4949                cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   4950                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   4951                cvt2 = Iop_Widen16Sto32x4;
   4952                op = Iop_Sub32x4;
   4953                op2 = Iop_Add32x4;
   4954                break;
   4955             case 2:
   4956                cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   4957                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   4958                cvt2 = Iop_Widen32Sto64x2;
   4959                op = Iop_Sub64x2;
   4960                op2 = Iop_Add64x2;
   4961                break;
   4962             case 3:
   4963                return False;
   4964             default:
   4965                vassert(0);
   4966          }
   4967          arg_n = newTemp(Ity_V128);
   4968          arg_m = newTemp(Ity_V128);
   4969          cond = newTemp(Ity_V128);
   4970          res = newTemp(Ity_V128);
   4971          assign(arg_n, unop(cvt, getDRegI64(nreg)));
   4972          assign(arg_m, unop(cvt, getDRegI64(mreg)));
   4973          assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
   4974                                             getDRegI64(mreg))));
   4975          assign(res, binop(op2,
   4976                            binop(Iop_OrV128,
   4977                                  binop(Iop_AndV128,
   4978                                        binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   4979                                        mkexpr(cond)),
   4980                                  binop(Iop_AndV128,
   4981                                        binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   4982                                        unop(Iop_NotV128, mkexpr(cond)))),
   4983                            getQReg(dreg)));
   4984          putQReg(dreg, mkexpr(res), condT);
   4985          DIP("vabal.%c%d q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
   4986              nreg, mreg);
   4987          return True;
   4988       case 6:
   4989          /* VSUBHN, VRSUBHN */
   4990          if (mreg & 1)
   4991             return False;
   4992          mreg >>= 1;
   4993          if (nreg & 1)
   4994             return False;
   4995          nreg >>= 1;
   4996          size = B;
   4997          switch (size) {
   4998             case 0:
   4999                op = Iop_Sub16x8;
   5000                op2 = Iop_Add16x8;
   5001                cvt = Iop_NarrowUn16to8x8;
   5002                sh = Iop_ShrN16x8;
   5003                imm = 1U << 7;
   5004                imm = (imm << 16) | imm;
   5005                imm = (imm << 32) | imm;
   5006                break;
   5007             case 1:
   5008                op = Iop_Sub32x4;
   5009                op2 = Iop_Add32x4;
   5010                cvt = Iop_NarrowUn32to16x4;
   5011                sh = Iop_ShrN32x4;
   5012                imm = 1U << 15;
   5013                imm = (imm << 32) | imm;
   5014                break;
   5015             case 2:
   5016                op = Iop_Sub64x2;
   5017                op2 = Iop_Add64x2;
   5018                cvt = Iop_NarrowUn64to32x2;
   5019                sh = Iop_ShrN64x2;
   5020                imm = 1U << 31;
   5021                break;
   5022             case 3:
   5023                return False;
   5024             default:
   5025                vassert(0);
   5026          }
   5027          tmp = newTemp(Ity_V128);
   5028          res = newTemp(Ity_V128);
   5029          assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
   5030          if (U) {
   5031             /* VRSUBHN */
   5032             assign(res, binop(op2, mkexpr(tmp),
   5033                      binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
   5034          } else {
   5035             assign(res, mkexpr(tmp));
   5036          }
   5037          putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
   5038                     condT);
   5039          DIP("v%ssubhn.i%d d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
   5040              nreg, mreg);
   5041          return True;
   5042       case 7:
   5043          /* VABDL */
   5044          if (!((theInstr >> 23) & 1)) {
   5045             vpanic("VABL should not be in dis_neon_data_3diff\n");
   5046          }
   5047          if (dreg & 1)
   5048             return False;
   5049          dreg >>= 1;
   5050          switch (size) {
   5051             case 0:
   5052                cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   5053                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   5054                cvt2 = Iop_Widen8Sto16x8;
   5055                op = Iop_Sub16x8;
   5056                break;
   5057             case 1:
   5058                cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   5059                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   5060                cvt2 = Iop_Widen16Sto32x4;
   5061                op = Iop_Sub32x4;
   5062                break;
   5063             case 2:
   5064                cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   5065                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   5066                cvt2 = Iop_Widen32Sto64x2;
   5067                op = Iop_Sub64x2;
   5068                break;
   5069             case 3:
   5070                return False;
   5071             default:
   5072                vassert(0);
   5073          }
   5074          arg_n = newTemp(Ity_V128);
   5075          arg_m = newTemp(Ity_V128);
   5076          cond = newTemp(Ity_V128);
   5077          res = newTemp(Ity_V128);
   5078          assign(arg_n, unop(cvt, getDRegI64(nreg)));
   5079          assign(arg_m, unop(cvt, getDRegI64(mreg)));
   5080          assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
   5081                                             getDRegI64(mreg))));
   5082          assign(res, binop(Iop_OrV128,
   5083                            binop(Iop_AndV128,
   5084                                  binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   5085                                  mkexpr(cond)),
   5086                            binop(Iop_AndV128,
   5087                                  binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   5088                                  unop(Iop_NotV128, mkexpr(cond)))));
   5089          putQReg(dreg, mkexpr(res), condT);
   5090          DIP("vabdl.%c%d q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
   5091              nreg, mreg);
   5092          return True;
   5093       case 8:
   5094       case 10:
   5095          /* VMLAL, VMLSL (integer) */
   5096          if (dreg & 1)
   5097             return False;
   5098          dreg >>= 1;
   5099          size = B;
   5100          switch (size) {
   5101             case 0:
   5102                op = U ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
   5103                op2 = P ? Iop_Sub16x8 : Iop_Add16x8;
   5104                break;
   5105             case 1:
   5106                op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
   5107                op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
   5108                break;
   5109             case 2:
   5110                op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
   5111                op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
   5112                break;
   5113             case 3:
   5114                return False;
   5115             default:
   5116                vassert(0);
   5117          }
   5118          res = newTemp(Ity_V128);
   5119          assign(res, binop(op, getDRegI64(nreg),getDRegI64(mreg)));
   5120          putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
   5121          DIP("vml%cl.%c%d q%u, d%u, d%u\n", P ? 's' : 'a', U ? 'u' : 's',
   5122              8 << size, dreg, nreg, mreg);
   5123          return True;
   5124       case 9:
   5125       case 11:
   5126          /* VQDMLAL, VQDMLSL */
   5127          if (U)
   5128             return False;
   5129          if (dreg & 1)
   5130             return False;
   5131          dreg >>= 1;
   5132          size = B;
   5133          switch (size) {
   5134             case 0: case 3:
   5135                return False;
   5136             case 1:
   5137                op = Iop_QDMull16Sx4;
   5138                cmp = Iop_CmpEQ16x4;
   5139                add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
   5140                op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
   5141                imm = 1LL << 15;
   5142                imm = (imm << 16) | imm;
   5143                imm = (imm << 32) | imm;
   5144                break;
   5145             case 2:
   5146                op = Iop_QDMull32Sx2;
   5147                cmp = Iop_CmpEQ32x2;
   5148                add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
   5149                op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
   5150                imm = 1LL << 31;
   5151                imm = (imm << 32) | imm;
   5152                break;
   5153             default:
   5154                vassert(0);
   5155          }
   5156          res = newTemp(Ity_V128);
   5157          tmp = newTemp(Ity_V128);
   5158          assign(res, binop(op, getDRegI64(nreg), getDRegI64(mreg)));
   5159          assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
   5160          setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
   5161                     True, condT);
   5162          setFlag_QC(binop(Iop_And64,
   5163                           binop(cmp, getDRegI64(nreg), mkU64(imm)),
   5164                           binop(cmp, getDRegI64(mreg), mkU64(imm))),
   5165                     mkU64(0),
   5166                     False, condT);
   5167          putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
   5168          DIP("vqdml%cl.s%d q%u, d%u, d%u\n", P ? 's' : 'a', 8 << size, dreg,
   5169              nreg, mreg);
   5170          return True;
   5171       case 12:
   5172       case 14:
   5173          /* VMULL (integer or polynomial) */
   5174          if (dreg & 1)
   5175             return False;
   5176          dreg >>= 1;
   5177          size = B;
   5178          switch (size) {
   5179             case 0:
   5180                op = (U) ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
   5181                if (P)
   5182                   op = Iop_PolynomialMull8x8;
   5183                break;
   5184             case 1:
   5185                op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
   5186                break;
   5187             case 2:
   5188                op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
   5189                break;
   5190             default:
   5191                vassert(0);
   5192          }
   5193          putQReg(dreg, binop(op, getDRegI64(nreg),
   5194                                  getDRegI64(mreg)), condT);
   5195          DIP("vmull.%c%d q%u, d%u, d%u\n", P ? 'p' : (U ? 'u' : 's'),
   5196                8 << size, dreg, nreg, mreg);
   5197          return True;
   5198       case 13:
   5199          /* VQDMULL */
   5200          if (U)
   5201             return False;
   5202          if (dreg & 1)
   5203             return False;
   5204          dreg >>= 1;
   5205          size = B;
   5206          switch (size) {
   5207             case 0:
   5208             case 3:
   5209                return False;
   5210             case 1:
   5211                op = Iop_QDMull16Sx4;
   5212                op2 = Iop_CmpEQ16x4;
   5213                imm = 1LL << 15;
   5214                imm = (imm << 16) | imm;
   5215                imm = (imm << 32) | imm;
   5216                break;
   5217             case 2:
   5218                op = Iop_QDMull32Sx2;
   5219                op2 = Iop_CmpEQ32x2;
   5220                imm = 1LL << 31;
   5221                imm = (imm << 32) | imm;
   5222                break;
   5223             default:
   5224                vassert(0);
   5225          }
   5226          putQReg(dreg, binop(op, getDRegI64(nreg), getDRegI64(mreg)),
   5227                condT);
   5228          setFlag_QC(binop(Iop_And64,
   5229                           binop(op2, getDRegI64(nreg), mkU64(imm)),
   5230                           binop(op2, getDRegI64(mreg), mkU64(imm))),
   5231                     mkU64(0),
   5232                     False, condT);
   5233          DIP("vqdmull.s%d q%u, d%u, d%u\n", 8 << size, dreg, nreg, mreg);
   5234          return True;
   5235       default:
   5236          return False;
   5237    }
   5238    return False;
   5239 }
   5240 
   5241 /* A7.4.3 Two registers and a scalar.

           Decoder for the NEON data-processing forms that combine a vector
           register with one scalar lane taken from a D register.  The forms
           are tried in this order: VMLA/VMLS, VMLAL/VMLSL, VQDMLAL/VQDMLSL,
           VMUL, VMULL, VQDMULL, VQDMULH, VQRDMULH (all "by scalar").

           theInstr  the instruction word being decoded.
           condT     handed unchanged to putQReg / putDRegI64 / setFlag_QC
                     (presumably the guard for conditional execution --
                     confirm against those helpers).

           Returns True once a form has been recognised and its register
           write has been generated; returns False when the fixed bits do
           not match, the element size is unsupported, or a Q-sized operand
           names an odd D register.

           NOTE(review): several "return False" paths sit after newTemp()
           and assign() calls (for example the size switches that follow
           assign(arg_n, ...)), so a rejected word may already have added
           statements.  Whether that matters depends on the caller --
           confirm. */
   5242 static
   5243 Bool dis_neon_data_2reg_and_scalar ( UInt theInstr, IRTemp condT )
   5244 {
           /* INSN(hi,lo): the bit-field hi..lo of theInstr (via SLICE_UInt). */
   5245 #  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
           /* Bit 24 is read under two names (see Q below): as U it picks the
              unsigned op in the widening forms, as Q it picks the 128-bit
              variant of the non-widening forms. */
   5246    UInt U = INSN(24,24);
           /* Register numbers are decoded from a copy of the word with bit 6
              cleared (the guard below insists that bit 6 is set).
              NOTE(review): presumably this makes the helpers return plain
              D-register numbers -- confirm against get_neon_*_regno. */
   5247    UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
   5248    UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
   5249    UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
           /* Bits 21:20; lanes are (8 << size) bits wide.  Only 1 and 2 are
              accepted by any form below. */
   5250    UInt size = INSN(21,20);
           /* Lane number of the scalar; set by whichever form matches. */
   5251    UInt index;
   5252    UInt Q = INSN(24,24);
   5253 
           /* Fixed bits required here: 27:25 == 001, 23 == 1, 6 == 1,
              4 == 0.  Anything else is left to other decoders. */
   5254    if (INSN(27,25) != 1 || INSN(23,23) != 1
   5255        || INSN(6,6) != 1 || INSN(4,4) != 0)
   5256       return False;
   5257 
   5258    /* VMLA, VMLS (scalar): bits 11 and 9 are both zero.  Bit 8 selects
              the floating-point ops, bit 10 selects subtract (VMLS). */
   5259    if ((INSN(11,8) & BITS4(1,0,1,0)) == BITS4(0,0,0,0)) {
   5260       IRTemp res, arg_m, arg_n;
   5261       IROp dup, get, op, op2, add, sub;
   5262       if (Q) {
              /* 128-bit form: dreg and nreg must be even; halving them
                 gives the numbers used with getQReg/putQReg. */
   5263          if ((dreg & 1) || (nreg & 1))
   5264             return False;
   5265          dreg >>= 1;
   5266          nreg >>= 1;
   5267          res = newTemp(Ity_V128);
   5268          arg_m = newTemp(Ity_V128);
   5269          arg_n = newTemp(Ity_V128);
   5270          assign(arg_n, getQReg(nreg));
              /* Split mreg into the scalar's D register (low bits) and its
                 lane index (remaining high bits): 3 register bits for
                 16-bit lanes, 4 register bits for 32-bit lanes. */
   5271          switch(size) {
   5272             case 1:
   5273                dup = Iop_Dup16x8;
   5274                get = Iop_GetElem16x4;
   5275                index = mreg >> 3;
   5276                mreg &= 7;
   5277                break;
   5278             case 2:
   5279                dup = Iop_Dup32x4;
   5280                get = Iop_GetElem32x2;
   5281                index = mreg >> 4;
   5282                mreg &= 0xf;
   5283                break;
   5284             case 0:
   5285             case 3:
   5286                return False;
   5287             default:
   5288                vassert(0);
   5289          }
              /* Fetch lane 'index' of the D register and apply the Dup op
                 to it, giving a full-width second operand. */
   5290          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5291       } else {
              /* 64-bit form: same scalar decode, with 64-bit temps and
                 the 64-bit Dup ops. */
   5292          res = newTemp(Ity_I64);
   5293          arg_m = newTemp(Ity_I64);
   5294          arg_n = newTemp(Ity_I64);
   5295          assign(arg_n, getDRegI64(nreg));
   5296          switch(size) {
   5297             case 1:
   5298                dup = Iop_Dup16x4;
   5299                get = Iop_GetElem16x4;
   5300                index = mreg >> 3;
   5301                mreg &= 7;
   5302                break;
   5303             case 2:
   5304                dup = Iop_Dup32x2;
   5305                get = Iop_GetElem32x2;
   5306                index = mreg >> 4;
   5307                mreg &= 0xf;
   5308                break;
   5309             case 0:
   5310             case 3:
   5311                return False;
   5312             default:
   5313                vassert(0);
   5314          }
   5315          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5316       }
           /* Pick the multiply and the add/sub ops.  The floating-point
              variant (bit 8 set) exists for 32-bit lanes only. */
   5317       if (INSN(8,8)) {
   5318          switch (size) {
   5319             case 2:
   5320                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   5321                add = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
   5322                sub = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
   5323                break;
   5324             case 0:
   5325             case 1:
   5326             case 3:
   5327                return False;
   5328             default:
   5329                vassert(0);
   5330          }
   5331       } else {
   5332          switch (size) {
   5333             case 1:
   5334                op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   5335                add = Q ? Iop_Add16x8 : Iop_Add16x4;
   5336                sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   5337                break;
   5338             case 2:
   5339                op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   5340                add = Q ? Iop_Add32x4 : Iop_Add32x2;
   5341                sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   5342                break;
   5343             case 0:
   5344             case 3:
   5345                return False;
   5346             default:
   5347                vassert(0);
   5348          }
   5349       }
   5350       op2 = INSN(10,10) ? sub : add;
           /* res = n * scalar, then dreg = dreg (+|-) res.  The product and
              the Q-sized accumulate go through binop_w_fake_RM; the
              D-sized accumulate uses plain binop.
              NOTE(review): presumably binop_w_fake_RM supplies a rounding
              mode for the ops that need one -- confirm at its definition. */
   5351       assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   5352       if (Q)
   5353          putQReg(dreg, binop_w_fake_RM(op2, getQReg(dreg), mkexpr(res)),
   5354                  condT);
   5355       else
   5356          putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)),
   5357                     condT);
   5358       DIP("vml%c.%c%d %c%u, %c%u, d%u[%u]\n", INSN(10,10) ? 's' : 'a',
   5359             INSN(8,8) ? 'f' : 'i', 8 << size,
   5360             Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, mreg, index);
   5361       return True;
   5362    }
   5363 
   5364    /* VMLAL, VMLSL (scalar): bits 11:8 are 0x10.  Widening
              multiply-accumulate: D-sized sources, Q-sized destination;
              bit 10 selects subtract, U selects the unsigned multiply. */
   5365    if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,0)) {
   5366       IRTemp res, arg_m, arg_n;
   5367       IROp dup, get, op, op2, add, sub;
           /* Only the destination is Q-sized, so only dreg must be even. */
   5368       if (dreg & 1)
   5369          return False;
   5370       dreg >>= 1;
   5371       res = newTemp(Ity_V128);
   5372       arg_m = newTemp(Ity_I64);
   5373       arg_n = newTemp(Ity_I64);
   5374       assign(arg_n, getDRegI64(nreg));
           /* Scalar decode: low bits of mreg are the D register, the rest
              is the lane index. */
   5375       switch(size) {
   5376          case 1:
   5377             dup = Iop_Dup16x4;
   5378             get = Iop_GetElem16x4;
   5379             index = mreg >> 3;
   5380             mreg &= 7;
   5381             break;
   5382          case 2:
   5383             dup = Iop_Dup32x2;
   5384             get = Iop_GetElem32x2;
   5385             index = mreg >> 4;
   5386             mreg &= 0xf;
   5387             break;
   5388          case 0:
   5389          case 3:
   5390             return False;
   5391          default:
   5392             vassert(0);
   5393       }
   5394       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
           /* Sizes 0 and 3 already returned above, so only cases 1 and 2
              can be reached in this switch. */
   5395       switch (size) {
   5396          case 1:
   5397             op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
   5398             add = Iop_Add32x4;
   5399             sub = Iop_Sub32x4;
   5400             break;
   5401          case 2:
   5402             op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
   5403             add = Iop_Add64x2;
   5404             sub = Iop_Sub64x2;
   5405             break;
   5406          case 0:
   5407          case 3:
   5408             return False;
   5409          default:
   5410             vassert(0);
   5411       }
   5412       op2 = INSN(10,10) ? sub : add;
   5413       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5414       putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
   5415       DIP("vml%cl.%c%d q%u, d%u, d%u[%u]\n",
   5416           INSN(10,10) ? 's' : 'a', U ? 'u' : 's',
   5417           8 << size, dreg, nreg, mreg, index);
   5418       return True;
   5419    }
   5420 
   5421    /* VQDMLAL, VQDMLSL (scalar): bits 11:8 are 0x11 and bit 24 must be
              clear.  P (bit 10) selects the subtracting form. */
   5422    if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,1) && !U) {
   5423       IRTemp res, arg_m, arg_n, tmp;
   5424       IROp dup, get, op, op2, add, cmp;
   5425       UInt P = INSN(10,10);
   5426       ULong imm;
   5427       if (dreg & 1)
   5428          return False;
   5429       dreg >>= 1;
           /* NOTE(review): res is given a second fresh temp further down,
              before any use, so the temp created here is never assigned or
              read.  Looks redundant -- confirm before removing. */
   5430       res = newTemp(Ity_V128);
   5431       arg_m = newTemp(Ity_I64);
   5432       arg_n = newTemp(Ity_I64);
   5433       assign(arg_n, getDRegI64(nreg));
   5434       switch(size) {
   5435          case 1:
   5436             dup = Iop_Dup16x4;
   5437             get = Iop_GetElem16x4;
   5438             index = mreg >> 3;
   5439             mreg &= 7;
   5440             break;
   5441          case 2:
   5442             dup = Iop_Dup32x2;
   5443             get = Iop_GetElem32x2;
   5444             index = mreg >> 4;
   5445             mreg &= 0xf;
   5446             break;
   5447          case 0:
   5448          case 3:
   5449             return False;
   5450          default:
   5451             vassert(0);
   5452       }
   5453       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
           /* op  = saturating doubling widening multiply,
              add = saturating accumulate, op2 = the same accumulate without
              saturation, imm = the sign-bit-only pattern copied into every
              lane (0x8000 x4 for 16-bit lanes, 0x80000000 x2 for 32-bit). */
   5454       switch (size) {
   5455          case 0:
   5456          case 3:
   5457             return False;
   5458          case 1:
   5459             op = Iop_QDMull16Sx4;
   5460             cmp = Iop_CmpEQ16x4;
   5461             add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
   5462             op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
   5463             imm = 1LL << 15;
   5464             imm = (imm << 16) | imm;
   5465             imm = (imm << 32) | imm;
   5466             break;
   5467          case 2:
   5468             op = Iop_QDMull32Sx2;
   5469             cmp = Iop_CmpEQ32x2;
   5470             add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
   5471             op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
   5472             imm = 1LL << 31;
   5473             imm = (imm << 32) | imm;
   5474             break;
   5475          default:
   5476             vassert(0);
   5477       }
   5478       res = newTemp(Ity_V128);
   5479       tmp = newTemp(Ity_V128);
   5480       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
           /* tmp holds the non-saturating accumulate, used only for the
              second QC check below. */
   5481       assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
           /* QC check 1: some lane has the sign-bit-only pattern in both
              operands (presumably the one input pair for which the
              doubling multiply saturates).
              NOTE(review): assumes setFlag_QC flags a difference between
              its first two arguments, and that its third argument says
              whether they are 128-bit -- confirm at its definition. */
   5482       setFlag_QC(binop(Iop_And64,
   5483                        binop(cmp, mkexpr(arg_n), mkU64(imm)),
   5484                        binop(cmp, mkexpr(arg_m), mkU64(imm))),
   5485                  mkU64(0),
   5486                  False, condT);
           /* QC check 2: saturating and non-saturating accumulates differ. */
   5487       setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
   5488                  True, condT);
   5489       putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
   5490       DIP("vqdml%cl.s%d q%u, d%u, d%u[%u]\n", P ? 's' : 'a', 8 << size,
   5491           dreg, nreg, mreg, index);
   5492       return True;
   5493    }
   5494 
   5495    /* VMUL (by scalar): bits 11:8 are 100x; bit 8 selects the
              floating-point multiply (32-bit lanes only). */
   5496    if ((INSN(11,8) & BITS4(1,1,1,0)) == BITS4(1,0,0,0)) {
   5497       IRTemp res, arg_m, arg_n;
   5498       IROp dup, get, op;
   5499       if (Q) {
              /* 128-bit form: dreg and nreg must be even D numbers. */
   5500          if ((dreg & 1) || (nreg & 1))
   5501             return False;
   5502          dreg >>= 1;
   5503          nreg >>= 1;
   5504          res = newTemp(Ity_V128);
   5505          arg_m = newTemp(Ity_V128);
   5506          arg_n = newTemp(Ity_V128);
   5507          assign(arg_n, getQReg(nreg));
   5508          switch(size) {
   5509             case 1:
   5510                dup = Iop_Dup16x8;
   5511                get = Iop_GetElem16x4;
   5512                index = mreg >> 3;
   5513                mreg &= 7;
   5514                break;
   5515             case 2:
   5516                dup = Iop_Dup32x4;
   5517                get = Iop_GetElem32x2;
   5518                index = mreg >> 4;
   5519                mreg &= 0xf;
   5520                break;
   5521             case 0:
   5522             case 3:
   5523                return False;
   5524             default:
   5525                vassert(0);
   5526          }
   5527          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5528       } else {
   5529          res = newTemp(Ity_I64);
   5530          arg_m = newTemp(Ity_I64);
   5531          arg_n = newTemp(Ity_I64);
   5532          assign(arg_n, getDRegI64(nreg));
   5533          switch(size) {
   5534             case 1:
   5535                dup = Iop_Dup16x4;
   5536                get = Iop_GetElem16x4;
   5537                index = mreg >> 3;
   5538                mreg &= 7;
   5539                break;
   5540             case 2:
   5541                dup = Iop_Dup32x2;
   5542                get = Iop_GetElem32x2;
   5543                index = mreg >> 4;
   5544                mreg &= 0xf;
   5545                break;
   5546             case 0:
   5547             case 3:
   5548                return False;
   5549             default:
   5550                vassert(0);
   5551          }
   5552          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5553       }
   5554       if (INSN(8,8)) {
   5555          switch (size) {
   5556             case 2:
   5557                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   5558                break;
   5559             case 0:
   5560             case 1:
   5561             case 3:
   5562                return False;
   5563             default:
   5564                vassert(0);
   5565          }
   5566       } else {
   5567          switch (size) {
   5568             case 1:
   5569                op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   5570                break;
   5571             case 2:
   5572                op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   5573                break;
   5574             case 0:
   5575             case 3:
   5576                return False;
   5577             default:
   5578                vassert(0);
   5579          }
   5580       }
           /* Plain product, written straight to the destination. */
   5581       assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   5582       if (Q)
   5583          putQReg(dreg, mkexpr(res), condT);
   5584       else
   5585          putDRegI64(dreg, mkexpr(res), condT);
   5586       DIP("vmul.%c%d %c%u, %c%u, d%u[%u]\n", INSN(8,8) ? 'f' : 'i',
   5587           8 << size, Q ? 'q' : 'd', dreg,
   5588           Q ? 'q' : 'd', nreg, mreg, index);
   5589       return True;
   5590    }
   5591 
   5592    /* VMULL (scalar): bits 11:8 are exactly 1010.  Widening multiply,
              D-sized sources into a Q-sized destination; U selects the
              unsigned op. */
   5593    if (INSN(11,8) == BITS4(1,0,1,0)) {
   5594       IRTemp res, arg_m, arg_n;
   5595       IROp dup, get, op;
   5596       if (dreg & 1)
   5597          return False;
   5598       dreg >>= 1;
   5599       res = newTemp(Ity_V128);
   5600       arg_m = newTemp(Ity_I64);
   5601       arg_n = newTemp(Ity_I64);
   5602       assign(arg_n, getDRegI64(nreg));
   5603       switch(size) {
   5604          case 1:
   5605             dup = Iop_Dup16x4;
   5606             get = Iop_GetElem16x4;
   5607             index = mreg >> 3;
   5608             mreg &= 7;
   5609             break;
   5610          case 2:
   5611             dup = Iop_Dup32x2;
   5612             get = Iop_GetElem32x2;
   5613             index = mreg >> 4;
   5614             mreg &= 0xf;
   5615             break;
   5616          case 0:
   5617          case 3:
   5618             return False;
   5619          default:
   5620             vassert(0);
   5621       }
   5622       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5623       switch (size) {
   5624          case 1: op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4; break;
   5625          case 2: op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2; break;
   5626          case 0: case 3: return False;
   5627          default: vassert(0);
   5628       }
   5629       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5630       putQReg(dreg, mkexpr(res), condT);
   5631       DIP("vmull.%c%d q%u, d%u, d%u[%u]\n", U ? 'u' : 's', 8 << size, dreg,
   5632           nreg, mreg, index);
   5633       return True;
   5634    }
   5635 
   5636    /* VQDMULL (scalar): bits 11:8 are exactly 1011 and bit 24 must be
              clear.  Saturating doubling widening multiply. */
   5637    if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
   5638       IROp op ,op2, dup, get;
   5639       ULong imm;
   5640       IRTemp arg_m, arg_n;
   5641       if (dreg & 1)
   5642          return False;
   5643       dreg >>= 1;
   5644       arg_m = newTemp(Ity_I64);
   5645       arg_n = newTemp(Ity_I64);
   5646       assign(arg_n, getDRegI64(nreg));
   5647       switch(size) {
   5648          case 1:
   5649             dup = Iop_Dup16x4;
   5650             get = Iop_GetElem16x4;
   5651             index = mreg >> 3;
   5652             mreg &= 7;
   5653             break;
   5654          case 2:
   5655             dup = Iop_Dup32x2;
   5656             get = Iop_GetElem32x2;
   5657             index = mreg >> 4;
   5658             mreg &= 0xf;
   5659             break;
   5660          case 0:
   5661          case 3:
   5662             return False;
   5663          default:
   5664             vassert(0);
   5665       }
   5666       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
           /* Here op2 is the lane-equality compare used for the QC check,
              and imm is the sign-bit-only pattern in every lane. */
   5667       switch (size) {
   5668          case 0:
   5669          case 3:
   5670             return False;
   5671          case 1:
   5672             op = Iop_QDMull16Sx4;
   5673             op2 = Iop_CmpEQ16x4;
   5674             imm = 1LL << 15;
   5675             imm = (imm << 16) | imm;
   5676             imm = (imm << 32) | imm;
   5677             break;
   5678          case 2:
   5679             op = Iop_QDMull32Sx2;
   5680             op2 = Iop_CmpEQ32x2;
   5681             imm = 1LL << 31;
   5682             imm = (imm << 32) | imm;
   5683             break;
   5684          default:
   5685             vassert(0);
   5686       }
   5687       putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   5688             condT);
           /* QC check: some lane holds the sign-bit-only pattern in both
              operands (result of the AND compared against zero). */
   5689       setFlag_QC(binop(Iop_And64,
   5690                        binop(op2, mkexpr(arg_n), mkU64(imm)),
   5691                        binop(op2, mkexpr(arg_m), mkU64(imm))),
   5692                  mkU64(0),
   5693                  False, condT);
   5694       DIP("vqdmull.s%d q%u, d%u, d%u[%u]\n", 8 << size, dreg, nreg, mreg,
   5695           index);
   5696       return True;
   5697    }
   5698 
   5699    /* VQDMULH (scalar): bits 11:8 are exactly 1100.  Non-widening, so
              Q selects between the 128-bit and 64-bit forms. */
   5700    if (INSN(11,8) == BITS4(1,1,0,0)) {
   5701       IROp op ,op2, dup, get;
   5702       ULong imm;
   5703       IRTemp res, arg_m, arg_n;
   5704       if (Q) {
   5705          if ((dreg & 1) || (nreg & 1))
   5706             return False;
   5707          dreg >>= 1;
   5708          nreg >>= 1;
   5709          res = newTemp(Ity_V128);
   5710          arg_m = newTemp(Ity_V128);
   5711          arg_n = newTemp(Ity_V128);
   5712          assign(arg_n, getQReg(nreg));
   5713          switch(size) {
   5714             case 1:
   5715                dup = Iop_Dup16x8;
   5716                get = Iop_GetElem16x4;
   5717                index = mreg >> 3;
   5718                mreg &= 7;
   5719                break;
   5720             case 2:
   5721                dup = Iop_Dup32x4;
   5722                get = Iop_GetElem32x2;
   5723                index = mreg >> 4;
   5724                mreg &= 0xf;
   5725                break;
   5726             case 0:
   5727             case 3:
   5728                return False;
   5729             default:
   5730                vassert(0);
   5731          }
   5732          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5733       } else {
   5734          res = newTemp(Ity_I64);
   5735          arg_m = newTemp(Ity_I64);
   5736          arg_n = newTemp(Ity_I64);
   5737          assign(arg_n, getDRegI64(nreg));
   5738          switch(size) {
   5739             case 1:
   5740                dup = Iop_Dup16x4;
   5741                get = Iop_GetElem16x4;
   5742                index = mreg >> 3;
   5743                mreg &= 7;
   5744                break;
   5745             case 2:
   5746                dup = Iop_Dup32x2;
   5747                get = Iop_GetElem32x2;
   5748                index = mreg >> 4;
   5749                mreg &= 0xf;
   5750                break;
   5751             case 0:
   5752             case 3:
   5753                return False;
   5754             default:
   5755                vassert(0);
   5756          }
   5757          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5758       }
   5759       switch (size) {
   5760          case 0:
   5761          case 3:
   5762             return False;
   5763          case 1:
   5764             op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
   5765             op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   5766             imm = 1LL << 15;
   5767             imm = (imm << 16) | imm;
   5768             imm = (imm << 32) | imm;
   5769             break;
   5770          case 2:
   5771             op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
   5772             op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   5773             imm = 1LL << 31;
   5774             imm = (imm << 32) | imm;
   5775             break;
   5776          default:
   5777             vassert(0);
   5778       }
   5779       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
           /* Same QC check as in VQDMULL, done at the operand width: the
              constants are built with mkU128 when Q is set, mkU64
              otherwise, and Q is passed as setFlag_QC's third argument. */
   5780       setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   5781                        binop(op2, mkexpr(arg_n),
   5782                                   Q ? mkU128(imm) : mkU64(imm)),
   5783                        binop(op2, mkexpr(arg_m),
   5784                              Q ? mkU128(imm) : mkU64(imm))),
   5785                  Q ? mkU128(0) : mkU64(0),
   5786                  Q, condT);
   5787       if (Q)
   5788          putQReg(dreg, mkexpr(res), condT);
   5789       else
   5790          putDRegI64(dreg, mkexpr(res), condT);
   5791       DIP("vqdmulh.s%d %c%u, %c%u, d%u[%u]\n",
   5792           8 << size, Q ? 'q' : 'd', dreg,
   5793           Q ? 'q' : 'd', nreg, mreg, index);
   5794       return True;
   5795    }
   5796 
   5797    /* VQRDMULH (scalar): bits 11:8 are exactly 1101.  Same structure as
              VQDMULH above, but using the Iop_QRDMulHi* ops. */
   5798    if (INSN(11,8) == BITS4(1,1,0,1)) {
   5799       IROp op ,op2, dup, get;
   5800       ULong imm;
   5801       IRTemp res, arg_m, arg_n;
   5802       if (Q) {
   5803          if ((dreg & 1) || (nreg & 1))
   5804             return False;
   5805          dreg >>= 1;
   5806          nreg >>= 1;
   5807          res = newTemp(Ity_V128);
   5808          arg_m = newTemp(Ity_V128);
   5809          arg_n = newTemp(Ity_V128);
   5810          assign(arg_n, getQReg(nreg));
   5811          switch(size) {
   5812             case 1:
   5813                dup = Iop_Dup16x8;
   5814                get = Iop_GetElem16x4;
   5815                index = mreg >> 3;
   5816                mreg &= 7;
   5817                break;
   5818             case 2:
   5819                dup = Iop_Dup32x4;
   5820                get = Iop_GetElem32x2;
   5821                index = mreg >> 4;
   5822                mreg &= 0xf;
   5823                break;
   5824             case 0:
   5825             case 3:
   5826                return False;
   5827             default:
   5828                vassert(0);
   5829          }
   5830          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5831       } else {
   5832          res = newTemp(Ity_I64);
   5833          arg_m = newTemp(Ity_I64);
   5834          arg_n = newTemp(Ity_I64);
   5835          assign(arg_n, getDRegI64(nreg));
   5836          switch(size) {
   5837             case 1:
   5838                dup = Iop_Dup16x4;
   5839                get = Iop_GetElem16x4;
   5840                index = mreg >> 3;
   5841                mreg &= 7;
   5842                break;
   5843             case 2:
   5844                dup = Iop_Dup32x2;
   5845                get = Iop_GetElem32x2;
   5846                index = mreg >> 4;
   5847                mreg &= 0xf;
   5848                break;
   5849             case 0:
   5850             case 3:
   5851                return False;
   5852             default:
   5853                vassert(0);
   5854          }
   5855          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5856       }
   5857       switch (size) {
   5858          case 0:
   5859          case 3:
   5860             return False;
   5861          case 1:
   5862             op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
   5863             op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   5864             imm = 1LL << 15;
   5865             imm = (imm << 16) | imm;
   5866             imm = (imm << 32) | imm;
   5867             break;
   5868          case 2:
   5869             op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
   5870             op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   5871             imm = 1LL << 31;
   5872             imm = (imm << 32) | imm;
   5873             break;
   5874          default:
   5875             vassert(0);
   5876       }
   5877       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5878       setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   5879                        binop(op2, mkexpr(arg_n),
   5880                                   Q ? mkU128(imm) : mkU64(imm)),
   5881                        binop(op2, mkexpr(arg_m),
   5882                                   Q ? mkU128(imm) : mkU64(imm))),
   5883                  Q ? mkU128(0) : mkU64(0),
   5884                  Q, condT);
   5885       if (Q)
   5886          putQReg(dreg, mkexpr(res), condT);
   5887       else
   5888          putDRegI64(dreg, mkexpr(res), condT);
   5889       DIP("vqrdmulh.s%d %c%u, %c%u, d%u[%u]\n",
   5890           8 << size, Q ? 'q' : 'd', dreg,
   5891           Q ? 'q' : 'd', nreg, mreg, index);
   5892       return True;
   5893    }
   5894 
           /* No form matched (for example bits 11:8 == 1110 or 1111, or a
              saturating-doubling pattern with bit 24 set). */
   5895    return False;
   5896 #  undef INSN
   5897 }
   5898 
   5899 /* A7.4.4 Two registers and a shift amount */
   5900 static
   5901 Bool dis_neon_data_2reg_and_shift ( UInt theInstr, IRTemp condT )
   5902 {
   5903    UInt A = (theInstr >> 8) & 0xf;
   5904    UInt B = (theInstr >> 6) & 1;
   5905    UInt L = (theInstr >> 7) & 1;
   5906    UInt U = (theInstr >> 24) & 1;
   5907    UInt Q = B;
   5908    UInt imm6 = (theInstr >> 16) & 0x3f;
   5909    UInt shift_imm;
   5910    UInt size = 4;
   5911    UInt tmp;
   5912    UInt mreg = get_neon_m_regno(theInstr);
   5913    UInt dreg = get_neon_d_regno(theInstr);
   5914    ULong imm = 0;
   5915    IROp op, cvt, add = Iop_INVALID, cvt2, op_rev;
   5916    IRTemp reg_m, res, mask;
   5917 
   5918    if (L == 0 && ((theInstr >> 19) & 7) == 0)
   5919       /* It is one reg and immediate */
   5920       return False;
   5921 
   5922    tmp = (L << 6) | imm6;
   5923    if (tmp & 0x40) {
   5924       size = 3;
   5925       shift_imm = 64 - imm6;
   5926    } else if (tmp & 0x20) {
   5927       size = 2;
   5928       shift_imm = 64 - imm6;
   5929    } else if (tmp & 0x10) {
   5930       size = 1;
   5931       shift_imm = 32 - imm6;
   5932    } else if (tmp & 0x8) {
   5933       size = 0;
   5934       shift_imm = 16 - imm6;
   5935    } else {
   5936       return False;
   5937    }
   5938 
   5939    switch (A) {
   5940       case 3:
   5941       case 2:
   5942          /* VRSHR, VRSRA */
   5943          if (shift_imm > 0) {
   5944             IRExpr *imm_val;
   5945             imm = 1L;
   5946             switch (size) {
   5947                case 0:
   5948                   imm = (imm << 8) | imm;
   5949                   /* fall through */
   5950                case 1:
   5951                   imm = (imm << 16) | imm;
   5952                   /* fall through */
   5953                case 2:
   5954                   imm = (imm << 32) | imm;
   5955                   /* fall through */
   5956                case 3:
   5957                   break;
   5958                default:
   5959                   vassert(0);
   5960             }
   5961             if (Q) {
   5962                reg_m = newTemp(Ity_V128);
   5963                res = newTemp(Ity_V128);
   5964                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   5965                assign(reg_m, getQReg(mreg));
   5966                switch (size) {
   5967                   case 0:
   5968                      add = Iop_Add8x16;
   5969                      op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
   5970                      break;
   5971                   case 1:
   5972                      add = Iop_Add16x8;
   5973                      op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   5974                      break;
   5975                   case 2:
   5976                      add = Iop_Add32x4;
   5977                      op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   5978                      break;
   5979                   case 3:
   5980                      add = Iop_Add64x2;
   5981                      op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
   5982                      break;
   5983                   default:
   5984                      vassert(0);
   5985                }
   5986             } else {
   5987                reg_m = newTemp(Ity_I64);
   5988                res = newTemp(Ity_I64);
   5989                imm_val = mkU64(imm);
   5990                assign(reg_m, getDRegI64(mreg));
   5991                switch (size) {
   5992                   case 0:
   5993                      add = Iop_Add8x8;
   5994                      op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
   5995                      break;
   5996                   case 1:
   5997                      add = Iop_Add16x4;
   5998                      op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
   5999                      break;
   6000                   case 2:
   6001                      add = Iop_Add32x2;
   6002                      op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
   6003                      break;
   6004                   case 3:
   6005                      add = Iop_Add64;
   6006                      op = U ? Iop_Shr64 : Iop_Sar64;
   6007                      break;
   6008                   default:
   6009                      vassert(0);
   6010                }
   6011             }
   6012             assign(res,
   6013                    binop(add,
   6014                          binop(op,
   6015                                mkexpr(reg_m),
   6016                                mkU8(shift_imm)),
   6017                          binop(Q ? Iop_AndV128 : Iop_And64,
   6018                                binop(op,
   6019                                      mkexpr(reg_m),
   6020                                      mkU8(shift_imm - 1)),
   6021                                imm_val)));
   6022          } else {
   6023             if (Q) {
   6024                res = newTemp(Ity_V128);
   6025                assign(res, getQReg(mreg));
   6026             } else {
   6027                res = newTemp(Ity_I64);
   6028                assign(res, getDRegI64(mreg));
   6029             }
   6030          }
   6031          if (A == 3) {
   6032             if (Q) {
   6033                putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
   6034                              condT);
   6035             } else {
   6036                putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
   6037                                 condT);
   6038             }
   6039             DIP("vrsra.%c%d %c%u, %c%u, #%u\n",
   6040                 U ? 'u' : 's', 8 << size,
   6041                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6042          } else {
   6043             if (Q) {
   6044                putQReg(dreg, mkexpr(res), condT);
   6045             } else {
   6046                putDRegI64(dreg, mkexpr(res), condT);
   6047             }
   6048             DIP("vrshr.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
   6049                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6050          }
   6051          return True;
   6052       case 1:
   6053       case 0:
   6054          /* VSHR, VSRA */
   6055          if (Q) {
   6056             reg_m = newTemp(Ity_V128);
   6057             assign(reg_m, getQReg(mreg));
   6058             res = newTemp(Ity_V128);
   6059          } else {
   6060             reg_m = newTemp(Ity_I64);
   6061             assign(reg_m, getDRegI64(mreg));
   6062             res = newTemp(Ity_I64);
   6063          }
   6064          if (Q) {
   6065             switch (size) {
   6066                case 0:
   6067                   op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
   6068                   add = Iop_Add8x16;
   6069                   break;
   6070                case 1:
   6071                   op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   6072                   add = Iop_Add16x8;
   6073                   break;
   6074                case 2:
   6075                   op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   6076                   add = Iop_Add32x4;
   6077                   break;
   6078                case 3:
   6079                   op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
   6080                   add = Iop_Add64x2;
   6081                   break;
   6082                default:
   6083                   vassert(0);
   6084             }
   6085          } else {
   6086             switch (size) {
   6087                case 0:
   6088                   op =  U ? Iop_ShrN8x8 : Iop_SarN8x8;
   6089                   add = Iop_Add8x8;
   6090                   break;
   6091                case 1:
   6092                   op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
   6093                   add = Iop_Add16x4;
   6094                   break;
   6095                case 2:
   6096                   op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
   6097                   add = Iop_Add32x2;
   6098                   break;
   6099                case 3:
   6100                   op = U ? Iop_Shr64 : Iop_Sar64;
   6101                   add = Iop_Add64;
   6102                   break;
   6103                default:
   6104                   vassert(0);
   6105             }
   6106          }
   6107          assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
   6108          if (A == 1) {
   6109             if (Q) {
   6110                putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
   6111                              condT);
   6112             } else {
   6113                putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
   6114                                 condT);
   6115             }
   6116             DIP("vsra.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
   6117                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6118          } else {
   6119             if (Q) {
   6120                putQReg(dreg, mkexpr(res), condT);
   6121             } else {
   6122                putDRegI64(dreg, mkexpr(res), condT);
   6123             }
   6124             DIP("vshr.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
   6125                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6126          }
   6127          return True;
   6128       case 4:
   6129          /* VSRI */
   6130          if (!U)
   6131             return False;
   6132          if (Q) {
   6133             res = newTemp(Ity_V128);
   6134             mask = newTemp(Ity_V128);
   6135          } else {
   6136             res = newTemp(Ity_I64);
   6137             mask = newTemp(Ity_I64);
   6138          }
   6139          switch (size) {
   6140             case 0: op = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; break;
   6141             case 1: op = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; break;
   6142             case 2: op = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; break;
   6143             case 3: op = Q ? Iop_ShrN64x2 : Iop_Shr64; break;
   6144             default: vassert(0);
   6145          }
   6146          if (Q) {
   6147             assign(mask, binop(op, binop(Iop_64HLtoV128,
   6148                                          mkU64(0xFFFFFFFFFFFFFFFFLL),
   6149                                          mkU64(0xFFFFFFFFFFFFFFFFLL)),
   6150                                mkU8(shift_imm)));
   6151             assign(res, binop(Iop_OrV128,
   6152                               binop(Iop_AndV128,
   6153                                     getQReg(dreg),
   6154                                     unop(Iop_NotV128,
   6155                                          mkexpr(mask))),
   6156                               binop(op,
   6157                                     getQReg(mreg),
   6158                                     mkU8(shift_imm))));
   6159             putQReg(dreg, mkexpr(res), condT);
   6160          } else {
   6161             assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
   6162                                mkU8(shift_imm)));
   6163             assign(res, binop(Iop_Or64,
   6164                               binop(Iop_And64,
   6165                                     getDRegI64(dreg),
   6166                                     unop(Iop_Not64,
   6167                                          mkexpr(mask))),
   6168                               binop(op,
   6169                                     getDRegI64(mreg),
   6170                                     mkU8(shift_imm))));
   6171             putDRegI64(dreg, mkexpr(res), condT);
   6172          }
   6173          DIP("vsri.%d %c%u, %c%u, #%u\n",
   6174              8 << size, Q ? 'q' : 'd', dreg,
   6175              Q ? 'q' : 'd', mreg, shift_imm);
   6176          return True;
   6177       case 5:
   6178          if (U) {
   6179             /* VSLI */
   6180             shift_imm = 8 * (1 << size) - shift_imm;
   6181             if (Q) {
   6182                res = newTemp(Ity_V128);
   6183                mask = newTemp(Ity_V128);
   6184             } else {
   6185                res = newTemp(Ity_I64);
   6186                mask = newTemp(Ity_I64);
   6187             }
   6188             switch (size) {
   6189                case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
   6190                case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
   6191                case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
   6192                case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
   6193                default: vassert(0);
   6194             }
   6195             if (Q) {
   6196                assign(mask, binop(op, binop(Iop_64HLtoV128,
   6197                                             mkU64(0xFFFFFFFFFFFFFFFFLL),
   6198                                             mkU64(0xFFFFFFFFFFFFFFFFLL)),
   6199                                   mkU8(shift_imm)));
   6200                assign(res, binop(Iop_OrV128,
   6201                                  binop(Iop_AndV128,
   6202                                        getQReg(dreg),
   6203                                        unop(Iop_NotV128,
   6204                                             mkexpr(mask))),
   6205                                  binop(op,
   6206                                        getQReg(mreg),
   6207                                        mkU8(shift_imm))));
   6208                putQReg(dreg, mkexpr(res), condT);
   6209             } else {
   6210                assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
   6211                                   mkU8(shift_imm)));
   6212                assign(res, binop(Iop_Or64,
   6213                                  binop(Iop_And64,
   6214                                        getDRegI64(dreg),
   6215                                        unop(Iop_Not64,
   6216                                             mkexpr(mask))),
   6217                                  binop(op,
   6218                                        getDRegI64(mreg),
   6219                                        mkU8(shift_imm))));
   6220                putDRegI64(dreg, mkexpr(res), condT);
   6221             }
   6222             DIP("vsli.%d %c%u, %c%u, #%u\n",
   6223                 8 << size, Q ? 'q' : 'd', dreg,
   6224                 Q ? 'q' : 'd', mreg, shift_imm);
   6225             return True;
   6226          } else {
   6227             /* VSHL #imm */
   6228             shift_imm = 8 * (1 << size) - shift_imm;
   6229             if (Q) {
   6230                res = newTemp(Ity_V128);
   6231             } else {
   6232                res = newTemp(Ity_I64);
   6233             }
   6234             switch (size) {
   6235                case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
   6236                case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
   6237                case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
   6238                case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
   6239                default: vassert(0);
   6240             }
   6241             assign(res, binop(op, Q ? getQReg(mreg) : getDRegI64(mreg),
   6242                      mkU8(shift_imm)));
   6243             if (Q) {
   6244                putQReg(dreg, mkexpr(res), condT);
   6245             } else {
   6246                putDRegI64(dreg, mkexpr(res), condT);
   6247             }
   6248             DIP("vshl.i%d %c%u, %c%u, #%u\n",
   6249                 8 << size, Q ? 'q' : 'd', dreg,
   6250                 Q ? 'q' : 'd', mreg, shift_imm);
   6251             return True;
   6252          }
   6253          break;
   6254       case 6:
   6255       case 7:
   6256          /* VQSHL, VQSHLU */
   6257          shift_imm = 8 * (1 << size) - shift_imm;
   6258          if (U) {
   6259             if (A & 1) {
   6260                switch (size) {
   6261                   case 0:
   6262                      op = Q ? Iop_QShlNsatUU8x16 : Iop_QShlNsatUU8x8;
   6263                      op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   6264                      break;
   6265                   case 1:
   6266                      op = Q ? Iop_QShlNsatUU16x8 : Iop_QShlNsatUU16x4;
   6267                      op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   6268                      break;
   6269                   case 2:
   6270                      op = Q ? Iop_QShlNsatUU32x4 : Iop_QShlNsatUU32x2;
   6271                      op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   6272                      break;
   6273                   case 3:
   6274                      op = Q ? Iop_QShlNsatUU64x2 : Iop_QShlNsatUU64x1;
   6275                      op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
   6276                      break;
   6277                   default:
   6278                      vassert(0);
   6279                }
   6280                DIP("vqshl.u%d %c%u, %c%u, #%u\n",
   6281                    8 << size,
   6282                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6283             } else {
   6284                switch (size) {
   6285                   case 0:
   6286                      op = Q ? Iop_QShlNsatSU8x16 : Iop_QShlNsatSU8x8;
   6287                      op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   6288                      break;
   6289                   case 1:
   6290                      op = Q ? Iop_QShlNsatSU16x8 : Iop_QShlNsatSU16x4;
   6291                      op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   6292                      break;
   6293                   case 2:
   6294                      op = Q ? Iop_QShlNsatSU32x4 : Iop_QShlNsatSU32x2;
   6295                      op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   6296                      break;
   6297                   case 3:
   6298                      op = Q ? Iop_QShlNsatSU64x2 : Iop_QShlNsatSU64x1;
   6299                      op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
   6300                      break;
   6301                   default:
   6302                      vassert(0);
   6303                }
   6304                DIP("vqshlu.s%d %c%u, %c%u, #%u\n",
   6305                    8 << size,
   6306                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6307             }
   6308          } else {
   6309             if (!(A & 1))
   6310                return False;
   6311             switch (size) {
   6312                case 0:
   6313                   op = Q ? Iop_QShlNsatSS8x16 : Iop_QShlNsatSS8x8;
   6314                   op_rev = Q ? Iop_SarN8x16 : Iop_SarN8x8;
   6315                   break;
   6316                case 1:
   6317                   op = Q ? Iop_QShlNsatSS16x8 : Iop_QShlNsatSS16x4;
   6318                   op_rev = Q ? Iop_SarN16x8 : Iop_SarN16x4;
   6319                   break;
   6320                case 2:
   6321                   op = Q ? Iop_QShlNsatSS32x4 : Iop_QShlNsatSS32x2;
   6322                   op_rev = Q ? Iop_SarN32x4 : Iop_SarN32x2;
   6323                   break;
   6324                case 3:
   6325                   op = Q ? Iop_QShlNsatSS64x2 : Iop_QShlNsatSS64x1;
   6326                   op_rev = Q ? Iop_SarN64x2 : Iop_Sar64;
   6327                   break;
   6328                default:
   6329                   vassert(0);
   6330             }
   6331             DIP("vqshl.s%d %c%u, %c%u, #%u\n",
   6332                 8 << size,
   6333                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6334          }
   6335          if (Q) {
   6336             tmp = newTemp(Ity_V128);
   6337             res = newTemp(Ity_V128);
   6338             reg_m = newTemp(Ity_V128);
   6339             assign(reg_m, getQReg(mreg));
   6340          } else {
   6341             tmp = newTemp(Ity_I64);
   6342             res = newTemp(Ity_I64);
   6343             reg_m = newTemp(Ity_I64);
   6344             assign(reg_m, getDRegI64(mreg));
   6345          }
   6346          assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
   6347          assign(tmp, binop(op_rev, mkexpr(res), mkU8(shift_imm)));
   6348          setFlag_QC(mkexpr(tmp), mkexpr(reg_m), Q, condT);
   6349          if (Q)
   6350             putQReg(dreg, mkexpr(res), condT);
   6351          else
   6352             putDRegI64(dreg, mkexpr(res), condT);
   6353          return True;
   6354       case 8:
   6355          if (!U) {
   6356             if (L == 1)
   6357                return False;
   6358             size++;
   6359             dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   6360             mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   6361             if (mreg & 1)
   6362                return False;
   6363             mreg >>= 1;
   6364             if (!B) {
   6365                /* VSHRN*/
   6366                IROp narOp;
   6367                reg_m = newTemp(Ity_V128);
   6368                assign(reg_m, getQReg(mreg));
   6369                res = newTemp(Ity_I64);
   6370                switch (size) {
   6371                   case 1:
   6372                      op = Iop_ShrN16x8;
   6373                      narOp = Iop_NarrowUn16to8x8;
   6374                      break;
   6375                   case 2:
   6376                      op = Iop_ShrN32x4;
   6377                      narOp = Iop_NarrowUn32to16x4;
   6378                      break;
   6379                   case 3:
   6380                      op = Iop_ShrN64x2;
   6381                      narOp = Iop_NarrowUn64to32x2;
   6382                      break;
   6383                   default:
   6384                      vassert(0);
   6385                }
   6386                assign(res, unop(narOp,
   6387                                 binop(op,
   6388                                       mkexpr(reg_m),
   6389                                       mkU8(shift_imm))));
   6390                putDRegI64(dreg, mkexpr(res), condT);
   6391                DIP("vshrn.i%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
   6392                    shift_imm);
   6393                return True;
   6394             } else {
   6395                /* VRSHRN   */
   6396                IROp addOp, shOp, narOp;
   6397                IRExpr *imm_val;
   6398                reg_m = newTemp(Ity_V128);
   6399                assign(reg_m, getQReg(mreg));
   6400                res = newTemp(Ity_I64);
   6401                imm = 1L;
   6402                switch (size) {
   6403                   case 0: imm = (imm <<  8) | imm; /* fall through */
   6404                   case 1: imm = (imm << 16) | imm; /* fall through */
   6405                   case 2: imm = (imm << 32) | imm; /* fall through */
   6406                   case 3: break;
   6407                   default: vassert(0);
   6408                }
   6409                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   6410                switch (size) {
   6411                   case 1:
   6412                      addOp = Iop_Add16x8;
   6413                      shOp = Iop_ShrN16x8;
   6414                      narOp = Iop_NarrowUn16to8x8;
   6415                      break;
   6416                   case 2:
   6417                      addOp = Iop_Add32x4;
   6418                      shOp = Iop_ShrN32x4;
   6419                      narOp = Iop_NarrowUn32to16x4;
   6420                      break;
   6421                   case 3:
   6422                      addOp = Iop_Add64x2;
   6423                      shOp = Iop_ShrN64x2;
   6424                      narOp = Iop_NarrowUn64to32x2;
   6425                      break;
   6426                   default:
   6427                      vassert(0);
   6428                }
   6429                assign(res, unop(narOp,
   6430                                 binop(addOp,
   6431                                       binop(shOp,
   6432                                             mkexpr(reg_m),
   6433                                             mkU8(shift_imm)),
   6434                                       binop(Iop_AndV128,
   6435                                             binop(shOp,
   6436                                                   mkexpr(reg_m),
   6437                                                   mkU8(shift_imm - 1)),
   6438                                             imm_val))));
   6439                putDRegI64(dreg, mkexpr(res), condT);
   6440                if (shift_imm == 0) {
   6441                   DIP("vmov%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
   6442                       shift_imm);
   6443                } else {
   6444                   DIP("vrshrn.i%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
   6445                       shift_imm);
   6446                }
   6447                return True;
   6448             }
   6449          } else {
   6450             /* fall through */
   6451          }
   6452       case 9:
   6453          dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   6454          mreg = ((theInstr >>  1) & 0x10) | (theInstr & 0xF);
   6455          if (mreg & 1)
   6456             return False;
   6457          mreg >>= 1;
   6458          size++;
   6459          if ((theInstr >> 8) & 1) {
   6460             switch (size) {
   6461                case 1:
   6462                   op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   6463                   cvt = U ? Iop_QNarrowUn16Uto8Ux8 : Iop_QNarrowUn16Sto8Sx8;
   6464                   cvt2 = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   6465                   break;
   6466                case 2:
   6467                   op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   6468                   cvt = U ? Iop_QNarrowUn32Uto16Ux4 : Iop_QNarrowUn32Sto16Sx4;
   6469                   cvt2 = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   6470                   break;
   6471                case 3:
   6472                   op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
   6473                   cvt = U ? Iop_QNarrowUn64Uto32Ux2 : Iop_QNarrowUn64Sto32Sx2;
   6474                   cvt2 = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   6475                   break;
   6476                default:
   6477                   vassert(0);
   6478             }
   6479             DIP("vq%sshrn.%c%d d%u, q%u, #%u\n", B ? "r" : "",
   6480                 U ? 'u' : 's', 8 << size, dreg, mreg, shift_imm);
   6481          } else {
   6482             vassert(U);
   6483             switch (size) {
   6484                case 1:
   6485                   op = Iop_SarN16x8;
   6486                   cvt = Iop_QNarrowUn16Sto8Ux8;
   6487                   cvt2 = Iop_Widen8Uto16x8;
   6488                   break;
   6489                case 2:
   6490                   op = Iop_SarN32x4;
   6491                   cvt = Iop_QNarrowUn32Sto16Ux4;
   6492                   cvt2 = Iop_Widen16Uto32x4;
   6493                   break;
   6494                case 3:
   6495                   op = Iop_SarN64x2;
   6496                   cvt = Iop_QNarrowUn64Sto32Ux2;
   6497                   cvt2 = Iop_Widen32Uto64x2;
   6498                   break;
   6499                default:
   6500                   vassert(0);
   6501             }
   6502             DIP("vq%sshrun.s%d d%u, q%u, #%u\n", B ? "r" : "",
   6503                 8 << size, dreg, mreg, shift_imm);
   6504          }
   6505          if (B) {
   6506             if (shift_imm > 0) {
   6507                imm = 1;
   6508                switch (size) {
   6509                   case 1: imm = (imm << 16) | imm; /* fall through */
   6510                   case 2: imm = (imm << 32) | imm; /* fall through */
   6511                   case 3: break;
   6512                   case 0: default: vassert(0);
   6513                }
   6514                switch (size) {
   6515                   case 1: add = Iop_Add16x8; break;
   6516                   case 2: add = Iop_Add32x4; break;
   6517                   case 3: add = Iop_Add64x2; break;
   6518                   case 0: default: vassert(0);
   6519                }
   6520             }
   6521          }
   6522          reg_m = newTemp(Ity_V128);
   6523          res = newTemp(Ity_V128);
   6524          assign(reg_m, getQReg(mreg));
   6525          if (B) {
   6526             /* VQRSHRN, VQRSHRUN */
   6527             assign(res, binop(add,
   6528                               binop(op, mkexpr(reg_m), mkU8(shift_imm)),
   6529                               binop(Iop_AndV128,
   6530                                     binop(op,
   6531                                           mkexpr(reg_m),
   6532                                           mkU8(shift_imm - 1)),
   6533                                     mkU128(imm))));
   6534          } else {
   6535             /* VQSHRN, VQSHRUN */
   6536             assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
   6537          }
   6538          setFlag_QC(unop(cvt2, unop(cvt, mkexpr(res))), mkexpr(res),
   6539                     True, condT);
   6540          putDRegI64(dreg, unop(cvt, mkexpr(res)), condT);
   6541          return True;
   6542       case 10:
   6543          /* VSHLL
   6544             VMOVL ::= VSHLL #0 */
   6545          if (B)
   6546             return False;
   6547          if (dreg & 1)
   6548             return False;
   6549          dreg >>= 1;
   6550          shift_imm = (8 << size) - shift_imm;
   6551          res = newTemp(Ity_V128);
   6552          switch (size) {
   6553             case 0:
   6554                op = Iop_ShlN16x8;
   6555                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   6556                break;
   6557             case 1:
   6558                op = Iop_ShlN32x4;
   6559                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   6560                break;
   6561             case 2:
   6562                op = Iop_ShlN64x2;
   6563                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   6564                break;
   6565             case 3:
   6566                return False;
   6567             default:
   6568                vassert(0);
   6569          }
   6570          assign(res, binop(op, unop(cvt, getDRegI64(mreg)), mkU8(shift_imm)));
   6571          putQReg(dreg, mkexpr(res), condT);
   6572          if (shift_imm == 0) {
   6573             DIP("vmovl.%c%d q%u, d%u\n", U ? 'u' : 's', 8 << size,
   6574                 dreg, mreg);
   6575          } else {
   6576             DIP("vshll.%c%d q%u, d%u, #%u\n", U ? 'u' : 's', 8 << size,
   6577                 dreg, mreg, shift_imm);
   6578          }
   6579          return True;
   6580       case 14:
   6581       case 15:
   6582          /* VCVT floating-point <-> fixed-point */
   6583          if ((theInstr >> 8) & 1) {
   6584             if (U) {
   6585                op = Q ? Iop_F32ToFixed32Ux4_RZ : Iop_F32ToFixed32Ux2_RZ;
   6586             } else {
   6587                op = Q ? Iop_F32ToFixed32Sx4_RZ : Iop_F32ToFixed32Sx2_RZ;
   6588             }
   6589             DIP("vcvt.%c32.f32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
   6590                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
   6591                 64 - ((theInstr >> 16) & 0x3f));
   6592          } else {
   6593             if (U) {
   6594                op = Q ? Iop_Fixed32UToF32x4_RN : Iop_Fixed32UToF32x2_RN;
   6595             } else {
   6596                op = Q ? Iop_Fixed32SToF32x4_RN : Iop_Fixed32SToF32x2_RN;
   6597             }
   6598             DIP("vcvt.f32.%c32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
   6599                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
   6600                 64 - ((theInstr >> 16) & 0x3f));
   6601          }
   6602          if (((theInstr >> 21) & 1) == 0)
   6603             return False;
   6604          if (Q) {
   6605             putQReg(dreg, binop(op, getQReg(mreg),
   6606                      mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
   6607          } else {
   6608             putDRegI64(dreg, binop(op, getDRegI64(mreg),
   6609                        mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
   6610          }
   6611          return True;
   6612       default:
   6613          return False;
   6614 
   6615    }
   6616    return False;
   6617 }
   6618 
   6619 /* A7.4.5 Two registers, miscellaneous */
   6620 static
   6621 Bool dis_neon_data_2reg_misc ( UInt theInstr, IRTemp condT )
   6622 {
   6623    UInt A = (theInstr >> 16) & 3;
   6624    UInt B = (theInstr >> 6) & 0x1f;
   6625    UInt Q = (theInstr >> 6) & 1;
   6626    UInt U = (theInstr >> 24) & 1;
   6627    UInt size = (theInstr >> 18) & 3;
   6628    UInt dreg = get_neon_d_regno(theInstr);
   6629    UInt mreg = get_neon_m_regno(theInstr);
   6630    UInt F = (theInstr >> 10) & 1;
   6631    IRTemp arg_d = IRTemp_INVALID;
   6632    IRTemp arg_m = IRTemp_INVALID;
   6633    IRTemp res = IRTemp_INVALID;
   6634    switch (A) {
   6635       case 0:
   6636          if (Q) {
   6637             arg_m = newTemp(Ity_V128);
   6638             res = newTemp(Ity_V128);
   6639             assign(arg_m, getQReg(mreg));
   6640          } else {
   6641             arg_m = newTemp(Ity_I64);
   6642             res = newTemp(Ity_I64);
   6643             assign(arg_m, getDRegI64(mreg));
   6644          }
   6645          switch (B >> 1) {
   6646             case 0: {
   6647                /* VREV64 */
   6648                IROp op;
   6649                switch (size) {
   6650                   case 0:
   6651                      op = Q ? Iop_Reverse8sIn64_x2 : Iop_Reverse8sIn64_x1;
   6652                      break;
   6653                   case 1:
   6654                      op = Q ? Iop_Reverse16sIn64_x2 : Iop_Reverse16sIn64_x1;
   6655                      break;
   6656                   case 2:
   6657                      op = Q ? Iop_Reverse32sIn64_x2 : Iop_Reverse32sIn64_x1;
   6658                      break;
   6659                   case 3:
   6660                      return False;
   6661                   default:
   6662                      vassert(0);
   6663                }
   6664                assign(res, unop(op, mkexpr(arg_m)));
   6665                DIP("vrev64.%d %c%u, %c%u\n", 8 << size,
   6666                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6667                break;
   6668             }
   6669             case 1: {
   6670                /* VREV32 */
   6671                IROp op;
   6672                switch (size) {
   6673                   case 0:
   6674                      op = Q ? Iop_Reverse8sIn32_x4 : Iop_Reverse8sIn32_x2;
   6675                      break;
   6676                   case 1:
   6677                      op = Q ? Iop_Reverse16sIn32_x4 : Iop_Reverse16sIn32_x2;
   6678                      break;
   6679                   case 2:
   6680                   case 3:
   6681                      return False;
   6682                   default:
   6683                      vassert(0);
   6684                }
   6685                assign(res, unop(op, mkexpr(arg_m)));
   6686                DIP("vrev32.%d %c%u, %c%u\n", 8 << size,
   6687                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6688                break;
   6689             }
   6690             case 2: {
   6691                /* VREV16 */
   6692                IROp op;
   6693                switch (size) {
   6694                   case 0:
   6695                      op = Q ? Iop_Reverse8sIn16_x8 : Iop_Reverse8sIn16_x4;
   6696                      break;
   6697                   case 1:
   6698                   case 2:
   6699                   case 3:
   6700                      return False;
   6701                   default:
   6702                      vassert(0);
   6703                }
   6704                assign(res, unop(op, mkexpr(arg_m)));
   6705                DIP("vrev16.%d %c%u, %c%u\n", 8 << size,
   6706                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6707                break;
   6708             }
   6709             case 3:
   6710                return False;
   6711             case 4:
   6712             case 5: {
   6713                /* VPADDL */
   6714                IROp op;
   6715                U = (theInstr >> 7) & 1;
   6716                if (Q) {
   6717                   switch (size) {
   6718                      case 0: op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16; break;
   6719                      case 1: op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8; break;
   6720                      case 2: op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4; break;
   6721                      case 3: return False;
   6722                      default: vassert(0);
   6723                   }
   6724                } else {
   6725                   switch (size) {
   6726                      case 0: op = U ? Iop_PwAddL8Ux8  : Iop_PwAddL8Sx8;  break;
   6727                      case 1: op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4; break;
   6728                      case 2: op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2; break;
   6729                      case 3: return False;
   6730                      default: vassert(0);
   6731                   }
   6732                }
   6733                assign(res, unop(op, mkexpr(arg_m)));
   6734                DIP("vpaddl.%c%d %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
   6735                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6736                break;
   6737             }
   6738             case 6:
   6739             case 7:
   6740                return False;
   6741             case 8: {
   6742                /* VCLS */
   6743                IROp op;
   6744                switch (size) {
   6745                   case 0: op = Q ? Iop_Cls8x16 : Iop_Cls8x8; break;
   6746                   case 1: op = Q ? Iop_Cls16x8 : Iop_Cls16x4; break;
   6747                   case 2: op = Q ? Iop_Cls32x4 : Iop_Cls32x2; break;
   6748                   case 3: return False;
   6749                   default: vassert(0);
   6750                }
   6751                assign(res, unop(op, mkexpr(arg_m)));
   6752                DIP("vcls.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6753                    Q ? 'q' : 'd', mreg);
   6754                break;
   6755             }
   6756             case 9: {
   6757                /* VCLZ */
   6758                IROp op;
   6759                switch (size) {
   6760                   case 0: op = Q ? Iop_Clz8x16 : Iop_Clz8x8; break;
   6761                   case 1: op = Q ? Iop_Clz16x8 : Iop_Clz16x4; break;
   6762                   case 2: op = Q ? Iop_Clz32x4 : Iop_Clz32x2; break;
   6763                   case 3: return False;
   6764                   default: vassert(0);
   6765                }
   6766                assign(res, unop(op, mkexpr(arg_m)));
   6767                DIP("vclz.i%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6768                    Q ? 'q' : 'd', mreg);
   6769                break;
   6770             }
   6771             case 10:
   6772                /* VCNT */
   6773                assign(res, unop(Q ? Iop_Cnt8x16 : Iop_Cnt8x8, mkexpr(arg_m)));
   6774                DIP("vcnt.8 %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
   6775                    mreg);
   6776                break;
   6777             case 11:
   6778                /* VMVN */
   6779                if (Q)
   6780                   assign(res, unop(Iop_NotV128, mkexpr(arg_m)));
   6781                else
   6782                   assign(res, unop(Iop_Not64, mkexpr(arg_m)));
   6783                DIP("vmvn %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
   6784                    mreg);
   6785                break;
   6786             case 12:
   6787             case 13: {
   6788                /* VPADAL */
   6789                IROp op, add_op;
   6790                U = (theInstr >> 7) & 1;
   6791                if (Q) {
   6792                   switch (size) {
   6793                      case 0:
   6794                         op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16;
   6795                         add_op = Iop_Add16x8;
   6796                         break;
   6797                      case 1:
   6798                         op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8;
   6799                         add_op = Iop_Add32x4;
   6800                         break;
   6801                      case 2:
   6802                         op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4;
   6803                         add_op = Iop_Add64x2;
   6804                         break;
   6805                      case 3:
   6806                         return False;
   6807                      default:
   6808                         vassert(0);
   6809                   }
   6810                } else {
   6811                   switch (size) {
   6812                      case 0:
   6813                         op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8;
   6814                         add_op = Iop_Add16x4;
   6815                         break;
   6816                      case 1:
   6817                         op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4;
   6818                         add_op = Iop_Add32x2;
   6819                         break;
   6820                      case 2:
   6821                         op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2;
   6822                         add_op = Iop_Add64;
   6823                         break;
   6824                      case 3:
   6825                         return False;
   6826                      default:
   6827                         vassert(0);
   6828                   }
   6829                }
   6830                if (Q) {
   6831                   arg_d = newTemp(Ity_V128);
   6832                   assign(arg_d, getQReg(dreg));
   6833                } else {
   6834                   arg_d = newTemp(Ity_I64);
   6835                   assign(arg_d, getDRegI64(dreg));
   6836                }
   6837                assign(res, binop(add_op, unop(op, mkexpr(arg_m)),
   6838                                          mkexpr(arg_d)));
   6839                DIP("vpadal.%c%d %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
   6840                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6841                break;
   6842             }
   6843             case 14: {
   6844                /* VQABS */
   6845                IROp op_sub, op_qsub, op_cmp;
   6846                IRTemp mask, tmp;
   6847                IRExpr *zero1, *zero2;
   6848                IRExpr *neg, *neg2;
   6849                if (Q) {
   6850                   zero1 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6851                   zero2 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6852                   mask = newTemp(Ity_V128);
   6853                   tmp = newTemp(Ity_V128);
   6854                } else {
   6855                   zero1 = mkU64(0);
   6856                   zero2 = mkU64(0);
   6857                   mask = newTemp(Ity_I64);
   6858                   tmp = newTemp(Ity_I64);
   6859                }
   6860                switch (size) {
   6861                   case 0:
   6862                      op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   6863                      op_qsub = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
   6864                      op_cmp = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   6865                      break;
   6866                   case 1:
   6867                      op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   6868                      op_qsub = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
   6869                      op_cmp = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4;
   6870                      break;
   6871                   case 2:
   6872                      op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   6873                      op_qsub = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
   6874                      op_cmp = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2;
   6875                      break;
   6876                   case 3:
   6877                      return False;
   6878                   default:
   6879                      vassert(0);
   6880                }
   6881                assign(mask, binop(op_cmp, mkexpr(arg_m), zero1));
   6882                neg = binop(op_qsub, zero2, mkexpr(arg_m));
   6883                neg2 = binop(op_sub, zero2, mkexpr(arg_m));
   6884                assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
   6885                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6886                                        mkexpr(mask),
   6887                                        mkexpr(arg_m)),
   6888                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6889                                        unop(Q ? Iop_NotV128 : Iop_Not64,
   6890                                             mkexpr(mask)),
   6891                                        neg)));
   6892                assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
   6893                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6894                                        mkexpr(mask),
   6895                                        mkexpr(arg_m)),
   6896                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6897                                        unop(Q ? Iop_NotV128 : Iop_Not64,
   6898                                             mkexpr(mask)),
   6899                                        neg2)));
   6900                setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
   6901                DIP("vqabs.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6902                    Q ? 'q' : 'd', mreg);
   6903                break;
   6904             }
   6905             case 15: {
   6906                /* VQNEG */
   6907                IROp op, op2;
   6908                IRExpr *zero;
   6909                if (Q) {
   6910                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6911                } else {
   6912                   zero = mkU64(0);
   6913                }
   6914                switch (size) {
   6915                   case 0:
   6916                      op = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
   6917                      op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   6918                      break;
   6919                   case 1:
   6920                      op = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
   6921                      op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   6922                      break;
   6923                   case 2:
   6924                      op = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
   6925                      op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   6926                      break;
   6927                   case 3:
   6928                      return False;
   6929                   default:
   6930                      vassert(0);
   6931                }
   6932                assign(res, binop(op, zero, mkexpr(arg_m)));
   6933                setFlag_QC(mkexpr(res), binop(op2, zero, mkexpr(arg_m)),
   6934                           Q, condT);
   6935                DIP("vqneg.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6936                    Q ? 'q' : 'd', mreg);
   6937                break;
   6938             }
   6939             default:
   6940                vassert(0);
   6941          }
   6942          if (Q) {
   6943             putQReg(dreg, mkexpr(res), condT);
   6944          } else {
   6945             putDRegI64(dreg, mkexpr(res), condT);
   6946          }
   6947          return True;
   6948       case 1:
   6949          if (Q) {
   6950             arg_m = newTemp(Ity_V128);
   6951             res = newTemp(Ity_V128);
   6952             assign(arg_m, getQReg(mreg));
   6953          } else {
   6954             arg_m = newTemp(Ity_I64);
   6955             res = newTemp(Ity_I64);
   6956             assign(arg_m, getDRegI64(mreg));
   6957          }
   6958          switch ((B >> 1) & 0x7) {
   6959             case 0: {
   6960                /* VCGT #0 */
   6961                IRExpr *zero;
   6962                IROp op;
   6963                if (Q) {
   6964                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6965                } else {
   6966                   zero = mkU64(0);
   6967                }
   6968                if (F) {
   6969                   switch (size) {
   6970                      case 0: case 1: case 3: return False;
   6971                      case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
   6972                      default: vassert(0);
   6973                   }
   6974                } else {
   6975                   switch (size) {
   6976                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   6977                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   6978                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   6979                      case 3: return False;
   6980                      default: vassert(0);
   6981                   }
   6982                }
   6983                assign(res, binop(op, mkexpr(arg_m), zero));
   6984                DIP("vcgt.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   6985                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6986                break;
   6987             }
   6988             case 1: {
   6989                /* VCGE #0 */
   6990                IROp op;
   6991                IRExpr *zero;
   6992                if (Q) {
   6993                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6994                } else {
   6995                   zero = mkU64(0);
   6996                }
   6997                if (F) {
   6998                   switch (size) {
   6999                      case 0: case 1: case 3: return False;
   7000                      case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
   7001                      default: vassert(0);
   7002                   }
   7003                   assign(res, binop(op, mkexpr(arg_m), zero));
   7004                } else {
   7005                   switch (size) {
   7006                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   7007                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   7008                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   7009                      case 3: return False;
   7010                      default: vassert(0);
   7011                   }
   7012                   assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   7013                                    binop(op, zero, mkexpr(arg_m))));
   7014                }
   7015                DIP("vcge.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   7016                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7017                break;
   7018             }
   7019             case 2: {
   7020                /* VCEQ #0 */
   7021                IROp op;
   7022                IRExpr *zero;
   7023                if (F) {
   7024                   if (Q) {
   7025                      zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7026                   } else {
   7027                      zero = mkU64(0);
   7028                   }
   7029                   switch (size) {
   7030                      case 0: case 1: case 3: return False;
   7031                      case 2: op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2; break;
   7032                      default: vassert(0);
   7033                   }
   7034                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7035                } else {
   7036                   switch (size) {
   7037                      case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
   7038                      case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
   7039                      case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
   7040                      case 3: return False;
   7041                      default: vassert(0);
   7042                   }
   7043                   assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   7044                                    unop(op, mkexpr(arg_m))));
   7045                }
   7046                DIP("vceq.%c%d %c%u, %c%u, #0\n", F ? 'f' : 'i', 8 << size,
   7047                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7048                break;
   7049             }
   7050             case 3: {
   7051                /* VCLE #0 */
   7052                IRExpr *zero;
   7053                IROp op;
   7054                if (Q) {
   7055                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7056                } else {
   7057                   zero = mkU64(0);
   7058                }
   7059                if (F) {
   7060                   switch (size) {
   7061                      case 0: case 1: case 3: return False;
   7062                      case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
   7063                      default: vassert(0);
   7064                   }
   7065                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7066                } else {
   7067                   switch (size) {
   7068                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   7069                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   7070                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   7071                      case 3: return False;
   7072                      default: vassert(0);
   7073                   }
   7074                   assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   7075                                    binop(op, mkexpr(arg_m), zero)));
   7076                }
   7077                DIP("vcle.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   7078                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7079                break;
   7080             }
   7081             case 4: {
   7082                /* VCLT #0 */
   7083                IROp op;
   7084                IRExpr *zero;
   7085                if (Q) {
   7086                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7087                } else {
   7088                   zero = mkU64(0);
   7089                }
   7090                if (F) {
   7091                   switch (size) {
   7092                      case 0: case 1: case 3: return False;
   7093                      case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
   7094                      default: vassert(0);
   7095                   }
   7096                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7097                } else {
   7098                   switch (size) {
   7099                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   7100                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   7101                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   7102                      case 3: return False;
   7103                      default: vassert(0);
   7104                   }
   7105                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7106                }
   7107                DIP("vclt.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   7108                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7109                break;
   7110             }
   7111             case 5:
   7112                return False;
   7113             case 6: {
   7114                /* VABS */
   7115                if (!F) {
   7116                   IROp op;
   7117                   switch(size) {
   7118                      case 0: op = Q ? Iop_Abs8x16 : Iop_Abs8x8; break;
   7119                      case 1: op = Q ? Iop_Abs16x8 : Iop_Abs16x4; break;
   7120                      case 2: op = Q ? Iop_Abs32x4 : Iop_Abs32x2; break;
   7121                      case 3: return False;
   7122                      default: vassert(0);
   7123                   }
   7124                   assign(res, unop(op, mkexpr(arg_m)));
   7125                } else {
   7126                   assign(res, unop(Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2,
   7127                                    mkexpr(arg_m)));
   7128                }
   7129                DIP("vabs.%c%d %c%u, %c%u\n",
   7130                    F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
   7131                    Q ? 'q' : 'd', mreg);
   7132                break;
   7133             }
   7134             case 7: {
   7135                /* VNEG */
   7136                IROp op;
   7137                IRExpr *zero;
   7138                if (F) {
   7139                   switch (size) {
   7140                      case 0: case 1: case 3: return False;
   7141                      case 2: op = Q ? Iop_Neg32Fx4 : Iop_Neg32Fx2; break;
   7142                      default: vassert(0);
   7143                   }
   7144                   assign(res, unop(op, mkexpr(arg_m)));
   7145                } else {
   7146                   if (Q) {
   7147                      zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7148                   } else {
   7149                      zero = mkU64(0);
   7150                   }
   7151                   switch (size) {
   7152                      case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
   7153                      case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
   7154                      case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
   7155                      case 3: return False;
   7156                      default: vassert(0);
   7157                   }
   7158                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7159                }
   7160                DIP("vneg.%c%d %c%u, %c%u\n",
   7161                    F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
   7162                    Q ? 'q' : 'd', mreg);
   7163                break;
   7164             }
   7165             default:
   7166                vassert(0);
   7167          }
   7168          if (Q) {
   7169             putQReg(dreg, mkexpr(res), condT);
   7170          } else {
   7171             putDRegI64(dreg, mkexpr(res), condT);
   7172          }
   7173          return True;
   7174       case 2:
   7175          if ((B >> 1) == 0) {
   7176             /* VSWP */
   7177             if (Q) {
   7178                arg_m = newTemp(Ity_V128);
   7179                assign(arg_m, getQReg(mreg));
   7180                putQReg(mreg, getQReg(dreg), condT);
   7181                putQReg(dreg, mkexpr(arg_m), condT);
   7182             } else {
   7183                arg_m = newTemp(Ity_I64);
   7184                assign(arg_m, getDRegI64(mreg));
   7185                putDRegI64(mreg, getDRegI64(dreg), condT);
   7186                putDRegI64(dreg, mkexpr(arg_m), condT);
   7187             }
   7188             DIP("vswp %c%u, %c%u\n",
   7189                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7190             return True;
   7191          } else if ((B >> 1) == 1) {
   7192             /* VTRN */
   7193             IROp op_odd = Iop_INVALID, op_even = Iop_INVALID;
   7194             IRTemp old_m, old_d, new_d, new_m;
   7195             if (Q) {
   7196                old_m = newTemp(Ity_V128);
   7197                old_d = newTemp(Ity_V128);
   7198                new_m = newTemp(Ity_V128);
   7199                new_d = newTemp(Ity_V128);
   7200                assign(old_m, getQReg(mreg));
   7201                assign(old_d, getQReg(dreg));
   7202             } else {
   7203                old_m = newTemp(Ity_I64);
   7204                old_d = newTemp(Ity_I64);
   7205                new_m = newTemp(Ity_I64);
   7206                new_d = newTemp(Ity_I64);
   7207                assign(old_m, getDRegI64(mreg));
   7208                assign(old_d, getDRegI64(dreg));
   7209             }
   7210             if (Q) {
   7211                switch (size) {
   7212                   case 0:
   7213                      op_odd  = Iop_InterleaveOddLanes8x16;
   7214                      op_even = Iop_InterleaveEvenLanes8x16;
   7215                      break;
   7216                   case 1:
   7217                      op_odd  = Iop_InterleaveOddLanes16x8;
   7218                      op_even = Iop_InterleaveEvenLanes16x8;
   7219                      break;
   7220                   case 2:
   7221                      op_odd  = Iop_InterleaveOddLanes32x4;
   7222                      op_even = Iop_InterleaveEvenLanes32x4;
   7223                      break;
   7224                   case 3:
   7225                      return False;
   7226                   default:
   7227                      vassert(0);
   7228                }
   7229             } else {
   7230                switch (size) {
   7231                   case 0:
   7232                      op_odd  = Iop_InterleaveOddLanes8x8;
   7233                      op_even = Iop_InterleaveEvenLanes8x8;
   7234                      break;
   7235                   case 1:
   7236                      op_odd  = Iop_InterleaveOddLanes16x4;
   7237                      op_even = Iop_InterleaveEvenLanes16x4;
   7238                      break;
   7239                   case 2:
   7240                      op_odd  = Iop_InterleaveHI32x2;
   7241                      op_even = Iop_InterleaveLO32x2;
   7242                      break;
   7243                   case 3:
   7244                      return False;
   7245                   default:
   7246                      vassert(0);
   7247                }
   7248             }
   7249             assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
   7250             assign(new_m, binop(op_odd, mkexpr(old_m), mkexpr(old_d)));
   7251             if (Q) {
   7252                putQReg(dreg, mkexpr(new_d), condT);
   7253                putQReg(mreg, mkexpr(new_m), condT);
   7254             } else {
   7255                putDRegI64(dreg, mkexpr(new_d), condT);
   7256                putDRegI64(mreg, mkexpr(new_m), condT);
   7257             }
   7258             DIP("vtrn.%d %c%u, %c%u\n",
   7259                 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7260             return True;
   7261          } else if ((B >> 1) == 2) {
   7262             /* VUZP */
   7263             IROp op_even, op_odd;
   7264             IRTemp old_m, old_d, new_m, new_d;
   7265             if (!Q && size == 2)
   7266                return False;
   7267             if (Q) {
   7268                old_m = newTemp(Ity_V128);
   7269                old_d = newTemp(Ity_V128);
   7270                new_m = newTemp(Ity_V128);
   7271                new_d = newTemp(Ity_V128);
   7272                assign(old_m, getQReg(mreg));
   7273                assign(old_d, getQReg(dreg));
   7274             } else {
   7275                old_m = newTemp(Ity_I64);
   7276                old_d = newTemp(Ity_I64);
   7277                new_m = newTemp(Ity_I64);
   7278                new_d = newTemp(Ity_I64);
   7279                assign(old_m, getDRegI64(mreg));
   7280                assign(old_d, getDRegI64(dreg));
   7281             }
   7282             switch (size) {
   7283                case 0:
   7284                   op_odd  = Q ? Iop_CatOddLanes8x16 : Iop_CatOddLanes8x8;
   7285                   op_even = Q ? Iop_CatEvenLanes8x16 : Iop_CatEvenLanes8x8;
   7286                   break;
   7287                case 1:
   7288                   op_odd  = Q ? Iop_CatOddLanes16x8 : Iop_CatOddLanes16x4;
   7289                   op_even = Q ? Iop_CatEvenLanes16x8 : Iop_CatEvenLanes16x4;
   7290                   break;
   7291                case 2:
   7292                   op_odd  = Iop_CatOddLanes32x4;
   7293                   op_even = Iop_CatEvenLanes32x4;
   7294                   break;
   7295                case 3:
   7296                   return False;
   7297                default:
   7298                   vassert(0);
   7299             }
   7300             assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
   7301             assign(new_m, binop(op_odd,  mkexpr(old_m), mkexpr(old_d)));
   7302             if (Q) {
   7303                putQReg(dreg, mkexpr(new_d), condT);
   7304                putQReg(mreg, mkexpr(new_m), condT);
   7305             } else {
   7306                putDRegI64(dreg, mkexpr(new_d), condT);
   7307                putDRegI64(mreg, mkexpr(new_m), condT);
   7308             }
   7309             DIP("vuzp.%d %c%u, %c%u\n",
   7310                 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7311             return True;
   7312          } else if ((B >> 1) == 3) {
   7313             /* VZIP */
   7314             IROp op_lo, op_hi;
   7315             IRTemp old_m, old_d, new_m, new_d;
   7316             if (!Q && size == 2)
   7317                return False;
   7318             if (Q) {
   7319                old_m = newTemp(Ity_V128);
   7320                old_d = newTemp(Ity_V128);
   7321                new_m = newTemp(Ity_V128);
   7322                new_d = newTemp(Ity_V128);
   7323                assign(old_m, getQReg(mreg));
   7324                assign(old_d, getQReg(dreg));
   7325             } else {
   7326                old_m = newTemp(Ity_I64);
   7327                old_d = newTemp(Ity_I64);
   7328                new_m = newTemp(Ity_I64);
   7329                new_d = newTemp(Ity_I64);
   7330                assign(old_m, getDRegI64(mreg));
   7331                assign(old_d, getDRegI64(dreg));
   7332             }
   7333             switch (size) {
   7334                case 0:
   7335                   op_hi = Q ? Iop_InterleaveHI8x16 : Iop_InterleaveHI8x8;
   7336                   op_lo = Q ? Iop_InterleaveLO8x16 : Iop_InterleaveLO8x8;
   7337                   break;
   7338                case 1:
   7339                   op_hi = Q ? Iop_InterleaveHI16x8 : Iop_InterleaveHI16x4;
   7340                   op_lo = Q ? Iop_InterleaveLO16x8 : Iop_InterleaveLO16x4;
   7341                   break;
   7342                case 2:
   7343                   op_hi = Iop_InterleaveHI32x4;
   7344                   op_lo = Iop_InterleaveLO32x4;
   7345                   break;
   7346                case 3:
   7347                   return False;
   7348                default:
   7349                   vassert(0);
   7350             }
   7351             assign(new_d, binop(op_lo, mkexpr(old_m), mkexpr(old_d)));
   7352             assign(new_m, binop(op_hi, mkexpr(old_m), mkexpr(old_d)));
   7353             if (Q) {
   7354                putQReg(dreg, mkexpr(new_d), condT);
   7355                putQReg(mreg, mkexpr(new_m), condT);
   7356             } else {
   7357                putDRegI64(dreg, mkexpr(new_d), condT);
   7358                putDRegI64(mreg, mkexpr(new_m), condT);
   7359             }
   7360             DIP("vzip.%d %c%u, %c%u\n",
   7361                 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7362             return True;
   7363          } else if (B == 8) {
   7364             /* VMOVN */
   7365             IROp op;
   7366             mreg >>= 1;
   7367             switch (size) {
   7368                case 0: op = Iop_NarrowUn16to8x8;  break;
   7369                case 1: op = Iop_NarrowUn32to16x4; break;
   7370                case 2: op = Iop_NarrowUn64to32x2; break;
   7371                case 3: return False;
   7372                default: vassert(0);
   7373             }
   7374             putDRegI64(dreg, unop(op, getQReg(mreg)), condT);
   7375             DIP("vmovn.i%d d%u, q%u\n", 16 << size, dreg, mreg);
   7376             return True;
   7377          } else if (B == 9 || (B >> 1) == 5) {
   7378             /* VQMOVN, VQMOVUN */
   7379             IROp op, op2;
   7380             IRTemp tmp;
   7381             dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   7382             mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   7383             if (mreg & 1)
   7384                return False;
   7385             mreg >>= 1;
   7386             switch (size) {
   7387                case 0: op2 = Iop_NarrowUn16to8x8;  break;
   7388                case 1: op2 = Iop_NarrowUn32to16x4; break;
   7389                case 2: op2 = Iop_NarrowUn64to32x2; break;
   7390                case 3: return False;
   7391                default: vassert(0);
   7392             }
   7393             switch (B & 3) {
   7394                case 0:
   7395                   vassert(0);
   7396                case 1:
   7397                   switch (size) {
   7398                      case 0: op = Iop_QNarrowUn16Sto8Ux8;  break;
   7399                      case 1: op = Iop_QNarrowUn32Sto16Ux4; break;
   7400                      case 2: op = Iop_QNarrowUn64Sto32Ux2; break;
   7401                      case 3: return False;
   7402                      default: vassert(0);
   7403                   }
   7404                   DIP("vqmovun.s%d d%u, q%u\n", 16 << size, dreg, mreg);
   7405                   break;
   7406                case 2:
   7407                   switch (size) {
   7408                      case 0: op = Iop_QNarrowUn16Sto8Sx8;  break;
   7409                      case 1: op = Iop_QNarrowUn32Sto16Sx4; break;
   7410                      case 2: op = Iop_QNarrowUn64Sto32Sx2; break;
   7411                      case 3: return False;
   7412                      default: vassert(0);
   7413                   }
   7414                   DIP("vqmovn.s%d d%u, q%u\n", 16 << size, dreg, mreg);
   7415                   break;
   7416                case 3:
   7417                   switch (size) {
   7418                      case 0: op = Iop_QNarrowUn16Uto8Ux8;  break;
   7419                      case 1: op = Iop_QNarrowUn32Uto16Ux4; break;
   7420                      case 2: op = Iop_QNarrowUn64Uto32Ux2; break;
   7421                      case 3: return False;
   7422                      default: vassert(0);
   7423                   }
   7424                   DIP("vqmovn.u%d d%u, q%u\n", 16 << size, dreg, mreg);
   7425                   break;
   7426                default:
   7427                   vassert(0);
   7428             }
   7429             res = newTemp(Ity_I64);
   7430             tmp = newTemp(Ity_I64);
   7431             assign(res, unop(op, getQReg(mreg)));
   7432             assign(tmp, unop(op2, getQReg(mreg)));
   7433             setFlag_QC(mkexpr(res), mkexpr(tmp), False, condT);
   7434             putDRegI64(dreg, mkexpr(res), condT);
   7435             return True;
   7436          } else if (B == 12) {
   7437             /* VSHLL (maximum shift) */
   7438             IROp op, cvt;
   7439             UInt shift_imm;
   7440             if (Q)
   7441                return False;
   7442             if (dreg & 1)
   7443                return False;
   7444             dreg >>= 1;
   7445             shift_imm = 8 << size;
   7446             res = newTemp(Ity_V128);
   7447             switch (size) {
   7448                case 0: op = Iop_ShlN16x8; cvt = Iop_Widen8Uto16x8;  break;
   7449                case 1: op = Iop_ShlN32x4; cvt = Iop_Widen16Uto32x4; break;
   7450                case 2: op = Iop_ShlN64x2; cvt = Iop_Widen32Uto64x2; break;
   7451                case 3: return False;
   7452                default: vassert(0);
   7453             }
   7454             assign(res, binop(op, unop(cvt, getDRegI64(mreg)),
   7455                                   mkU8(shift_imm)));
   7456             putQReg(dreg, mkexpr(res), condT);
   7457             DIP("vshll.i%d q%u, d%u, #%d\n", 8 << size, dreg, mreg, 8 << size);
   7458             return True;
   7459          } else if ((B >> 3) == 3 && (B & 3) == 0) {
   7460             /* VCVT (half<->single) */
   7461             /* Half-precision extensions are needed to run this */
   7462             vassert(0); // ATC
   7463             if (((theInstr >> 18) & 3) != 1)
   7464                return False;
   7465             if ((theInstr >> 8) & 1) {
   7466                if (dreg & 1)
   7467                   return False;
   7468                dreg >>= 1;
   7469                putQReg(dreg, unop(Iop_F16toF32x4, getDRegI64(mreg)),
   7470                      condT);
   7471                DIP("vcvt.f32.f16 q%u, d%u\n", dreg, mreg);
   7472             } else {
   7473                if (mreg & 1)
   7474                   return False;
   7475                mreg >>= 1;
   7476                putDRegI64(dreg, unop(Iop_F32toF16x4, getQReg(mreg)),
   7477                                 condT);
   7478                DIP("vcvt.f16.f32 d%u, q%u\n", dreg, mreg);
   7479             }
   7480             return True;
   7481          } else {
   7482             return False;
   7483          }
   7484          vassert(0);
   7485          return True;
   7486       case 3:
   7487          if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,0)) {
   7488             /* VRECPE */
   7489             IROp op;
   7490             F = (theInstr >> 8) & 1;
   7491             if (size != 2)
   7492                return False;
   7493             if (Q) {
   7494                op = F ? Iop_RecipEst32Fx4 : Iop_RecipEst32Ux4;
   7495                putQReg(dreg, unop(op, getQReg(mreg)), condT);
   7496                DIP("vrecpe.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
   7497             } else {
   7498                op = F ? Iop_RecipEst32Fx2 : Iop_RecipEst32Ux2;
   7499                putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
   7500                DIP("vrecpe.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
   7501             }
   7502             return True;
   7503          } else if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,1)) {
   7504             /* VRSQRTE */
   7505             IROp op;
   7506             F = (B >> 2) & 1;
   7507             if (size != 2)
   7508                return False;
   7509             if (F) {
   7510                /* fp */
   7511                op = Q ? Iop_RSqrtEst32Fx4 : Iop_RSqrtEst32Fx2;
   7512             } else {
   7513                /* unsigned int */
   7514                op = Q ? Iop_RSqrtEst32Ux4 : Iop_RSqrtEst32Ux2;
   7515             }
   7516             if (Q) {
   7517                putQReg(dreg, unop(op, getQReg(mreg)), condT);
   7518                DIP("vrsqrte.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
   7519             } else {
   7520                putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
   7521                DIP("vrsqrte.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
   7522             }
   7523             return True;
   7524          } else if ((B >> 3) == 3) {
   7525             /* VCVT (fp<->integer) */
   7526             IROp op;
   7527             if (size != 2)
   7528                return False;
   7529             switch ((B >> 1) & 3) {
   7530                case 0:
   7531                   op = Q ? Iop_I32StoFx4 : Iop_I32StoFx2;
   7532                   DIP("vcvt.f32.s32 %c%u, %c%u\n",
   7533                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7534                   break;
   7535                case 1:
   7536                   op = Q ? Iop_I32UtoFx4 : Iop_I32UtoFx2;
   7537                   DIP("vcvt.f32.u32 %c%u, %c%u\n",
   7538                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7539                   break;
   7540                case 2:
   7541                   op = Q ? Iop_FtoI32Sx4_RZ : Iop_FtoI32Sx2_RZ;
   7542                   DIP("vcvt.s32.f32 %c%u, %c%u\n",
   7543                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7544                   break;
   7545                case 3:
   7546                   op = Q ? Iop_FtoI32Ux4_RZ : Iop_FtoI32Ux2_RZ;
   7547                   DIP("vcvt.u32.f32 %c%u, %c%u\n",
   7548                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7549                   break;
   7550                default:
   7551                   vassert(0);
   7552             }
   7553             if (Q) {
   7554                putQReg(dreg, unop(op, getQReg(mreg)), condT);
   7555             } else {
   7556                putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
   7557             }
   7558             return True;
   7559          } else {
   7560             return False;
   7561          }
   7562          vassert(0);
   7563          return True;
   7564       default:
   7565          vassert(0);
   7566    }
   7567    return False;
   7568 }
   7569 
   7570 /* A7.4.6 One register and a modified immediate value */
   7571 static
   7572 void ppNeonImm(UInt imm, UInt cmode, UInt op)
   7573 {
   7574    int i;
   7575    switch (cmode) {
   7576       case 0: case 1: case 8: case 9:
   7577          vex_printf("0x%x", imm);
   7578          break;
   7579       case 2: case 3: case 10: case 11:
   7580          vex_printf("0x%x00", imm);
   7581          break;
   7582       case 4: case 5:
   7583          vex_printf("0x%x0000", imm);
   7584          break;
   7585       case 6: case 7:
   7586          vex_printf("0x%x000000", imm);
   7587          break;
   7588       case 12:
   7589          vex_printf("0x%xff", imm);
   7590          break;
   7591       case 13:
   7592          vex_printf("0x%xffff", imm);
   7593          break;
   7594       case 14:
   7595          if (op) {
   7596             vex_printf("0x");
   7597             for (i = 7; i >= 0; i--)
   7598                vex_printf("%s", (imm & (1 << i)) ? "ff" : "00");
   7599          } else {
   7600             vex_printf("0x%x", imm);
   7601          }
   7602          break;
   7603       case 15:
   7604          vex_printf("0x%x", imm);
   7605          break;
   7606    }
   7607 }
   7608 
   7609 static
   7610 const char *ppNeonImmType(UInt cmode, UInt op)
   7611 {
   7612    switch (cmode) {
   7613       case 0 ... 7:
   7614       case 12: case 13:
   7615          return "i32";
   7616       case 8 ... 11:
   7617          return "i16";
   7618       case 14:
   7619          if (op)
   7620             return "i64";
   7621          else
   7622             return "i8";
   7623       case 15:
   7624          if (op)
   7625             vassert(0);
   7626          else
   7627             return "f32";
   7628       default:
   7629          vassert(0);
   7630    }
   7631 }
   7632 
   7633 static
   7634 void DIPimm(UInt imm, UInt cmode, UInt op,
   7635             const char *instr, UInt Q, UInt dreg)
   7636 {
   7637    if (vex_traceflags & VEX_TRACE_FE) {
   7638       vex_printf("%s.%s %c%u, #", instr,
   7639                  ppNeonImmType(cmode, op), Q ? 'q' : 'd', dreg);
   7640       ppNeonImm(imm, cmode, op);
   7641       vex_printf("\n");
   7642    }
   7643 }
   7644 
   7645 static
   7646 Bool dis_neon_data_1reg_and_imm ( UInt theInstr, IRTemp condT )
   7647 {
   7648    UInt dreg = get_neon_d_regno(theInstr);
   7649    ULong imm_raw = ((theInstr >> 17) & 0x80) | ((theInstr >> 12) & 0x70) |
   7650                   (theInstr & 0xf);
   7651    ULong imm_raw_pp = imm_raw;
   7652    UInt cmode = (theInstr >> 8) & 0xf;
   7653    UInt op_bit = (theInstr >> 5) & 1;
   7654    ULong imm = 0;
   7655    UInt Q = (theInstr >> 6) & 1;
   7656    int i, j;
   7657    UInt tmp;
   7658    IRExpr *imm_val;
   7659    IRExpr *expr;
   7660    IRTemp tmp_var;
   7661    switch(cmode) {
   7662       case 7: case 6:
   7663          imm_raw = imm_raw << 8;
   7664          /* fallthrough */
   7665       case 5: case 4:
   7666          imm_raw = imm_raw << 8;
   7667          /* fallthrough */
   7668       case 3: case 2:
   7669          imm_raw = imm_raw << 8;
   7670          /* fallthrough */
   7671       case 0: case 1:
   7672          imm = (imm_raw << 32) | imm_raw;
   7673          break;
   7674       case 11: case 10:
   7675          imm_raw = imm_raw << 8;
   7676          /* fallthrough */
   7677       case 9: case 8:
   7678          imm_raw = (imm_raw << 16) | imm_raw;
   7679          imm = (imm_raw << 32) | imm_raw;
   7680          break;
   7681       case 13:
   7682          imm_raw = (imm_raw << 8) | 0xff;
   7683          /* fallthrough */
   7684       case 12:
   7685          imm_raw = (imm_raw << 8) | 0xff;
   7686          imm = (imm_raw << 32) | imm_raw;
   7687          break;
   7688       case 14:
   7689          if (! op_bit) {
   7690             for(i = 0; i < 8; i++) {
   7691                imm = (imm << 8) | imm_raw;
   7692             }
   7693          } else {
   7694             for(i = 7; i >= 0; i--) {
   7695                tmp = 0;
   7696                for(j = 0; j < 8; j++) {
   7697                   tmp = (tmp << 1) | ((imm_raw >> i) & 1);
   7698                }
   7699                imm = (imm << 8) | tmp;
   7700             }
   7701          }
   7702          break;
   7703       case 15:
   7704          imm = (imm_raw & 0x80) << 5;
   7705          imm |= ((~imm_raw & 0x40) << 5);
   7706          for(i = 1; i <= 4; i++)
   7707             imm |= (imm_raw & 0x40) << i;
   7708          imm |= (imm_raw & 0x7f);
   7709          imm = imm << 19;
   7710          imm = (imm << 32) | imm;
   7711          break;
   7712       default:
   7713          return False;
   7714    }
   7715    if (Q) {
   7716       imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   7717    } else {
   7718       imm_val = mkU64(imm);
   7719    }
   7720    if (((op_bit == 0) &&
   7721       (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 12) == 12))) ||
   7722       ((op_bit == 1) && (cmode == 14))) {
   7723       /* VMOV (immediate) */
   7724       if (Q) {
   7725          putQReg(dreg, imm_val, condT);
   7726       } else {
   7727          putDRegI64(dreg, imm_val, condT);
   7728       }
   7729       DIPimm(imm_raw_pp, cmode, op_bit, "vmov", Q, dreg);
   7730       return True;
   7731    }
   7732    if ((op_bit == 1) &&
   7733       (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 14) == 12))) {
   7734       /* VMVN (immediate) */
   7735       if (Q) {
   7736          putQReg(dreg, unop(Iop_NotV128, imm_val), condT);
   7737       } else {
   7738          putDRegI64(dreg, unop(Iop_Not64, imm_val), condT);
   7739       }
   7740       DIPimm(imm_raw_pp, cmode, op_bit, "vmvn", Q, dreg);
   7741       return True;
   7742    }
   7743    if (Q) {
   7744       tmp_var = newTemp(Ity_V128);
   7745       assign(tmp_var, getQReg(dreg));
   7746    } else {
   7747       tmp_var = newTemp(Ity_I64);
   7748       assign(tmp_var, getDRegI64(dreg));
   7749    }
   7750    if ((op_bit == 0) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
   7751       /* VORR (immediate) */
   7752       if (Q)
   7753          expr = binop(Iop_OrV128, mkexpr(tmp_var), imm_val);
   7754       else
   7755          expr = binop(Iop_Or64, mkexpr(tmp_var), imm_val);
   7756       DIPimm(imm_raw_pp, cmode, op_bit, "vorr", Q, dreg);
   7757    } else if ((op_bit == 1) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
   7758       /* VBIC (immediate) */
   7759       if (Q)
   7760          expr = binop(Iop_AndV128, mkexpr(tmp_var),
   7761                                    unop(Iop_NotV128, imm_val));
   7762       else
   7763          expr = binop(Iop_And64, mkexpr(tmp_var), unop(Iop_Not64, imm_val));
   7764       DIPimm(imm_raw_pp, cmode, op_bit, "vbic", Q, dreg);
   7765    } else {
   7766       return False;
   7767    }
   7768    if (Q)
   7769       putQReg(dreg, expr, condT);
   7770    else
   7771       putDRegI64(dreg, expr, condT);
   7772    return True;
   7773 }
   7774 
   7775 /* A7.4 Advanced SIMD data-processing instructions */
   7776 static
   7777 Bool dis_neon_data_processing ( UInt theInstr, IRTemp condT )
   7778 {
   7779    UInt A = (theInstr >> 19) & 0x1F;
   7780    UInt B = (theInstr >>  8) & 0xF;
   7781    UInt C = (theInstr >>  4) & 0xF;
   7782    UInt U = (theInstr >> 24) & 0x1;
   7783 
   7784    if (! (A & 0x10)) {
   7785       return dis_neon_data_3same(theInstr, condT);
   7786    }
   7787    if (((A & 0x17) == 0x10) && ((C & 0x9) == 0x1)) {
   7788       return dis_neon_data_1reg_and_imm(theInstr, condT);
   7789    }
   7790    if ((C & 1) == 1) {
   7791       return dis_neon_data_2reg_and_shift(theInstr, condT);
   7792    }
   7793    if (((C & 5) == 0) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
   7794       return dis_neon_data_3diff(theInstr, condT);
   7795    }
   7796    if (((C & 5) == 4) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
   7797       return dis_neon_data_2reg_and_scalar(theInstr, condT);
   7798    }
   7799    if ((A & 0x16) == 0x16) {
   7800       if ((U == 0) && ((C & 1) == 0)) {
   7801          return dis_neon_vext(theInstr, condT);
   7802       }
   7803       if ((U != 1) || ((C & 1) == 1))
   7804          return False;
   7805       if ((B & 8) == 0) {
   7806          return dis_neon_data_2reg_misc(theInstr, condT);
   7807       }
   7808       if ((B & 12) == 8) {
   7809          return dis_neon_vtb(theInstr, condT);
   7810       }
   7811       if ((B == 12) && ((C & 9) == 0)) {
   7812          return dis_neon_vdup(theInstr, condT);
   7813       }
   7814    }
   7815    return False;
   7816 }
   7817 
   7818 
   7819 /*------------------------------------------------------------*/
   7820 /*--- NEON loads and stores                                ---*/
   7821 /*------------------------------------------------------------*/
   7822 
   7823 /* For NEON memory operations, we use the standard scheme to handle
   7824    conditionalisation: generate a jump around the instruction if the
   7825    condition is false.  That's only necessary in Thumb mode, however,
   7826    since in ARM mode NEON instructions are unconditional. */
   7827 
   7828 /* A helper function for what follows.  It assumes we already went
   7829    uncond as per comments at the top of this section. */
   7830 static
   7831 void mk_neon_elem_load_to_one_lane( UInt rD, UInt inc, UInt index,
   7832                                     UInt N, UInt size, IRTemp addr )
   7833 {
   7834    UInt i;
   7835    switch (size) {
   7836       case 0:
   7837          putDRegI64(rD, triop(Iop_SetElem8x8, getDRegI64(rD), mkU8(index),
   7838                     loadLE(Ity_I8, mkexpr(addr))), IRTemp_INVALID);
   7839          break;
   7840       case 1:
   7841          putDRegI64(rD, triop(Iop_SetElem16x4, getDRegI64(rD), mkU8(index),
   7842                     loadLE(Ity_I16, mkexpr(addr))), IRTemp_INVALID);
   7843          break;
   7844       case 2:
   7845          putDRegI64(rD, triop(Iop_SetElem32x2, getDRegI64(rD), mkU8(index),
   7846                     loadLE(Ity_I32, mkexpr(addr))), IRTemp_INVALID);
   7847          break;
   7848       default:
   7849          vassert(0);
   7850    }
   7851    for (i = 1; i <= N; i++) {
   7852       switch (size) {
   7853          case 0:
   7854             putDRegI64(rD + i * inc,
   7855                        triop(Iop_SetElem8x8,
   7856                              getDRegI64(rD + i * inc),
   7857                              mkU8(index),
   7858                              loadLE(Ity_I8, binop(Iop_Add32,
   7859                                                   mkexpr(addr),
   7860                                                   mkU32(i * 1)))),
   7861                        IRTemp_INVALID);
   7862             break;
   7863          case 1:
   7864             putDRegI64(rD + i * inc,
   7865                        triop(Iop_SetElem16x4,
   7866                              getDRegI64(rD + i * inc),
   7867                              mkU8(index),
   7868                              loadLE(Ity_I16, binop(Iop_Add32,
   7869                                                    mkexpr(addr),
   7870                                                    mkU32(i * 2)))),
   7871                        IRTemp_INVALID);
   7872             break;
   7873          case 2:
   7874             putDRegI64(rD + i * inc,
   7875                        triop(Iop_SetElem32x2,
   7876                              getDRegI64(rD + i * inc),
   7877                              mkU8(index),
   7878                              loadLE(Ity_I32, binop(Iop_Add32,
   7879                                                    mkexpr(addr),
   7880                                                    mkU32(i * 4)))),
   7881                        IRTemp_INVALID);
   7882             break;
   7883          default:
   7884             vassert(0);
   7885       }
   7886    }
   7887 }
   7888 
   7889 /* A(nother) helper function for what follows.  It assumes we already
   7890    went uncond as per comments at the top of this section. */
   7891 static
   7892 void mk_neon_elem_store_from_one_lane( UInt rD, UInt inc, UInt index,
   7893                                        UInt N, UInt size, IRTemp addr )
   7894 {
   7895    UInt i;
   7896    switch (size) {
   7897       case 0:
   7898          storeLE(mkexpr(addr),
   7899                  binop(Iop_GetElem8x8, getDRegI64(rD), mkU8(index)));
   7900          break;
   7901       case 1:
   7902          storeLE(mkexpr(addr),
   7903                  binop(Iop_GetElem16x4, getDRegI64(rD), mkU8(index)));
   7904          break;
   7905       case 2:
   7906          storeLE(mkexpr(addr),
   7907                  binop(Iop_GetElem32x2, getDRegI64(rD), mkU8(index)));
   7908          break;
   7909       default:
   7910          vassert(0);
   7911    }
   7912    for (i = 1; i <= N; i++) {
   7913       switch (size) {
   7914          case 0:
   7915             storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 1)),
   7916                     binop(Iop_GetElem8x8, getDRegI64(rD + i * inc),
   7917                                           mkU8(index)));
   7918             break;
   7919          case 1:
   7920             storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 2)),
   7921                     binop(Iop_GetElem16x4, getDRegI64(rD + i * inc),
   7922                                            mkU8(index)));
   7923             break;
   7924          case 2:
   7925             storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 4)),
   7926                     binop(Iop_GetElem32x2, getDRegI64(rD + i * inc),
   7927                                            mkU8(index)));
   7928             break;
   7929          default:
   7930             vassert(0);
   7931       }
   7932    }
   7933 }
   7934 
   7935 /* Generate 2x64 -> 2x64 deinterleave code, for VLD2.  Caller must
   7936    make *u0 and *u1 be valid IRTemps before the call. */
   7937 static void math_DEINTERLEAVE_2 (/*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
   7938                                  IRTemp i0, IRTemp i1, Int laneszB)
   7939 {
   7940    /* The following assumes that the guest is little endian, and hence
   7941       that the memory-side (interleaved) data is stored
   7942       little-endianly. */
   7943    vassert(u0 && u1);
   7944    /* This is pretty easy, since we have primitives directly to
   7945       hand. */
   7946    if (laneszB == 4) {
   7947       // memLE(128 bits) == A0 B0 A1 B1
   7948       // i0 == B0 A0, i1 == B1 A1
   7949       // u0 == A1 A0, u1 == B1 B0
   7950       assign(*u0, binop(Iop_InterleaveLO32x2, mkexpr(i1), mkexpr(i0)));
   7951       assign(*u1, binop(Iop_InterleaveHI32x2, mkexpr(i1), mkexpr(i0)));
   7952    } else if (laneszB == 2) {
   7953       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
   7954       // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
   7955       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
   7956       assign(*u0, binop(Iop_CatEvenLanes16x4, mkexpr(i1), mkexpr(i0)));
   7957       assign(*u1, binop(Iop_CatOddLanes16x4,  mkexpr(i1), mkexpr(i0)));
   7958    } else if (laneszB == 1) {
   7959       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
   7960       // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
   7961       // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
   7962       assign(*u0, binop(Iop_CatEvenLanes8x8, mkexpr(i1), mkexpr(i0)));
   7963       assign(*u1, binop(Iop_CatOddLanes8x8,  mkexpr(i1), mkexpr(i0)));
   7964    } else {
   7965       // Can never happen, since VLD2 only has valid lane widths of 32,
   7966       // 16 or 8 bits.
   7967       vpanic("math_DEINTERLEAVE_2");
   7968    }
   7969 }
   7970 
   7971 /* Generate 2x64 -> 2x64 interleave code, for VST2.  Caller must make
   7972    *u0 and *u1 be valid IRTemps before the call. */
   7973 static void math_INTERLEAVE_2 (/*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
   7974                                IRTemp u0, IRTemp u1, Int laneszB)
   7975 {
   7976    /* The following assumes that the guest is little endian, and hence
   7977       that the memory-side (interleaved) data is stored
   7978       little-endianly. */
   7979    vassert(i0 && i1);
   7980    /* This is pretty easy, since we have primitives directly to
   7981       hand. */
   7982    if (laneszB == 4) {
   7983       // memLE(128 bits) == A0 B0 A1 B1
   7984       // i0 == B0 A0, i1 == B1 A1
   7985       // u0 == A1 A0, u1 == B1 B0
   7986       assign(*i0, binop(Iop_InterleaveLO32x2, mkexpr(u1), mkexpr(u0)));
   7987       assign(*i1, binop(Iop_InterleaveHI32x2, mkexpr(u1), mkexpr(u0)));
   7988    } else if (laneszB == 2) {
   7989       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
   7990       // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
   7991       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
   7992       assign(*i0, binop(Iop_InterleaveLO16x4, mkexpr(u1), mkexpr(u0)));
   7993       assign(*i1, binop(Iop_InterleaveHI16x4, mkexpr(u1), mkexpr(u0)));
   7994    } else if (laneszB == 1) {
   7995       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
   7996       // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
   7997       // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
   7998       assign(*i0, binop(Iop_InterleaveLO8x8, mkexpr(u1), mkexpr(u0)));
   7999       assign(*i1, binop(Iop_InterleaveHI8x8, mkexpr(u1), mkexpr(u0)));
   8000    } else {
   8001       // Can never happen, since VST2 only has valid lane widths of 32,
   8002       // 16 or 8 bits.
   8003       vpanic("math_INTERLEAVE_2");
   8004    }
   8005 }
   8006 
   8007 // Helper function for generating arbitrary slicing 'n' dicing of
   8008 // 3 8x8 vectors, as needed for VLD3.8 and VST3.8.
        //
        // desc       : 16 bytes read as 8 (source vector, source lane) pairs;
        //              the first pair describes result lane 7, the last pair
        //              result lane 0.
        // s0, s1, s2 : the three source vectors that desc may select from
        //              (they are used as operands of Iop_Shl64).
        // Returns an expression for the assembled vector.  The temporaries
        // that the expression refers to are created and assigned in here.
   8009 static IRExpr* math_PERM_8x8x3(const UChar* desc,
   8010                                IRTemp s0, IRTemp s1, IRTemp s2)
   8011 {
   8012    // desc is an array of 8 pairs, encoded as 16 bytes,
   8013    // that describe how to assemble the result lanes, starting with
   8014    // lane 7.  Each pair is: first component (0..2) says which of
   8015    // s0/s1/s2 to use.  Second component (0..7) is the lane number
   8016    // in the source to use.
   8017    UInt si;
           // Range-check all 8 pairs (si = 0 .. 7).  Every pair is used below
           // as an index into srcs[] and to compute a shift amount, so the
           // last pair (result lane 0) must be validated too.
   8018    for (si = 0; si < 8; si++) {
   8019       vassert(desc[2 * si + 0] <= 2);
   8020       vassert(desc[2 * si + 1] <= 7);
   8021    }
   8022    IRTemp h3 = newTemp(Ity_I64);
   8023    IRTemp h2 = newTemp(Ity_I64);
   8024    IRTemp h1 = newTemp(Ity_I64);
   8025    IRTemp h0 = newTemp(Ity_I64);
   8026    IRTemp srcs[3] = {s0, s1, s2};
           // SRC_VEC(_lane) is the source vector chosen for result lane _lane;
           // SRC_SHIFT(_lane) is the left shift, 56 - 8 * (source lane number),
           // applied to that vector before it is interleaved.  The pair for
           // result lane _lane lives at desc index 7 - _lane because desc
           // starts with lane 7.
   8027 #  define SRC_VEC(_lane)   mkexpr(srcs[desc[2 * (7-(_lane)) + 0]])
   8028 #  define SRC_SHIFT(_lane) mkU8(56-8*(desc[2 * (7-(_lane)) + 1]))
   8029    assign(h3, binop(Iop_InterleaveHI8x8,
   8030                     binop(Iop_Shl64, SRC_VEC(7), SRC_SHIFT(7)),
   8031                     binop(Iop_Shl64, SRC_VEC(6), SRC_SHIFT(6))));
   8032    assign(h2, binop(Iop_InterleaveHI8x8,
   8033                     binop(Iop_Shl64, SRC_VEC(5), SRC_SHIFT(5)),
   8034                     binop(Iop_Shl64, SRC_VEC(4), SRC_SHIFT(4))));
   8035    assign(h1, binop(Iop_InterleaveHI8x8,
   8036                     binop(Iop_Shl64, SRC_VEC(3), SRC_SHIFT(3)),
   8037                     binop(Iop_Shl64, SRC_VEC(2), SRC_SHIFT(2))));
   8038    assign(h0, binop(Iop_InterleaveHI8x8,
   8039                     binop(Iop_Shl64, SRC_VEC(1), SRC_SHIFT(1)),
   8040                     binop(Iop_Shl64, SRC_VEC(0), SRC_SHIFT(0))));
   8041 #  undef SRC_VEC
   8042 #  undef SRC_SHIFT
   8043    // Now h3..h0 are 64 bit vectors with useful information only
   8044    // in the top 16 bits.  We now concatenate those four 16-bit
   8045    // groups so as to produce the final result.
   8046    IRTemp w1 = newTemp(Ity_I64);
   8047    IRTemp w0 = newTemp(Ity_I64);
   8048    assign(w1, binop(Iop_InterleaveHI16x4, mkexpr(h3), mkexpr(h2)));
   8049    assign(w0, binop(Iop_InterleaveHI16x4, mkexpr(h1), mkexpr(h0)));
   8050    return binop(Iop_InterleaveHI32x2, mkexpr(w1), mkexpr(w0));
   8051 }
   8052 
   8053 /* Generate 3x64 -> 3x64 deinterleave code, for VLD3.  Caller must
   8054    make *u0, *u1 and *u2 be valid IRTemps before the call. */
        /* u0, u1, u2 : out-parameters; all three must be non-NULL (asserted
                        below).  Each receives one deinterleaved vector via
                        assign().
           i0, i1, i2 : the three interleaved input vectors, laid out as
                        shown in the per-case comments below.
           laneszB    : lane size in bytes.  Only 4, 2 and 1 are handled;
                        any other value ends in vpanic. */
   8055 static void math_DEINTERLEAVE_3 (
   8056                /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1, /*OUT*/IRTemp* u2,
   8057                IRTemp i0, IRTemp i1, IRTemp i2, Int laneszB
   8058             )
   8059 {
           // Local shorthands; all three are #undef'd at the end of the
           // function.  IHI32x2/IHI16x4 take expressions, SHL64 takes a temp
           // and a constant shift amount.
   8060 #  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
   8061 #  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
   8062 #  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8063    /* The following assumes that the guest is little endian, and hence
   8064       that the memory-side (interleaved) data is stored
   8065       little-endianly. */
   8066    vassert(u0 && u1 && u2);
   8067    if (laneszB == 4) {
   8068       // memLE(192 bits) == A0 B0 C0 A1 B1 C1
   8069       // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
   8070       // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
              // A shift of 32 brings an input's low 32-bit lane to the top,
              // a shift of 0 leaves the high lane there; IHI32x2 then pairs
              // up the two top lanes, per the layouts above.
   8071       assign(*u0, IHI32x2(SHL64(i1,  0), SHL64(i0, 32)));
   8072       assign(*u1, IHI32x2(SHL64(i2, 32), SHL64(i0,  0)));
   8073       assign(*u2, IHI32x2(SHL64(i2,  0), SHL64(i1, 32)));
   8074    } else if (laneszB == 2) {
   8075       // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
   8076       // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
   8077       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
              // XXX takes four (temp, lane) pairs, for result lanes 3 down
              // to 0.  Each temp is shifted left by 48 - 16 * lane before
              // the interleave steps combine them.
   8078 #     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
   8079                 IHI32x2(                                      \
   8080                    IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
   8081                            SHL64((_tmp2),48-16*(_la2))),      \
   8082                    IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
   8083                            SHL64((_tmp0),48-16*(_la0))))
   8084       assign(*u0, XXX(i2,1, i1,2, i0,3, i0,0));
   8085       assign(*u1, XXX(i2,2, i1,3, i1,0, i0,1));
   8086       assign(*u2, XXX(i2,3, i2,0, i1,1, i0,2));
   8087 #     undef XXX
   8088    } else if (laneszB == 1) {
   8089       // These describe how the result vectors [7..0] are
   8090       // assembled from the source vectors.  Each pair is
   8091       // (source vector number, lane number).
              // Source vector numbers 0, 1, 2 refer to i0, i1, i2, in the
              // order they are passed to math_PERM_8x8x3 below.
   8092       static const UChar de0[16] = {2,5, 2,2, 1,7, 1,4, 1,1, 0,6, 0,3, 0,0};
   8093       static const UChar de1[16] = {2,6, 2,3, 2,0, 1,5, 1,2, 0,7, 0,4, 0,1};
   8094       static const UChar de2[16] = {2,7, 2,4, 2,1, 1,6, 1,3, 1,0, 0,5, 0,2};
   8095       assign(*u0, math_PERM_8x8x3(de0, i0, i1, i2));
   8096       assign(*u1, math_PERM_8x8x3(de1, i0, i1, i2));
   8097       assign(*u2, math_PERM_8x8x3(de2, i0, i1, i2));
   8098    } else {
   8099       // Can never happen, since VLD3 only has valid lane widths of 32,
   8100       // 16 or 8 bits.
   8101       vpanic("math_DEINTERLEAVE_3");
   8102    }
   8103 #  undef SHL64
   8104 #  undef IHI16x4
   8105 #  undef IHI32x2
   8106 }
   8107 
   8108 /* Generate 3x64 -> 3x64 interleave code, for VST3.  Caller must
   8109    make *i0, *i1 and *i2 be valid IRTemps before the call. */
        /* i0, i1, i2 : out-parameters; all three must be non-NULL (asserted
                        below).  Each receives one interleaved vector via
                        assign().
           u0, u1, u2 : the three uninterleaved input vectors, laid out as
                        shown in the per-case comments below.
           laneszB    : lane size in bytes.  Only 4, 2 and 1 are handled;
                        any other value ends in vpanic. */
   8110 static void math_INTERLEAVE_3 (
   8111                /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1, /*OUT*/IRTemp* i2,
   8112                IRTemp u0, IRTemp u1, IRTemp u2, Int laneszB
   8113             )
   8114 {
           // Local shorthands; all three are #undef'd at the end of the
           // function.  IHI32x2/IHI16x4 take expressions, SHL64 takes a temp
           // and a constant shift amount.
   8115 #  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
   8116 #  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
   8117 #  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8118    /* The following assumes that the guest is little endian, and hence
   8119       that the memory-side (interleaved) data is stored
   8120       little-endianly. */
   8121    vassert(i0 && i1 && i2);
   8122    if (laneszB == 4) {
   8123       // memLE(192 bits) == A0 B0 C0 A1 B1 C1
   8124       // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
   8125       // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
              // A shift of 32 brings an input's low 32-bit lane to the top,
              // a shift of 0 leaves the high lane there; IHI32x2 then pairs
              // up the two top lanes, per the layouts above.
   8126       assign(*i0, IHI32x2(SHL64(u1, 32), SHL64(u0, 32)));
   8127       assign(*i1, IHI32x2(SHL64(u0,  0), SHL64(u2, 32)));
   8128       assign(*i2, IHI32x2(SHL64(u2,  0), SHL64(u1,  0)));
   8129    } else if (laneszB == 2) {
   8130       // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
   8131       // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
   8132       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
              // XXX takes four (temp, lane) pairs, for result lanes 3 down
              // to 0.  Each temp is shifted left by 48 - 16 * lane before
              // the interleave steps combine them.
   8133 #     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
   8134                 IHI32x2(                                      \
   8135                    IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
   8136                            SHL64((_tmp2),48-16*(_la2))),      \
   8137                    IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
   8138                            SHL64((_tmp0),48-16*(_la0))))
   8139       assign(*i0, XXX(u0,1, u2,0, u1,0, u0,0));
   8140       assign(*i1, XXX(u1,2, u0,2, u2,1, u1,1));
   8141       assign(*i2, XXX(u2,3, u1,3, u0,3, u2,2));
   8142 #     undef XXX
   8143    } else if (laneszB == 1) {
   8144       // These describe how the result vectors [7..0] are
   8145       // assembled from the source vectors.  Each pair is
   8146       // (source vector number, lane number).
              // Source vector numbers 0, 1, 2 refer to u0, u1, u2, in the
              // order they are passed to math_PERM_8x8x3 below.
   8147       static const UChar in0[16] = {1,2, 0,2, 2,1, 1,1, 0,1, 2,0, 1,0, 0,0};
   8148       static const UChar in1[16] = {0,5, 2,4, 1,4, 0,4, 2,3, 1,3, 0,3, 2,2};
   8149       static const UChar in2[16] = {2,7, 1,7, 0,7, 2,6, 1,6, 0,6, 2,5, 1,5};
   8150       assign(*i0, math_PERM_8x8x3(in0, u0, u1, u2));
   8151       assign(*i1, math_PERM_8x8x3(in1, u0, u1, u2));
   8152       assign(*i2, math_PERM_8x8x3(in2, u0, u1, u2));
   8153    } else {
   8154       // Can never happen, since VST3 only has valid lane widths of 32,
   8155       // 16 or 8 bits.
   8156       vpanic("math_INTERLEAVE_3");
   8157    }
   8158 #  undef SHL64
   8159 #  undef IHI16x4
   8160 #  undef IHI32x2
   8161 }
   8162 
   8163 /* Generate 4x64 -> 4x64 deinterleave code, for VLD4.  Caller must
   8164    make *u0, *u1, *u2 and *u3 be valid IRTemps before the call. */
        /* u0 .. u3 : out-parameters; all four must be non-NULL (asserted
                      below).  Each receives one deinterleaved vector via
                      assign().
           i0 .. i3 : the four interleaved input vectors.
           laneszB  : lane size in bytes.  Only 4, 2 and 1 are handled; any
                      other value ends in vpanic.
           NOTE(review): unlike the 3-way variants, the memory/lane layout is
           not written out here.  Presumably memLE == A0 B0 C0 D0 A1 B1 C1 D1
           ... with u0 taking the As, u1 the Bs and so on -- confirm. */
   8165 static void math_DEINTERLEAVE_4 (
   8166                /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
   8167                /*OUT*/IRTemp* u2, /*OUT*/IRTemp* u3,
   8168                IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3, Int laneszB
   8169             )
   8170 {
           // Local shorthands; all are #undef'd at the end of the function.
           // The 32x2 and 16x4 forms take two temps; IHI8x8 takes a temp and
           // an expression; SHL64 takes a temp and a constant shift amount.
   8171 #  define IHI32x2(_t1, _t2) \
   8172              binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
   8173 #  define ILO32x2(_t1, _t2) \
   8174              binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
   8175 #  define IHI16x4(_t1, _t2) \
   8176              binop(Iop_InterleaveHI16x4, mkexpr(_t1), mkexpr(_t2))
   8177 #  define ILO16x4(_t1, _t2) \
   8178              binop(Iop_InterleaveLO16x4, mkexpr(_t1), mkexpr(_t2))
   8179 #  define IHI8x8(_t1, _e2) \
   8180              binop(Iop_InterleaveHI8x8, mkexpr(_t1), _e2)
   8181 #  define SHL64(_tmp, _amt) \
   8182              binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8183    /* The following assumes that the guest is little endian, and hence
   8184       that the memory-side (interleaved) data is stored
   8185       little-endianly. */
   8186    vassert(u0 && u1 && u2 && u3);
   8187    if (laneszB == 4) {
              // 32-bit lanes: a single interleave step per output, pairing
              // i2 with i0 and i3 with i1.
   8188       assign(*u0, ILO32x2(i2, i0));
   8189       assign(*u1, IHI32x2(i2, i0));
   8190       assign(*u2, ILO32x2(i3, i1));
   8191       assign(*u3, IHI32x2(i3, i1));
   8192    } else if (laneszB == 2) {
              // 16-bit lanes: first a 16x4 interleave of (i1,i0) and (i3,i2)
              // into four intermediates, then the same 32x2 step shape as
              // the 32-bit case applied to those intermediates.
   8193       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8194       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8195       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8196       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8197       assign(b1b0a1a0, ILO16x4(i1, i0));
   8198       assign(b3b2a3a2, ILO16x4(i3, i2));
   8199       assign(d1d0c1c0, IHI16x4(i1, i0));
   8200       assign(d3d2c3c2, IHI16x4(i3, i2));
   8201       // And now do what we did for the 32-bit case.
   8202       assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
   8203       assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
   8204       assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
   8205       assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
   8206    } else if (laneszB == 1) {
   8207       // Deinterleave into 16-bit chunks, then do as the 16-bit case.
              // Each iNx is an 8x8 high-interleave of iN with (iN << 32).
   8208       IRTemp i0x = newTemp(Ity_I64);
   8209       IRTemp i1x = newTemp(Ity_I64);
   8210       IRTemp i2x = newTemp(Ity_I64);
   8211       IRTemp i3x = newTemp(Ity_I64);
   8212       assign(i0x, IHI8x8(i0, SHL64(i0, 32)));
   8213       assign(i1x, IHI8x8(i1, SHL64(i1, 32)));
   8214       assign(i2x, IHI8x8(i2, SHL64(i2, 32)));
   8215       assign(i3x, IHI8x8(i3, SHL64(i3, 32)));
   8216       // From here on is like the 16 bit case.
   8217       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8218       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8219       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8220       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8221       assign(b1b0a1a0, ILO16x4(i1x, i0x));
   8222       assign(b3b2a3a2, ILO16x4(i3x, i2x));
   8223       assign(d1d0c1c0, IHI16x4(i1x, i0x));
   8224       assign(d3d2c3c2, IHI16x4(i3x, i2x));
   8225       // And now do what we did for the 32-bit case.
   8226       assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
   8227       assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
   8228       assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
   8229       assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
   8230    } else {
   8231       // Can never happen, since VLD4 only has valid lane widths of 32,
   8232       // 16 or 8 bits.
   8233       vpanic("math_DEINTERLEAVE_4");
   8234    }
   8235 #  undef SHL64
   8236 #  undef IHI8x8
   8237 #  undef ILO16x4
   8238 #  undef IHI16x4
   8239 #  undef ILO32x2
   8240 #  undef IHI32x2
   8241 }
   8242 
   8243 /* Generate 4x64 -> 4x64 interleave code, for VST4.  Caller must
   8244    make *i0, *i1, *i2 and *i3 be valid IRTemps before the call. */
        /* i0 .. i3 : out-parameters; all four must be non-NULL (asserted
                      below).  Each receives one interleaved vector via
                      assign().
           u0 .. u3 : the four uninterleaved input vectors.
           laneszB  : lane size in bytes.  Only 4, 2 and 1 are handled; any
                      other value ends in vpanic. */
   8245 static void math_INTERLEAVE_4 (
   8246                /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
   8247                /*OUT*/IRTemp* i2, /*OUT*/IRTemp* i3,
   8248                IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3, Int laneszB
   8249             )
   8250 {
           // Local shorthands; all are #undef'd at the end of the function.
           // The 32x2 and 16x4 forms take two temps; COD8x8 takes a temp and
           // an expression; SHL64 takes a temp and a constant shift amount.
   8251 #  define IHI32x2(_t1, _t2) \
   8252              binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
   8253 #  define ILO32x2(_t1, _t2) \
   8254              binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
   8255 #  define CEV16x4(_t1, _t2) \
   8256              binop(Iop_CatEvenLanes16x4, mkexpr(_t1), mkexpr(_t2))
   8257 #  define COD16x4(_t1, _t2) \
   8258              binop(Iop_CatOddLanes16x4, mkexpr(_t1), mkexpr(_t2))
   8259 #  define COD8x8(_t1, _e2) \
   8260              binop(Iop_CatOddLanes8x8, mkexpr(_t1), _e2)
   8261 #  define SHL64(_tmp, _amt) \
   8262              binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8263    /* The following assumes that the guest is little endian, and hence
   8264       that the memory-side (interleaved) data is stored
   8265       little-endianly. */
           // Check the output pointers, which are dereferenced below.  The
           // u0..u3 inputs are IRTemp values, not pointers, so testing them
           // for non-zero would not guard anything.
   8266    vassert(i0 && i1 && i2 && i3);
   8267    if (laneszB == 4) {
   8268       assign(*i0, ILO32x2(u1, u0));
   8269       assign(*i1, ILO32x2(u3, u2));
   8270       assign(*i2, IHI32x2(u1, u0));
   8271       assign(*i3, IHI32x2(u3, u2));
   8272    } else if (laneszB == 2) {
   8273       // First, interleave at the 32-bit lane size.
   8274       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8275       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8276       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8277       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8278       assign(b1b0a1a0, ILO32x2(u1, u0));
   8279       assign(b3b2a3a2, IHI32x2(u1, u0));
   8280       assign(d1d0c1c0, ILO32x2(u3, u2));
   8281       assign(d3d2c3c2, IHI32x2(u3, u2));
   8282       // And interleave (cat) at the 16 bit size.
   8283       assign(*i0, CEV16x4(d1d0c1c0, b1b0a1a0));
   8284       assign(*i1, COD16x4(d1d0c1c0, b1b0a1a0));
   8285       assign(*i2, CEV16x4(d3d2c3c2, b3b2a3a2));
   8286       assign(*i3, COD16x4(d3d2c3c2, b3b2a3a2));
   8287    } else if (laneszB == 1) {
   8288       // First, interleave at the 32-bit lane size.
   8289       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8290       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8291       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8292       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8293       assign(b1b0a1a0, ILO32x2(u1, u0));
   8294       assign(b3b2a3a2, IHI32x2(u1, u0));
   8295       assign(d1d0c1c0, ILO32x2(u3, u2));
   8296       assign(d3d2c3c2, IHI32x2(u3, u2));
   8297       // And interleave (cat) at the 16 bit size.
   8298       IRTemp i0x = newTemp(Ity_I64);
   8299       IRTemp i1x = newTemp(Ity_I64);
   8300       IRTemp i2x = newTemp(Ity_I64);
   8301       IRTemp i3x = newTemp(Ity_I64);
   8302       assign(i0x, CEV16x4(d1d0c1c0, b1b0a1a0));
   8303       assign(i1x, COD16x4(d1d0c1c0, b1b0a1a0));
   8304       assign(i2x, CEV16x4(d3d2c3c2, b3b2a3a2));
   8305       assign(i3x, COD16x4(d3d2c3c2, b3b2a3a2));
   8306       // And rearrange within each word, to get the right 8 bit lanes.
   8307       assign(*i0, COD8x8(i0x, SHL64(i0x, 8)));
   8308       assign(*i1, COD8x8(i1x, SHL64(i1x, 8)));
   8309       assign(*i2, COD8x8(i2x, SHL64(i2x, 8)));
   8310       assign(*i3, COD8x8(i3x, SHL64(i3x, 8)));
   8311    } else {
   8312       // Can never happen, since VST4 only has valid lane widths of 32,
   8313       // 16 or 8 bits.
   8314       vpanic("math_INTERLEAVE_4");
   8315    }
   8316 #  undef SHL64
   8317 #  undef COD8x8
   8318 #  undef COD16x4
   8319 #  undef CEV16x4
   8320 #  undef ILO32x2
   8321 #  undef IHI32x2
   8322 }
   8323 
   8324 /* A7.7 Advanced SIMD element or structure load/store instructions */
   8325 static
   8326 Bool dis_neon_load_or_store ( UInt theInstr,
   8327                               Bool isT, IRTemp condT )
   8328 {
   8329 #  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
   8330    UInt bA = INSN(23,23);
   8331    UInt fB = INSN(11,8);
   8332    UInt bL = INSN(21,21);
   8333    UInt rD = (INSN(22,22) << 4) | INSN(15,12);
   8334    UInt rN = INSN(19,16);
   8335    UInt rM = INSN(3,0);
   8336    UInt N, size, i, j;
   8337    UInt inc;
   8338    UInt regs = 1;
   8339 
   8340    if (isT) {
   8341       vassert(condT != IRTemp_INVALID);
   8342    } else {
   8343       vassert(condT == IRTemp_INVALID);
   8344    }
   8345    /* So now, if condT is not IRTemp_INVALID, we know we're
   8346       dealing with Thumb code. */
   8347 
   8348    if (INSN(20,20) != 0)
   8349       return False;
   8350 
   8351    IRTemp initialRn = newTemp(Ity_I32);
   8352    assign(initialRn, isT ? getIRegT(rN) : getIRegA(rN));
   8353 
   8354    IRTemp initialRm = newTemp(Ity_I32);
   8355    assign(initialRm, isT ? getIRegT(rM) : getIRegA(rM));
   8356 
   8357    /* There are 3 cases:
   8358       (1) VSTn / VLDn (n-element structure from/to one lane)
   8359       (2) VLDn (single element to all lanes)
   8360       (3) VSTn / VLDn (multiple n-element structures)
   8361    */
   8362    if (bA) {
   8363       N = fB & 3;
   8364       if ((fB >> 2) < 3) {
   8365          /* ------------ Case (1) ------------
   8366             VSTn / VLDn (n-element structure from/to one lane) */
   8367 
   8368          size = fB >> 2;
   8369 
   8370          switch (size) {
   8371             case 0: i = INSN(7,5); inc = 1; break;
   8372             case 1: i = INSN(7,6); inc = INSN(5,5) ? 2 : 1; break;
   8373             case 2: i = INSN(7,7); inc = INSN(6,6) ? 2 : 1; break;
   8374             case 3: return False;
   8375             default: vassert(0);
   8376          }
   8377 
   8378          IRTemp addr = newTemp(Ity_I32);
   8379          assign(addr, mkexpr(initialRn));
   8380 
   8381          // go uncond
   8382          if (condT != IRTemp_INVALID)
   8383             mk_skip_over_T32_if_cond_is_false(condT);
   8384          // now uncond
   8385 
   8386          if (bL)
   8387             mk_neon_elem_load_to_one_lane(rD, inc, i, N, size, addr);
   8388          else
   8389             mk_neon_elem_store_from_one_lane(rD, inc, i, N, size, addr);
   8390          DIP("v%s%u.%d {", bL ? "ld" : "st", N + 1, 8 << size);
   8391          for (j = 0; j <= N; j++) {
   8392             if (j)
   8393                DIP(", ");
   8394             DIP("d%u[%u]", rD + j * inc, i);
   8395          }
   8396          DIP("}, [r%u]", rN);
   8397          if (rM != 13 && rM != 15) {
   8398             DIP(", r%u\n", rM);
   8399          } else {
   8400             DIP("%s\n", (rM != 15) ? "!" : "");
   8401          }
   8402       } else {
   8403          /* ------------ Case (2) ------------
   8404             VLDn (single element to all lanes) */
   8405          UInt r;
   8406          if (bL == 0)
   8407             return False;
   8408 
   8409          inc = INSN(5,5) + 1;
   8410          size = INSN(7,6);
   8411 
   8412          /* size == 3 and size == 2 cases differ in alignment constraints */
   8413          if (size == 3 && N == 3 && INSN(4,4) == 1)
   8414             size = 2;
   8415 
   8416          if (size == 0 && N == 0 && INSN(4,4) == 1)
   8417             return False;
   8418          if (N == 2 && INSN(4,4) == 1)
   8419             return False;
   8420          if (size == 3)
   8421             return False;
   8422 
   8423          // go uncond
   8424          if (condT != IRTemp_INVALID)
   8425             mk_skip_over_T32_if_cond_is_false(condT);
   8426          // now uncond
   8427 
   8428          IRTemp addr = newTemp(Ity_I32);
   8429          assign(addr, mkexpr(initialRn));
   8430 
   8431          if (N == 0 && INSN(5,5))
   8432             regs = 2;
   8433 
   8434          for (r = 0; r < regs; r++) {
   8435             switch (size) {
   8436                case 0:
   8437                   putDRegI64(rD + r, unop(Iop_Dup8x8,
   8438                                           loadLE(Ity_I8, mkexpr(addr))),
   8439                              IRTemp_INVALID);
   8440                   break;
   8441                case 1:
   8442                   putDRegI64(rD + r, unop(Iop_Dup16x4,
   8443                                           loadLE(Ity_I16, mkexpr(addr))),
   8444                              IRTemp_INVALID);
   8445                   break;
   8446                case 2:
   8447                   putDRegI64(rD + r, unop(Iop_Dup32x2,
   8448                                           loadLE(Ity_I32, mkexpr(addr))),
   8449                              IRTemp_INVALID);
   8450                   break;
   8451                default:
   8452                   vassert(0);
   8453             }
   8454             for (i = 1; i <= N; i++) {
   8455                switch (size) {
   8456                   case 0:
   8457                      putDRegI64(rD + r + i * inc,
   8458                                 unop(Iop_Dup8x8,
   8459                                      loadLE(Ity_I8, binop(Iop_Add32,
   8460                                                           mkexpr(addr),
   8461                                                           mkU32(i * 1)))),
   8462                                 IRTemp_INVALID);
   8463                      break;
   8464                   case 1:
   8465                      putDRegI64(rD + r + i * inc,
   8466                                 unop(Iop_Dup16x4,
   8467                                      loadLE(Ity_I16, binop(Iop_Add32,
   8468                                                            mkexpr(addr),
   8469                                                            mkU32(i * 2)))),
   8470                                 IRTemp_INVALID);
   8471                      break;
   8472                   case 2:
   8473                      putDRegI64(rD + r + i * inc,
   8474                                 unop(Iop_Dup32x2,
   8475                                      loadLE(Ity_I32, binop(Iop_Add32,
   8476                                                            mkexpr(addr),
   8477                                                            mkU32(i * 4)))),
   8478                                 IRTemp_INVALID);
   8479                      break;
   8480                   default:
   8481                      vassert(0);
   8482                }
   8483             }
   8484          }
   8485          DIP("vld%u.%d {", N + 1, 8 << size);
   8486          for (r = 0; r < regs; r++) {
   8487             for (i = 0; i <= N; i++) {
   8488                if (i || r)
   8489                   DIP(", ");
   8490                DIP("d%u[]", rD + r + i * inc);
   8491             }
   8492          }
   8493          DIP("}, [r%u]", rN);
   8494          if (rM != 13 && rM != 15) {
   8495             DIP(", r%u\n", rM);
   8496          } else {
   8497             DIP("%s\n", (rM != 15) ? "!" : "");
   8498          }
   8499       }
   8500       /* Writeback.  We're uncond here, so no condT-ing. */
   8501       if (rM != 15) {
   8502          if (rM == 13) {
   8503             IRExpr* e = binop(Iop_Add32,
   8504                               mkexpr(initialRn),
   8505                               mkU32((1 << size) * (N + 1)));
   8506             if (isT)
   8507                putIRegT(rN, e, IRTemp_INVALID);
   8508             else
   8509                putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
   8510          } else {
   8511             IRExpr* e = binop(Iop_Add32,
   8512                               mkexpr(initialRn),
   8513                               mkexpr(initialRm));
   8514             if (isT)
   8515                putIRegT(rN, e, IRTemp_INVALID);
   8516             else
   8517                putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
   8518          }
   8519       }
   8520       return True;
   8521    } else {
   8522       /* ------------ Case (3) ------------
   8523          VSTn / VLDn (multiple n-element structures) */
   8524       inc = (fB & 1) + 1;
   8525 
   8526       if (fB == BITS4(0,0,1,0)       // Dd, Dd+1, Dd+2, Dd+3  inc = 1  regs = 4
   8527           || fB == BITS4(0,1,1,0)    // Dd, Dd+1, Dd+2        inc = 1  regs = 3
   8528           || fB == BITS4(0,1,1,1)    // Dd                    inc = 2  regs = 1
   8529           || fB == BITS4(1,0,1,0)) { // Dd, Dd+1              inc = 1  regs = 2
   8530          N = 0; // VLD1/VST1.  'inc' does not appear to have any
   8531                 // meaning for the VLD1/VST1 cases.  'regs' is the number of
   8532                 // registers involved.
   8533          if (rD + regs > 32) return False;
   8534       }
   8535       else
   8536       if (fB == BITS4(0,0,1,1)       // Dd, Dd+1, Dd+2, Dd+3  inc=2  regs = 2
   8537           || fB == BITS4(1,0,0,0)    // Dd, Dd+1              inc=1  regs = 1
   8538           || fB == BITS4(1,0,0,1)) { // Dd, Dd+2              inc=2  regs = 1
   8539          N = 1; // VLD2/VST2.  'regs' is the number of register-pairs involved
   8540          if (regs == 1 && inc == 1 && rD + 1 >= 32) return False;
   8541          if (regs == 1 && inc == 2 && rD + 2 >= 32) return False;
   8542          if (regs == 2 && inc == 2 && rD + 3 >= 32) return False;
   8543       } else if (fB == BITS4(0,1,0,0) || fB == BITS4(0,1,0,1)) {
   8544          N = 2; // VLD3/VST3
   8545          if (inc == 1 && rD + 2 >= 32) return False;
   8546          if (inc == 2 && rD + 4 >= 32) return False;
   8547       } else if (fB == BITS4(0,0,0,0) || fB == BITS4(0,0,0,1)) {
   8548          N = 3; // VLD4/VST4
   8549          if (inc == 1 && rD + 3 >= 32) return False;
   8550          if (inc == 2 && rD + 6 >= 32) return False;
   8551       } else {
   8552          return False;
   8553       }
   8554 
   8555       if (N == 1 && fB == BITS4(0,0,1,1)) {
   8556          regs = 2;
   8557       } else if (N == 0) {
   8558          if (fB == BITS4(1,0,1,0)) {
   8559             regs = 2;
   8560          } else if (fB == BITS4(0,1,1,0)) {
   8561             regs = 3;
   8562          } else if (fB == BITS4(0,0,1,0)) {
   8563             regs = 4;
   8564          }
   8565       }
   8566 
   8567       size = INSN(7,6);
   8568       if (N == 0 && size == 3)
   8569          size = 2;
   8570       if (size == 3)
   8571          return False;
   8572 
   8573       // go uncond
   8574       if (condT != IRTemp_INVALID)
   8575          mk_skip_over_T32_if_cond_is_false(condT);
   8576       // now uncond
   8577 
   8578       IRTemp addr = newTemp(Ity_I32);
   8579       assign(addr, mkexpr(initialRn));
   8580 
   8581       if (N == 0 /* No interleaving -- VLD1/VST1 */) {
   8582          UInt r;
   8583          vassert(regs == 1 || regs == 2 || regs == 3 || regs == 4);
   8584          /* inc has no relevance here */
   8585          for (r = 0; r < regs; r++) {
   8586             if (bL)
   8587                putDRegI64(rD+r, loadLE(Ity_I64, mkexpr(addr)), IRTemp_INVALID);
   8588             else
   8589                storeLE(mkexpr(addr), getDRegI64(rD+r));
   8590             IRTemp tmp = newTemp(Ity_I32);
   8591             assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(8)));
   8592             addr = tmp;
   8593          }
   8594       }
   8595       else
   8596       if (N == 1 /* 2-interleaving -- VLD2/VST2 */) {
   8597          vassert( (regs == 1 && (inc == 1 || inc == 2))
   8598                    || (regs == 2 && inc == 2) );
   8599          // Make 'nregs' be the number of registers and 'regstep'
   8600          // equal the actual register-step.  The ARM encoding, using 'regs'
   8601          // and 'inc', is bizarre.  After this, we have:
   8602          // Dd, Dd+1              regs = 1, inc = 1,   nregs = 2, regstep = 1
   8603          // Dd, Dd+2              regs = 1, inc = 2,   nregs = 2, regstep = 2
   8604          // Dd, Dd+1, Dd+2, Dd+3  regs = 2, inc = 2,   nregs = 4, regstep = 1
   8605          UInt nregs   = 2;
   8606          UInt regstep = 1;
   8607          if (regs == 1 && inc == 1) {
   8608             /* nothing */
   8609          } else if (regs == 1 && inc == 2) {
   8610             regstep = 2;
   8611          } else if (regs == 2 && inc == 2) {
   8612             nregs = 4;
   8613          } else {
   8614             vassert(0);
   8615          }
   8616          // 'a' is address,
   8617          // 'di' is interleaved data, 'du' is uninterleaved data
   8618          if (nregs == 2) {
   8619             IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8620             IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8621             IRTemp  di0 = newTemp(Ity_I64);
   8622             IRTemp  di1 = newTemp(Ity_I64);
   8623             IRTemp  du0 = newTemp(Ity_I64);
   8624             IRTemp  du1 = newTemp(Ity_I64);
   8625             if (bL) {
   8626                assign(di0, loadLE(Ity_I64, a0));
   8627                assign(di1, loadLE(Ity_I64, a1));
   8628                math_DEINTERLEAVE_2(&du0, &du1, di0, di1, 1 << size);
   8629                putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
   8630                putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
   8631             } else {
   8632                assign(du0, getDRegI64(rD + 0 * regstep));
   8633                assign(du1, getDRegI64(rD + 1 * regstep));
   8634                math_INTERLEAVE_2(&di0, &di1, du0, du1, 1 << size);
   8635                storeLE(a0, mkexpr(di0));
   8636                storeLE(a1, mkexpr(di1));
   8637             }
   8638             IRTemp tmp = newTemp(Ity_I32);
   8639             assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(16)));
   8640             addr = tmp;
   8641          } else {
   8642             vassert(nregs == 4);
   8643             vassert(regstep == 1);
   8644             IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8645             IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8646             IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
   8647             IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
   8648             IRTemp  di0 = newTemp(Ity_I64);
   8649             IRTemp  di1 = newTemp(Ity_I64);
   8650             IRTemp  di2 = newTemp(Ity_I64);
   8651             IRTemp  di3 = newTemp(Ity_I64);
   8652             IRTemp  du0 = newTemp(Ity_I64);
   8653             IRTemp  du1 = newTemp(Ity_I64);
   8654             IRTemp  du2 = newTemp(Ity_I64);
   8655             IRTemp  du3 = newTemp(Ity_I64);
   8656             if (bL) {
   8657                assign(di0, loadLE(Ity_I64, a0));
   8658                assign(di1, loadLE(Ity_I64, a1));
   8659                assign(di2, loadLE(Ity_I64, a2));
   8660                assign(di3, loadLE(Ity_I64, a3));
   8661                // Note spooky interleaving: du0, du2, di0, di1 etc
   8662                math_DEINTERLEAVE_2(&du0, &du2, di0, di1, 1 << size);
   8663                math_DEINTERLEAVE_2(&du1, &du3, di2, di3, 1 << size);
   8664                putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
   8665                putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
   8666                putDRegI64(rD + 2 * regstep, mkexpr(du2), IRTemp_INVALID);
   8667                putDRegI64(rD + 3 * regstep, mkexpr(du3), IRTemp_INVALID);
   8668             } else {
   8669                assign(du0, getDRegI64(rD + 0 * regstep));
   8670                assign(du1, getDRegI64(rD + 1 * regstep));
   8671                assign(du2, getDRegI64(rD + 2 * regstep));
   8672                assign(du3, getDRegI64(rD + 3 * regstep));
   8673                // Note spooky interleaving: du0, du2, di0, di1 etc
   8674                math_INTERLEAVE_2(&di0, &di1, du0, du2, 1 << size);
   8675                math_INTERLEAVE_2(&di2, &di3, du1, du3, 1 << size);
   8676                storeLE(a0, mkexpr(di0));
   8677                storeLE(a1, mkexpr(di1));
   8678                storeLE(a2, mkexpr(di2));
   8679                storeLE(a3, mkexpr(di3));
   8680             }
   8681 
   8682             IRTemp tmp = newTemp(Ity_I32);
   8683             assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
   8684             addr = tmp;
   8685          }
   8686       }
   8687       else
   8688       if (N == 2 /* 3-interleaving -- VLD3/VST3 */) {
   8689          // Dd, Dd+1, Dd+2   regs = 1, inc = 1
   8690          // Dd, Dd+2, Dd+4   regs = 1, inc = 2
   8691          vassert(regs == 1 && (inc == 1 || inc == 2));
   8692          IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8693          IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8694          IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
   8695          IRTemp  di0 = newTemp(Ity_I64);
   8696          IRTemp  di1 = newTemp(Ity_I64);
   8697          IRTemp  di2 = newTemp(Ity_I64);
   8698          IRTemp  du0 = newTemp(Ity_I64);
   8699          IRTemp  du1 = newTemp(Ity_I64);
   8700          IRTemp  du2 = newTemp(Ity_I64);
   8701          if (bL) {
   8702             assign(di0, loadLE(Ity_I64, a0));
   8703             assign(di1, loadLE(Ity_I64, a1));
   8704             assign(di2, loadLE(Ity_I64, a2));
   8705             math_DEINTERLEAVE_3(&du0, &du1, &du2, di0, di1, di2, 1 << size);
   8706             putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
   8707             putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
   8708             putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
   8709          } else {
   8710             assign(du0, getDRegI64(rD + 0 * inc));
   8711             assign(du1, getDRegI64(rD + 1 * inc));
   8712             assign(du2, getDRegI64(rD + 2 * inc));
   8713             math_INTERLEAVE_3(&di0, &di1, &di2, du0, du1, du2, 1 << size);
   8714             storeLE(a0, mkexpr(di0));
   8715             storeLE(a1, mkexpr(di1));
   8716             storeLE(a2, mkexpr(di2));
   8717          }
   8718          IRTemp tmp = newTemp(Ity_I32);
   8719          assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(24)));
   8720          addr = tmp;
   8721       }
   8722       else
   8723       if (N == 3 /* 4-interleaving -- VLD4/VST4 */) {
   8724          // Dd, Dd+1, Dd+2, Dd+3   regs = 1, inc = 1
   8725          // Dd, Dd+2, Dd+4, Dd+6   regs = 1, inc = 2
   8726          vassert(regs == 1 && (inc == 1 || inc == 2));
   8727          IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8728          IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8729          IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
   8730          IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
   8731          IRTemp  di0 = newTemp(Ity_I64);
   8732          IRTemp  di1 = newTemp(Ity_I64);
   8733          IRTemp  di2 = newTemp(Ity_I64);
   8734          IRTemp  di3 = newTemp(Ity_I64);
   8735          IRTemp  du0 = newTemp(Ity_I64);
   8736          IRTemp  du1 = newTemp(Ity_I64);
   8737          IRTemp  du2 = newTemp(Ity_I64);
   8738          IRTemp  du3 = newTemp(Ity_I64);
   8739          if (bL) {
   8740             assign(di0, loadLE(Ity_I64, a0));
   8741             assign(di1, loadLE(Ity_I64, a1));
   8742             assign(di2, loadLE(Ity_I64, a2));
   8743             assign(di3, loadLE(Ity_I64, a3));
   8744             math_DEINTERLEAVE_4(&du0, &du1, &du2, &du3,
   8745                                 di0, di1, di2, di3, 1 << size);
   8746             putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
   8747             putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
   8748             putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
   8749             putDRegI64(rD + 3 * inc, mkexpr(du3), IRTemp_INVALID);
   8750          } else {
   8751             assign(du0, getDRegI64(rD + 0 * inc));
   8752             assign(du1, getDRegI64(rD + 1 * inc));
   8753             assign(du2, getDRegI64(rD + 2 * inc));
   8754             assign(du3, getDRegI64(rD + 3 * inc));
   8755             math_INTERLEAVE_4(&di0, &di1, &di2, &di3,
   8756                               du0, du1, du2, du3, 1 << size);
   8757             storeLE(a0, mkexpr(di0));
   8758             storeLE(a1, mkexpr(di1));
   8759             storeLE(a2, mkexpr(di2));
   8760             storeLE(a3, mkexpr(di3));
   8761          }
   8762          IRTemp tmp = newTemp(Ity_I32);
   8763          assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
   8764          addr = tmp;
   8765       }
   8766       else {
   8767          vassert(0);
   8768       }
   8769 
   8770       /* Writeback */
   8771       if (rM != 15) {
   8772          IRExpr* e;
   8773          if (rM == 13) {
   8774             e = binop(Iop_Add32, mkexpr(initialRn),
   8775                                  mkU32(8 * (N + 1) * regs));
   8776          } else {
   8777             e = binop(Iop_Add32, mkexpr(initialRn),
   8778                                  mkexpr(initialRm));
   8779          }
   8780          if (isT)
   8781             putIRegT(rN, e, IRTemp_INVALID);
   8782          else
   8783             putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
   8784       }
   8785 
   8786       DIP("v%s%u.%d {", bL ? "ld" : "st", N + 1, 8 << INSN(7,6));
   8787       if ((inc == 1 && regs * (N + 1) > 1)
   8788           || (inc == 2 && regs > 1 && N > 0)) {
   8789          DIP("d%u-d%u", rD, rD + regs * (N + 1) - 1);
   8790       } else {
   8791          UInt r;
   8792          for (r = 0; r < regs; r++) {
   8793             for (i = 0; i <= N; i++) {
   8794                if (i || r)
   8795                   DIP(", ");
   8796                DIP("d%u", rD + r + i * inc);
   8797             }
   8798          }
   8799       }
   8800       DIP("}, [r%u]", rN);
   8801       if (rM != 13 && rM != 15) {
   8802          DIP(", r%u\n", rM);
   8803       } else {
   8804          DIP("%s\n", (rM != 15) ? "!" : "");
   8805       }
   8806       return True;
   8807    }
   8808 #  undef INSN
   8809 }
   8810 
   8811 
   8812 /*------------------------------------------------------------*/
   8813 /*--- NEON, top level control                              ---*/
   8814 /*------------------------------------------------------------*/
   8815 
   8816 /* Both ARM and Thumb */
   8817 
   8818 /* Translate a NEON instruction.    If successful, returns
   8819    True and *dres may or may not be updated.  If failure, returns
   8820    False and doesn't change *dres nor create any IR.
   8821 
   8822    The Thumb and ARM encodings are similar for the 24 bottom bits, but
   8823    the top 8 bits are slightly different.  In both cases, the caller
   8824    must pass the entire 32 bits.  Callers may pass any instruction;
   8825    this ignores non-NEON ones.
   8826 
   8827    Caller must supply an IRTemp 'condT' holding the gating condition,
   8828    or IRTemp_INVALID indicating the insn is always executed.  In ARM
   8829    code, this must always be IRTemp_INVALID because NEON insns are
   8830    unconditional for ARM.
   8831 
   8832    Finally, the caller must indicate whether this occurs in ARM or in
   8833    Thumb code.
   8834 */
   8835 static Bool decode_NEON_instruction (
   8836                /*MOD*/DisResult* dres,
   8837                UInt              insn32,
   8838                IRTemp            condT,
   8839                Bool              isT
   8840             )
   8841 {
   8842 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn32, (_bMax), (_bMin))
   8843 
   8844    /* There are two kinds of instruction to deal with: load/store and
   8845       data processing.  In each case, in ARM mode we merely identify
   8846       the kind, and pass it on to the relevant sub-handler.  In Thumb
   8847       mode we identify the kind, swizzle the bits around to make it
   8848       have the same encoding as in ARM, and hand it on to the
   8849       sub-handler.
   8850    */
   8851 
   8852    /* In ARM mode, NEON instructions can't be conditional. */
   8853    if (!isT)
   8854       vassert(condT == IRTemp_INVALID);
   8855 
   8856    /* Data processing:
   8857       Thumb: 111U 1111 AAAA Axxx xxxx BBBB CCCC xxxx
   8858       ARM:   1111 001U AAAA Axxx xxxx BBBB CCCC xxxx
   8859    */
   8860    if (!isT && INSN(31,25) == BITS7(1,1,1,1,0,0,1)) {
   8861       // ARM, DP
   8862       return dis_neon_data_processing(INSN(31,0), condT);
   8863    }
   8864    if (isT && INSN(31,29) == BITS3(1,1,1)
   8865        && INSN(27,24) == BITS4(1,1,1,1)) {
   8866       // Thumb, DP
   8867       UInt reformatted = INSN(23,0);
   8868       reformatted |= (INSN(28,28) << 24); // U bit
   8869       reformatted |= (BITS7(1,1,1,1,0,0,1) << 25);
   8870       return dis_neon_data_processing(reformatted, condT);
   8871    }
   8872 
   8873    /* Load/store:
   8874       Thumb: 1111 1001 AxL0 xxxx xxxx BBBB xxxx xxxx
   8875       ARM:   1111 0100 AxL0 xxxx xxxx BBBB xxxx xxxx
   8876    */
   8877    if (!isT && INSN(31,24) == BITS8(1,1,1,1,0,1,0,0)) {
   8878       // ARM, memory
   8879       return dis_neon_load_or_store(INSN(31,0), isT, condT);
   8880    }
   8881    if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
   8882       UInt reformatted = INSN(23,0);
   8883       reformatted |= (BITS8(1,1,1,1,0,1,0,0) << 24);
   8884       return dis_neon_load_or_store(reformatted, isT, condT);
   8885    }
   8886 
   8887    /* Doesn't match. */
   8888    return False;
   8889 
   8890 #  undef INSN
   8891 }
   8892 
   8893 
   8894 /*------------------------------------------------------------*/
   8895 /*--- V6 MEDIA instructions                                ---*/
   8896 /*------------------------------------------------------------*/
   8897 
   8898 /* Both ARM and Thumb */
   8899 
   8900 /* Translate a V6 media instruction.    If successful, returns
   8901    True and *dres may or may not be updated.  If failure, returns
   8902    False and doesn't change *dres nor create any IR.
   8903 
   8904    The Thumb and ARM encodings are completely different.  In Thumb
   8905    mode, the caller must pass the entire 32 bits.  In ARM mode it must
   8906    pass the lower 28 bits.  Apart from that, callers may pass any
   8907    instruction; this function ignores anything it doesn't recognise.
   8908 
   8909    Caller must supply an IRTemp 'condT' holding the gating condition,
   8910    or IRTemp_INVALID indicating the insn is always executed.
   8911 
   8912    Caller must also supply an ARMCondcode 'cond'.  This is only used
   8913    for debug printing, no other purpose.  For ARM, this is simply the
   8914    top 4 bits of the original instruction.  For Thumb, the condition
   8915    is not (really) known until run time, and so ARMCondAL should be
   8916    passed, only so that printing of these instructions does not show
   8917    any condition.
   8918 
   8919    Finally, the caller must indicate whether this occurs in ARM or in
   8920    Thumb code.
   8921 */
   8922 static Bool decode_V6MEDIA_instruction (
   8923                /*MOD*/DisResult* dres,
   8924                UInt              insnv6m,
   8925                IRTemp            condT,
   8926                ARMCondcode       conq,
   8927                Bool              isT
   8928             )
   8929 {
   8930 #  define INSNA(_bMax,_bMin)   SLICE_UInt(insnv6m, (_bMax), (_bMin))
   8931 #  define INSNT0(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 16) & 0xFFFF), \
   8932                                            (_bMax), (_bMin) )
   8933 #  define INSNT1(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 0)  & 0xFFFF), \
   8934                                            (_bMax), (_bMin) )
   8935    HChar dis_buf[128];
   8936    dis_buf[0] = 0;
   8937 
   8938    if (isT) {
   8939       vassert(conq == ARMCondAL);
   8940    } else {
   8941       vassert(INSNA(31,28) == BITS4(0,0,0,0)); // caller's obligation
   8942       vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
   8943    }
   8944 
   8945    /* ----------- smulbb, smulbt, smultb, smultt ----------- */
   8946    {
   8947      UInt regD = 99, regM = 99, regN = 99, bitM = 0, bitN = 0;
   8948      Bool gate = False;
   8949 
   8950      if (isT) {
   8951         if (INSNT0(15,4) == 0xFB1 && INSNT1(15,12) == BITS4(1,1,1,1)
   8952             && INSNT1(7,6) == BITS2(0,0)) {
   8953            regD = INSNT1(11,8);
   8954            regM = INSNT1(3,0);
   8955            regN = INSNT0(3,0);
   8956            bitM = INSNT1(4,4);
   8957            bitN = INSNT1(5,5);
   8958            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   8959               gate = True;
   8960         }
   8961      } else {
   8962         if (BITS8(0,0,0,1,0,1,1,0) == INSNA(27,20) &&
   8963             BITS4(0,0,0,0)         == INSNA(15,12) &&
   8964             BITS4(1,0,0,0)         == (INSNA(7,4) & BITS4(1,0,0,1)) ) {
   8965            regD = INSNA(19,16);
   8966            regM = INSNA(11,8);
   8967            regN = INSNA(3,0);
   8968            bitM = INSNA(6,6);
   8969            bitN = INSNA(5,5);
   8970            if (regD != 15 && regN != 15 && regM != 15)
   8971               gate = True;
   8972         }
   8973      }
   8974 
   8975      if (gate) {
   8976         IRTemp srcN = newTemp(Ity_I32);
   8977         IRTemp srcM = newTemp(Ity_I32);
   8978         IRTemp res  = newTemp(Ity_I32);
   8979 
   8980         assign( srcN, binop(Iop_Sar32,
   8981                             binop(Iop_Shl32,
   8982                                   isT ? getIRegT(regN) : getIRegA(regN),
   8983                                   mkU8(bitN ? 0 : 16)), mkU8(16)) );
   8984         assign( srcM, binop(Iop_Sar32,
   8985                             binop(Iop_Shl32,
   8986                                   isT ? getIRegT(regM) : getIRegA(regM),
   8987                                   mkU8(bitM ? 0 : 16)), mkU8(16)) );
   8988         assign( res, binop(Iop_Mul32, mkexpr(srcN), mkexpr(srcM)) );
   8989 
   8990         if (isT)
   8991            putIRegT( regD, mkexpr(res), condT );
   8992         else
   8993            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   8994 
   8995         DIP( "smul%c%c%s r%u, r%u, r%u\n", bitN ? 't' : 'b', bitM ? 't' : 'b',
   8996              nCC(conq), regD, regN, regM );
   8997         return True;
   8998      }
   8999      /* fall through */
   9000    }
   9001 
   9002    /* ------------ smulwb<y><c> <Rd>,<Rn>,<Rm> ------------- */
   9003    /* ------------ smulwt<y><c> <Rd>,<Rn>,<Rm> ------------- */
   9004    {
   9005      UInt regD = 99, regN = 99, regM = 99, bitM = 0;
   9006      Bool gate = False;
   9007 
   9008      if (isT) {
   9009         if (INSNT0(15,4) == 0xFB3 && INSNT1(15,12) == BITS4(1,1,1,1)
   9010             && INSNT1(7,5) == BITS3(0,0,0)) {
   9011           regN = INSNT0(3,0);
   9012           regD = INSNT1(11,8);
   9013           regM = INSNT1(3,0);
   9014           bitM = INSNT1(4,4);
   9015           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9016              gate = True;
   9017         }
   9018      } else {
   9019         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
   9020             INSNA(15,12) == BITS4(0,0,0,0)         &&
   9021             (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,1,0)) {
   9022            regD = INSNA(19,16);
   9023            regN = INSNA(3,0);
   9024            regM = INSNA(11,8);
   9025            bitM = INSNA(6,6);
   9026            if (regD != 15 && regN != 15 && regM != 15)
   9027               gate = True;
   9028         }
   9029      }
   9030 
   9031      if (gate) {
   9032         IRTemp irt_prod = newTemp(Ity_I64);
   9033 
   9034         assign( irt_prod,
   9035                 binop(Iop_MullS32,
   9036                       isT ? getIRegT(regN) : getIRegA(regN),
   9037                       binop(Iop_Sar32,
   9038                             binop(Iop_Shl32,
   9039                                   isT ? getIRegT(regM) : getIRegA(regM),
   9040                                   mkU8(bitM ? 0 : 16)),
   9041                             mkU8(16))) );
   9042 
   9043         IRExpr* ire_result = binop(Iop_Or32,
   9044                                    binop( Iop_Shl32,
   9045                                           unop(Iop_64HIto32, mkexpr(irt_prod)),
   9046                                           mkU8(16) ),
   9047                                    binop( Iop_Shr32,
   9048                                           unop(Iop_64to32, mkexpr(irt_prod)),
   9049                                           mkU8(16) ) );
   9050 
   9051         if (isT)
   9052            putIRegT( regD, ire_result, condT );
   9053         else
   9054            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9055 
   9056         DIP("smulw%c%s r%u, r%u, r%u\n",
   9057             bitM ? 't' : 'b', nCC(conq),regD,regN,regM);
   9058         return True;
   9059      }
   9060      /* fall through */
   9061    }
   9062 
   9063    /* ------------ pkhbt<c> Rd, Rn, Rm {,LSL #imm} ------------- */
   9064    /* ------------ pkhtb<c> Rd, Rn, Rm {,ASR #imm} ------------- */
   9065    {
   9066      UInt regD = 99, regN = 99, regM = 99, imm5 = 99, shift_type = 99;
   9067      Bool tbform = False;
   9068      Bool gate = False;
   9069 
   9070      if (isT) {
   9071         if (INSNT0(15,4) == 0xEAC
   9072             && INSNT1(15,15) == 0 && INSNT1(4,4) == 0) {
   9073            regN = INSNT0(3,0);
   9074            regD = INSNT1(11,8);
   9075            regM = INSNT1(3,0);
   9076            imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
   9077            shift_type = (INSNT1(5,5) << 1) | 0;
   9078            tbform = (INSNT1(5,5) == 0) ? False : True;
   9079            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9080               gate = True;
   9081         }
   9082      } else {
   9083         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
   9084             INSNA(5,4)   == BITS2(0,1)             &&
   9085             (INSNA(6,6)  == 0 || INSNA(6,6) == 1) ) {
   9086            regD = INSNA(15,12);
   9087            regN = INSNA(19,16);
   9088            regM = INSNA(3,0);
   9089            imm5 = INSNA(11,7);
   9090            shift_type = (INSNA(6,6) << 1) | 0;
   9091            tbform = (INSNA(6,6) == 0) ? False : True;
   9092            if (regD != 15 && regN != 15 && regM != 15)
   9093               gate = True;
   9094         }
   9095      }
   9096 
   9097      if (gate) {
   9098         IRTemp irt_regM       = newTemp(Ity_I32);
   9099         IRTemp irt_regM_shift = newTemp(Ity_I32);
   9100         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   9101         compute_result_and_C_after_shift_by_imm5(
   9102            dis_buf, &irt_regM_shift, NULL, irt_regM, shift_type, imm5, regM );
   9103 
   9104         UInt mask = (tbform == True) ? 0x0000FFFF : 0xFFFF0000;
   9105         IRExpr* ire_result
   9106           = binop( Iop_Or32,
   9107                    binop(Iop_And32, mkexpr(irt_regM_shift), mkU32(mask)),
   9108                    binop(Iop_And32, isT ? getIRegT(regN) : getIRegA(regN),
   9109                                     unop(Iop_Not32, mkU32(mask))) );
   9110 
   9111         if (isT)
   9112            putIRegT( regD, ire_result, condT );
   9113         else
   9114            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9115 
   9116         DIP( "pkh%s%s r%u, r%u, r%u %s\n", tbform ? "tb" : "bt",
   9117              nCC(conq), regD, regN, regM, dis_buf );
   9118 
   9119         return True;
   9120      }
   9121      /* fall through */
   9122    }
   9123 
   9124    /* ---------- usat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
   9125    {
   9126      UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
   9127      Bool gate = False;
   9128 
   9129      if (isT) {
   9130         if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,1,0)
   9131             && INSNT0(4,4) == 0
   9132             && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
   9133            regD       = INSNT1(11,8);
   9134            regN       = INSNT0(3,0);
   9135            shift_type = (INSNT0(5,5) << 1) | 0;
   9136            imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
   9137            sat_imm    = INSNT1(4,0);
   9138            if (!isBadRegT(regD) && !isBadRegT(regN))
   9139               gate = True;
   9140            if (shift_type == BITS2(1,0) && imm5 == 0)
   9141               gate = False;
   9142         }
   9143      } else {
   9144         if (INSNA(27,21) == BITS7(0,1,1,0,1,1,1) &&
   9145             INSNA(5,4)   == BITS2(0,1)) {
   9146            regD       = INSNA(15,12);
   9147            regN       = INSNA(3,0);
   9148            shift_type = (INSNA(6,6) << 1) | 0;
   9149            imm5       = INSNA(11,7);
   9150            sat_imm    = INSNA(20,16);
   9151            if (regD != 15 && regN != 15)
   9152               gate = True;
   9153         }
   9154      }
   9155 
   9156      if (gate) {
   9157         IRTemp irt_regN       = newTemp(Ity_I32);
   9158         IRTemp irt_regN_shift = newTemp(Ity_I32);
   9159         IRTemp irt_sat_Q      = newTemp(Ity_I32);
   9160         IRTemp irt_result     = newTemp(Ity_I32);
   9161 
   9162         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9163         compute_result_and_C_after_shift_by_imm5(
   9164                 dis_buf, &irt_regN_shift, NULL,
   9165                 irt_regN, shift_type, imm5, regN );
   9166 
   9167         armUnsignedSatQ( &irt_result, &irt_sat_Q, irt_regN_shift, sat_imm );
   9168         or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
   9169 
   9170         if (isT)
   9171            putIRegT( regD, mkexpr(irt_result), condT );
   9172         else
   9173            putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
   9174 
   9175         DIP("usat%s r%u, #0x%04x, %s\n",
   9176             nCC(conq), regD, imm5, dis_buf);
   9177         return True;
   9178      }
   9179      /* fall through */
   9180    }
   9181 
   9182   /* ----------- ssat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
   9183    {
   9184      UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
   9185      Bool gate = False;
   9186 
   9187      if (isT) {
   9188         if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
   9189             && INSNT0(4,4) == 0
   9190             && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
   9191            regD       = INSNT1(11,8);
   9192            regN       = INSNT0(3,0);
   9193            shift_type = (INSNT0(5,5) << 1) | 0;
   9194            imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
   9195            sat_imm    = INSNT1(4,0) + 1;
   9196            if (!isBadRegT(regD) && !isBadRegT(regN))
   9197               gate = True;
   9198            if (shift_type == BITS2(1,0) && imm5 == 0)
   9199               gate = False;
   9200         }
   9201      } else {
   9202         if (INSNA(27,21) == BITS7(0,1,1,0,1,0,1) &&
   9203             INSNA(5,4)   == BITS2(0,1)) {
   9204            regD       = INSNA(15,12);
   9205            regN       = INSNA(3,0);
   9206            shift_type = (INSNA(6,6) << 1) | 0;
   9207            imm5       = INSNA(11,7);
   9208            sat_imm    = INSNA(20,16) + 1;
   9209            if (regD != 15 && regN != 15)
   9210               gate = True;
   9211         }
   9212      }
   9213 
   9214      if (gate) {
   9215         IRTemp irt_regN       = newTemp(Ity_I32);
   9216         IRTemp irt_regN_shift = newTemp(Ity_I32);
   9217         IRTemp irt_sat_Q      = newTemp(Ity_I32);
   9218         IRTemp irt_result     = newTemp(Ity_I32);
   9219 
   9220         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9221         compute_result_and_C_after_shift_by_imm5(
   9222                 dis_buf, &irt_regN_shift, NULL,
   9223                 irt_regN, shift_type, imm5, regN );
   9224 
   9225         armSignedSatQ( irt_regN_shift, sat_imm, &irt_result, &irt_sat_Q );
   9226         or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
   9227 
   9228         if (isT)
   9229            putIRegT( regD, mkexpr(irt_result), condT );
   9230         else
   9231            putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
   9232 
   9233         DIP( "ssat%s r%u, #0x%04x, %s\n",
   9234              nCC(conq), regD, imm5, dis_buf);
   9235         return True;
   9236     }
   9237     /* fall through */
   9238   }
   9239 
   9240    /* ----------- ssat16<c> <Rd>,#<imm>,<Rn> ----------- */
   9241    {
   9242      UInt regD = 99, regN = 99, sat_imm = 99;
   9243      Bool gate = False;
   9244 
   9245      if (isT) {
   9246         if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
   9247             && INSNT0(5,4) == BITS2(1,0)
   9248             && INSNT1(15,12) == BITS4(0,0,0,0)
   9249             && INSNT1(7,4) == BITS4(0,0,0,0)) {
   9250            regD       = INSNT1(11,8);
   9251            regN       = INSNT0(3,0);
   9252            sat_imm    = INSNT1(3,0) + 1;
   9253            if (!isBadRegT(regD) && !isBadRegT(regN))
   9254               gate = True;
   9255         }
   9256      } else {
   9257         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,1,0) &&
   9258             INSNA(11,4)   == BITS8(1,1,1,1,0,0,1,1)) {
   9259            regD       = INSNA(15,12);
   9260            regN       = INSNA(3,0);
   9261            sat_imm    = INSNA(19,16) + 1;
   9262            if (regD != 15 && regN != 15)
   9263               gate = True;
   9264         }
   9265      }
   9266 
   9267      if (gate) {
   9268         IRTemp irt_regN    = newTemp(Ity_I32);
   9269         IRTemp irt_regN_lo = newTemp(Ity_I32);
   9270         IRTemp irt_regN_hi = newTemp(Ity_I32);
   9271         IRTemp irt_Q_lo    = newTemp(Ity_I32);
   9272         IRTemp irt_Q_hi    = newTemp(Ity_I32);
   9273         IRTemp irt_res_lo  = newTemp(Ity_I32);
   9274         IRTemp irt_res_hi  = newTemp(Ity_I32);
   9275 
   9276         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9277         assign( irt_regN_lo,
   9278                 binop( Iop_Sar32,
   9279                        binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
   9280                        mkU8(16)) );
   9281         assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
   9282 
   9283         armSignedSatQ( irt_regN_lo, sat_imm, &irt_res_lo, &irt_Q_lo );
   9284         or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
   9285 
   9286         armSignedSatQ( irt_regN_hi, sat_imm, &irt_res_hi, &irt_Q_hi );
   9287         or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
   9288 
   9289         IRExpr* ire_result
   9290            = binop(Iop_Or32,
   9291                    binop(Iop_And32, mkexpr(irt_res_lo), mkU32(0xFFFF)),
   9292                    binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)));
   9293         if (isT)
   9294            putIRegT( regD, ire_result, condT );
   9295         else
   9296            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9297 
   9298         DIP( "ssat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
   9299         return True;
   9300      }
   9301      /* fall through */
   9302    }
   9303 
   9304    /* -------------- usat16<c> <Rd>,#<imm4>,<Rn> --------------- */
   9305    {
   9306      UInt regD = 99, regN = 99, sat_imm = 99;
   9307      Bool gate = False;
   9308 
   9309      if (isT) {
   9310         if (INSNT0(15,4) == 0xF3A && (INSNT1(15,0) & 0xF0F0) == 0x0000) {
   9311            regN = INSNT0(3,0);
   9312            regD = INSNT1(11,8);
   9313            sat_imm = INSNT1(3,0);
   9314            if (!isBadRegT(regD) && !isBadRegT(regN))
   9315               gate = True;
   9316        }
   9317      } else {
   9318         if (INSNA(27,20) == BITS8(0,1,1,0,1,1,1,0) &&
   9319             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9320             INSNA(7,4)   == BITS4(0,0,1,1)) {
   9321            regD    = INSNA(15,12);
   9322            regN    = INSNA(3,0);
   9323            sat_imm = INSNA(19,16);
   9324            if (regD != 15 && regN != 15)
   9325               gate = True;
   9326         }
   9327      }
   9328 
   9329      if (gate) {
   9330         IRTemp irt_regN    = newTemp(Ity_I32);
   9331         IRTemp irt_regN_lo = newTemp(Ity_I32);
   9332         IRTemp irt_regN_hi = newTemp(Ity_I32);
   9333         IRTemp irt_Q_lo    = newTemp(Ity_I32);
   9334         IRTemp irt_Q_hi    = newTemp(Ity_I32);
   9335         IRTemp irt_res_lo  = newTemp(Ity_I32);
   9336         IRTemp irt_res_hi  = newTemp(Ity_I32);
   9337 
   9338         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9339         assign( irt_regN_lo, binop( Iop_Sar32,
   9340                                     binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
   9341                                     mkU8(16)) );
   9342         assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
   9343 
   9344         armUnsignedSatQ( &irt_res_lo, &irt_Q_lo, irt_regN_lo, sat_imm );
   9345         or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
   9346 
   9347         armUnsignedSatQ( &irt_res_hi, &irt_Q_hi, irt_regN_hi, sat_imm );
   9348         or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
   9349 
   9350         IRExpr* ire_result = binop( Iop_Or32,
   9351                                     binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)),
   9352                                     mkexpr(irt_res_lo) );
   9353 
   9354         if (isT)
   9355            putIRegT( regD, ire_result, condT );
   9356         else
   9357            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9358 
   9359         DIP( "usat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
   9360         return True;
   9361      }
   9362      /* fall through */
   9363    }
   9364 
   9365    /* -------------- uadd16<c> <Rd>,<Rn>,<Rm> -------------- */
   9366    {
   9367      UInt regD = 99, regN = 99, regM = 99;
   9368      Bool gate = False;
   9369 
   9370      if (isT) {
   9371         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9372            regN = INSNT0(3,0);
   9373            regD = INSNT1(11,8);
   9374            regM = INSNT1(3,0);
   9375            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9376               gate = True;
   9377         }
   9378      } else {
   9379         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9380             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9381             INSNA(7,4)   == BITS4(0,0,0,1)) {
   9382            regD = INSNA(15,12);
   9383            regN = INSNA(19,16);
   9384            regM = INSNA(3,0);
   9385            if (regD != 15 && regN != 15 && regM != 15)
   9386               gate = True;
   9387         }
   9388      }
   9389 
   9390      if (gate) {
   9391         IRTemp rNt  = newTemp(Ity_I32);
   9392         IRTemp rMt  = newTemp(Ity_I32);
   9393         IRTemp res  = newTemp(Ity_I32);
   9394         IRTemp reso = newTemp(Ity_I32);
   9395 
   9396         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9397         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9398 
   9399         assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
   9400         if (isT)
   9401            putIRegT( regD, mkexpr(res), condT );
   9402         else
   9403            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9404 
   9405         assign(reso, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
   9406         set_GE_32_10_from_bits_31_15(reso, condT);
   9407 
   9408         DIP("uadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9409         return True;
   9410      }
   9411      /* fall through */
   9412    }
   9413 
   9414    /* -------------- sadd16<c> <Rd>,<Rn>,<Rm> -------------- */
   9415    {
   9416      UInt regD = 99, regN = 99, regM = 99;
   9417      Bool gate = False;
   9418 
   9419      if (isT) {
   9420         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9421            regN = INSNT0(3,0);
   9422            regD = INSNT1(11,8);
   9423            regM = INSNT1(3,0);
   9424            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9425               gate = True;
   9426         }
   9427      } else {
   9428         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9429             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9430             INSNA(7,4)   == BITS4(0,0,0,1)) {
   9431            regD = INSNA(15,12);
   9432            regN = INSNA(19,16);
   9433            regM = INSNA(3,0);
   9434            if (regD != 15 && regN != 15 && regM != 15)
   9435               gate = True;
   9436         }
   9437      }
   9438 
   9439      if (gate) {
   9440         IRTemp rNt  = newTemp(Ity_I32);
   9441         IRTemp rMt  = newTemp(Ity_I32);
   9442         IRTemp res  = newTemp(Ity_I32);
   9443         IRTemp reso = newTemp(Ity_I32);
   9444 
   9445         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9446         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9447 
   9448         assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
   9449         if (isT)
   9450            putIRegT( regD, mkexpr(res), condT );
   9451         else
   9452            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9453 
   9454         assign(reso, unop(Iop_Not32,
   9455                           binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt))));
   9456         set_GE_32_10_from_bits_31_15(reso, condT);
   9457 
   9458         DIP("sadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9459         return True;
   9460      }
   9461      /* fall through */
   9462    }
   9463 
   9464    /* ---------------- usub16<c> <Rd>,<Rn>,<Rm> ---------------- */
   9465    {
   9466      UInt regD = 99, regN = 99, regM = 99;
   9467      Bool gate = False;
   9468 
   9469      if (isT) {
   9470         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9471            regN = INSNT0(3,0);
   9472            regD = INSNT1(11,8);
   9473            regM = INSNT1(3,0);
   9474            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9475               gate = True;
   9476         }
   9477      } else {
   9478         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9479             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9480             INSNA(7,4)   == BITS4(0,1,1,1)) {
   9481            regD = INSNA(15,12);
   9482            regN = INSNA(19,16);
   9483            regM = INSNA(3,0);
   9484            if (regD != 15 && regN != 15 && regM != 15)
   9485              gate = True;
   9486         }
   9487      }
   9488 
   9489      if (gate) {
   9490         IRTemp rNt  = newTemp(Ity_I32);
   9491         IRTemp rMt  = newTemp(Ity_I32);
   9492         IRTemp res  = newTemp(Ity_I32);
   9493         IRTemp reso = newTemp(Ity_I32);
   9494 
   9495         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9496         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9497 
   9498         assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
   9499         if (isT)
   9500            putIRegT( regD, mkexpr(res), condT );
   9501         else
   9502            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9503 
   9504         assign(reso, unop(Iop_Not32,
   9505                           binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt))));
   9506         set_GE_32_10_from_bits_31_15(reso, condT);
   9507 
   9508         DIP("usub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9509         return True;
   9510      }
   9511      /* fall through */
   9512    }
   9513 
   9514    /* -------------- ssub16<c> <Rd>,<Rn>,<Rm> -------------- */
   9515    {
   9516      UInt regD = 99, regN = 99, regM = 99;
   9517      Bool gate = False;
   9518 
   9519      if (isT) {
   9520         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9521            regN = INSNT0(3,0);
   9522            regD = INSNT1(11,8);
   9523            regM = INSNT1(3,0);
   9524            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9525               gate = True;
   9526         }
   9527      } else {
   9528         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9529             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9530             INSNA(7,4)   == BITS4(0,1,1,1)) {
   9531            regD = INSNA(15,12);
   9532            regN = INSNA(19,16);
   9533            regM = INSNA(3,0);
   9534            if (regD != 15 && regN != 15 && regM != 15)
   9535               gate = True;
   9536         }
   9537      }
   9538 
   9539      if (gate) {
   9540         IRTemp rNt  = newTemp(Ity_I32);
   9541         IRTemp rMt  = newTemp(Ity_I32);
   9542         IRTemp res  = newTemp(Ity_I32);
   9543         IRTemp reso = newTemp(Ity_I32);
   9544 
   9545         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9546         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9547 
   9548         assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
   9549         if (isT)
   9550            putIRegT( regD, mkexpr(res), condT );
   9551         else
   9552            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9553 
   9554         assign(reso, unop(Iop_Not32,
   9555                           binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt))));
   9556         set_GE_32_10_from_bits_31_15(reso, condT);
   9557 
   9558         DIP("ssub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9559         return True;
   9560      }
   9561      /* fall through */
   9562    }
   9563 
   9564    /* ----------------- uadd8<c> <Rd>,<Rn>,<Rm> ---------------- */
   9565    {
   9566      UInt regD = 99, regN = 99, regM = 99;
   9567      Bool gate = False;
   9568 
   9569      if (isT) {
   9570         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9571            regN = INSNT0(3,0);
   9572            regD = INSNT1(11,8);
   9573            regM = INSNT1(3,0);
   9574            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9575               gate = True;
   9576         }
   9577      } else {
   9578         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9579             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9580             (INSNA(7,4)  == BITS4(1,0,0,1))) {
   9581            regD = INSNA(15,12);
   9582            regN = INSNA(19,16);
   9583            regM = INSNA(3,0);
   9584            if (regD != 15 && regN != 15 && regM != 15)
   9585               gate = True;
   9586         }
   9587      }
   9588 
   9589      if (gate) {
   9590         IRTemp rNt  = newTemp(Ity_I32);
   9591         IRTemp rMt  = newTemp(Ity_I32);
   9592         IRTemp res  = newTemp(Ity_I32);
   9593         IRTemp reso = newTemp(Ity_I32);
   9594 
   9595         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9596         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9597 
   9598         assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
   9599         if (isT)
   9600            putIRegT( regD, mkexpr(res), condT );
   9601         else
   9602            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9603 
   9604         assign(reso, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
   9605         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9606 
   9607         DIP("uadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9608         return True;
   9609      }
   9610      /* fall through */
   9611    }
   9612 
   9613    /* ------------------- sadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9614    {
   9615      UInt regD = 99, regN = 99, regM = 99;
   9616      Bool gate = False;
   9617 
   9618      if (isT) {
   9619         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9620            regN = INSNT0(3,0);
   9621            regD = INSNT1(11,8);
   9622            regM = INSNT1(3,0);
   9623            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9624               gate = True;
   9625         }
   9626      } else {
   9627         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9628             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9629             (INSNA(7,4)  == BITS4(1,0,0,1))) {
   9630            regD = INSNA(15,12);
   9631            regN = INSNA(19,16);
   9632            regM = INSNA(3,0);
   9633            if (regD != 15 && regN != 15 && regM != 15)
   9634               gate = True;
   9635         }
   9636      }
   9637 
   9638      if (gate) {
   9639         IRTemp rNt  = newTemp(Ity_I32);
   9640         IRTemp rMt  = newTemp(Ity_I32);
   9641         IRTemp res  = newTemp(Ity_I32);
   9642         IRTemp reso = newTemp(Ity_I32);
   9643 
   9644         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9645         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9646 
   9647         assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
   9648         if (isT)
   9649            putIRegT( regD, mkexpr(res), condT );
   9650         else
   9651            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9652 
   9653         assign(reso, unop(Iop_Not32,
   9654                           binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt))));
   9655         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9656 
   9657         DIP("sadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9658         return True;
   9659      }
   9660      /* fall through */
   9661    }
   9662 
   9663    /* ------------------- usub8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9664    {
   9665      UInt regD = 99, regN = 99, regM = 99;
   9666      Bool gate = False;
   9667 
   9668      if (isT) {
   9669         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9670            regN = INSNT0(3,0);
   9671            regD = INSNT1(11,8);
   9672            regM = INSNT1(3,0);
   9673            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9674               gate = True;
   9675         }
   9676      } else {
   9677         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9678             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9679             (INSNA(7,4)  == BITS4(1,1,1,1))) {
   9680            regD = INSNA(15,12);
   9681            regN = INSNA(19,16);
   9682            regM = INSNA(3,0);
   9683            if (regD != 15 && regN != 15 && regM != 15)
   9684              gate = True;
   9685         }
   9686      }
   9687 
   9688      if (gate) {
   9689         IRTemp rNt  = newTemp(Ity_I32);
   9690         IRTemp rMt  = newTemp(Ity_I32);
   9691         IRTemp res  = newTemp(Ity_I32);
   9692         IRTemp reso = newTemp(Ity_I32);
   9693 
   9694         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9695         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9696 
   9697         assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
   9698         if (isT)
   9699            putIRegT( regD, mkexpr(res), condT );
   9700         else
   9701            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9702 
   9703         assign(reso, unop(Iop_Not32,
   9704                           binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt))));
   9705         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9706 
   9707         DIP("usub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9708         return True;
   9709      }
   9710      /* fall through */
   9711    }
   9712 
   9713    /* ------------------- ssub8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9714    {
   9715      UInt regD = 99, regN = 99, regM = 99;
   9716      Bool gate = False;
   9717 
   9718      if (isT) {
   9719         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9720            regN = INSNT0(3,0);
   9721            regD = INSNT1(11,8);
   9722            regM = INSNT1(3,0);
   9723            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9724               gate = True;
   9725         }
   9726      } else {
   9727         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9728             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9729             INSNA(7,4)   == BITS4(1,1,1,1)) {
   9730            regD = INSNA(15,12);
   9731            regN = INSNA(19,16);
   9732            regM = INSNA(3,0);
   9733            if (regD != 15 && regN != 15 && regM != 15)
   9734               gate = True;
   9735         }
   9736      }
   9737 
   9738      if (gate) {
   9739         IRTemp rNt  = newTemp(Ity_I32);
   9740         IRTemp rMt  = newTemp(Ity_I32);
   9741         IRTemp res  = newTemp(Ity_I32);
   9742         IRTemp reso = newTemp(Ity_I32);
   9743 
   9744         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9745         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9746 
   9747         assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
   9748         if (isT)
   9749            putIRegT( regD, mkexpr(res), condT );
   9750         else
   9751            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9752 
   9753         assign(reso, unop(Iop_Not32,
   9754                           binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt))));
   9755         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9756 
   9757         DIP("ssub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9758         return True;
   9759      }
   9760      /* fall through */
   9761    }
   9762 
   9763    /* ------------------ qadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
   9764    {
   9765      UInt regD = 99, regN = 99, regM = 99;
   9766      Bool gate = False;
   9767 
   9768      if (isT) {
   9769         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   9770            regN = INSNT0(3,0);
   9771            regD = INSNT1(11,8);
   9772            regM = INSNT1(3,0);
   9773            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9774               gate = True;
   9775         }
   9776      } else {
   9777         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   9778             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9779             INSNA(7,4)   == BITS4(1,0,0,1)) {
   9780            regD = INSNA(15,12);
   9781            regN = INSNA(19,16);
   9782            regM = INSNA(3,0);
   9783            if (regD != 15 && regN != 15 && regM != 15)
   9784               gate = True;
   9785         }
   9786      }
   9787 
   9788      if (gate) {
   9789         IRTemp rNt   = newTemp(Ity_I32);
   9790         IRTemp rMt   = newTemp(Ity_I32);
   9791         IRTemp res_q = newTemp(Ity_I32);
   9792 
   9793         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9794         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9795 
   9796         assign(res_q, binop(Iop_QAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
   9797         if (isT)
   9798            putIRegT( regD, mkexpr(res_q), condT );
   9799         else
   9800            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9801 
   9802         DIP("qadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9803         return True;
   9804      }
   9805      /* fall through */
   9806    }
   9807 
   9808    /* ------------------ qsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
   9809    {
   9810      UInt regD = 99, regN = 99, regM = 99;
   9811      Bool gate = False;
   9812 
   9813      if (isT) {
   9814         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   9815            regN = INSNT0(3,0);
   9816            regD = INSNT1(11,8);
   9817            regM = INSNT1(3,0);
   9818            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9819               gate = True;
   9820         }
   9821      } else {
   9822         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   9823             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9824             INSNA(7,4)   == BITS4(1,1,1,1)) {
   9825            regD = INSNA(15,12);
   9826            regN = INSNA(19,16);
   9827            regM = INSNA(3,0);
   9828            if (regD != 15 && regN != 15 && regM != 15)
   9829               gate = True;
   9830         }
   9831      }
   9832 
   9833      if (gate) {
   9834         IRTemp rNt   = newTemp(Ity_I32);
   9835         IRTemp rMt   = newTemp(Ity_I32);
   9836         IRTemp res_q = newTemp(Ity_I32);
   9837 
   9838         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9839         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9840 
   9841         assign(res_q, binop(Iop_QSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
   9842         if (isT)
   9843            putIRegT( regD, mkexpr(res_q), condT );
   9844         else
   9845            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9846 
   9847         DIP("qsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9848         return True;
   9849      }
   9850      /* fall through */
   9851    }
   9852 
   9853    /* ------------------ uqadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9854    {
   9855      UInt regD = 99, regN = 99, regM = 99;
   9856      Bool gate = False;
   9857 
   9858      if (isT) {
   9859         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   9860            regN = INSNT0(3,0);
   9861            regD = INSNT1(11,8);
   9862            regM = INSNT1(3,0);
   9863            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9864               gate = True;
   9865         }
   9866      } else {
   9867         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   9868             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9869             (INSNA(7,4)  == BITS4(1,0,0,1))) {
   9870            regD = INSNA(15,12);
   9871            regN = INSNA(19,16);
   9872            regM = INSNA(3,0);
   9873            if (regD != 15 && regN != 15 && regM != 15)
   9874               gate = True;
   9875         }
   9876      }
   9877 
   9878      if (gate) {
   9879         IRTemp rNt   = newTemp(Ity_I32);
   9880         IRTemp rMt   = newTemp(Ity_I32);
   9881         IRTemp res_q = newTemp(Ity_I32);
   9882 
   9883         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9884         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9885 
   9886         assign(res_q, binop(Iop_QAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
   9887         if (isT)
   9888            putIRegT( regD, mkexpr(res_q), condT );
   9889         else
   9890            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9891 
   9892         DIP("uqadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9893         return True;
   9894      }
   9895      /* fall through */
   9896    }
   9897 
   9898    /* ------------------ uqsub8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9899    {
   9900      UInt regD = 99, regN = 99, regM = 99;
   9901      Bool gate = False;
   9902 
   9903      if (isT) {
   9904         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   9905            regN = INSNT0(3,0);
   9906            regD = INSNT1(11,8);
   9907            regM = INSNT1(3,0);
   9908            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9909               gate = True;
   9910         }
   9911      } else {
   9912         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   9913             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9914             (INSNA(7,4)  == BITS4(1,1,1,1))) {
   9915            regD = INSNA(15,12);
   9916            regN = INSNA(19,16);
   9917            regM = INSNA(3,0);
   9918            if (regD != 15 && regN != 15 && regM != 15)
   9919              gate = True;
   9920         }
   9921      }
   9922 
   9923      if (gate) {
   9924         IRTemp rNt   = newTemp(Ity_I32);
   9925         IRTemp rMt   = newTemp(Ity_I32);
   9926         IRTemp res_q = newTemp(Ity_I32);
   9927 
   9928         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9929         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9930 
   9931         assign(res_q, binop(Iop_QSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
   9932         if (isT)
   9933            putIRegT( regD, mkexpr(res_q), condT );
   9934         else
   9935            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9936 
   9937         DIP("uqsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9938         return True;
   9939      }
   9940      /* fall through */
   9941    }
   9942 
   9943    /* ----------------- uhadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
   9944    {
   9945      UInt regD = 99, regN = 99, regM = 99;
   9946      Bool gate = False;
   9947 
   9948      if (isT) {
   9949         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   9950            regN = INSNT0(3,0);
   9951            regD = INSNT1(11,8);
   9952            regM = INSNT1(3,0);
   9953            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9954               gate = True;
   9955         }
   9956      } else {
   9957         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   9958             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9959             INSNA(7,4)   == BITS4(1,0,0,1)) {
   9960            regD = INSNA(15,12);
   9961            regN = INSNA(19,16);
   9962            regM = INSNA(3,0);
   9963            if (regD != 15 && regN != 15 && regM != 15)
   9964               gate = True;
   9965         }
   9966      }
   9967 
   9968      if (gate) {
   9969         IRTemp rNt   = newTemp(Ity_I32);
   9970         IRTemp rMt   = newTemp(Ity_I32);
   9971         IRTemp res_q = newTemp(Ity_I32);
   9972 
   9973         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9974         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9975 
   9976         assign(res_q, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
   9977         if (isT)
   9978            putIRegT( regD, mkexpr(res_q), condT );
   9979         else
   9980            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9981 
   9982         DIP("uhadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9983         return True;
   9984      }
   9985      /* fall through */
   9986    }
   9987 
   9988    /* ----------------- uhadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
   9989    {
   9990      UInt regD = 99, regN = 99, regM = 99;
   9991      Bool gate = False;
   9992 
   9993      if (isT) {
   9994         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   9995            regN = INSNT0(3,0);
   9996            regD = INSNT1(11,8);
   9997            regM = INSNT1(3,0);
   9998            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9999               gate = True;
   10000         }
   10001      } else {
   10002         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   10003             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10004             INSNA(7,4)   == BITS4(0,0,0,1)) {
   10005            regD = INSNA(15,12);
   10006            regN = INSNA(19,16);
   10007            regM = INSNA(3,0);
   10008            if (regD != 15 && regN != 15 && regM != 15)
   10009               gate = True;
   10010         }
   10011      }
   10012 
   10013      if (gate) {
   10014         IRTemp rNt   = newTemp(Ity_I32);
   10015         IRTemp rMt   = newTemp(Ity_I32);
   10016         IRTemp res_q = newTemp(Ity_I32);
   10017 
   10018         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10019         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10020 
   10021         assign(res_q, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
   10022         if (isT)
   10023            putIRegT( regD, mkexpr(res_q), condT );
   10024         else
   10025            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10026 
   10027         DIP("uhadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10028         return True;
   10029      }
   10030      /* fall through */
   10031    }
   10032 
   10033    /* ----------------- shadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
   10034    {
   10035      UInt regD = 99, regN = 99, regM = 99;
   10036      Bool gate = False;
   10037 
   10038      if (isT) {
   10039         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   10040            regN = INSNT0(3,0);
   10041            regD = INSNT1(11,8);
   10042            regM = INSNT1(3,0);
   10043            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10044               gate = True;
   10045         }
   10046      } else {
   10047         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   10048             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10049             INSNA(7,4)   == BITS4(1,0,0,1)) {
   10050            regD = INSNA(15,12);
   10051            regN = INSNA(19,16);
   10052            regM = INSNA(3,0);
   10053            if (regD != 15 && regN != 15 && regM != 15)
   10054               gate = True;
   10055         }
   10056      }
   10057 
   10058      if (gate) {
   10059         IRTemp rNt   = newTemp(Ity_I32);
   10060         IRTemp rMt   = newTemp(Ity_I32);
   10061         IRTemp res_q = newTemp(Ity_I32);
   10062 
   10063         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10064         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10065 
   10066         assign(res_q, binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
   10067         if (isT)
   10068            putIRegT( regD, mkexpr(res_q), condT );
   10069         else
   10070            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10071 
   10072         DIP("shadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10073         return True;
   10074      }
   10075      /* fall through */
   10076    }
   10077 
   10078    /* ------------------ qadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
   10079    {
   10080      UInt regD = 99, regN = 99, regM = 99;
   10081      Bool gate = False;
   10082 
   10083      if (isT) {
   10084         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10085            regN = INSNT0(3,0);
   10086            regD = INSNT1(11,8);
   10087            regM = INSNT1(3,0);
   10088            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10089               gate = True;
   10090         }
   10091      } else {
   10092         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10093             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10094             INSNA(7,4)   == BITS4(0,0,0,1)) {
   10095            regD = INSNA(15,12);
   10096            regN = INSNA(19,16);
   10097            regM = INSNA(3,0);
   10098            if (regD != 15 && regN != 15 && regM != 15)
   10099               gate = True;
   10100         }
   10101      }
   10102 
   10103      if (gate) {
   10104         IRTemp rNt   = newTemp(Ity_I32);
   10105         IRTemp rMt   = newTemp(Ity_I32);
   10106         IRTemp res_q = newTemp(Ity_I32);
   10107 
   10108         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10109         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10110 
   10111         assign(res_q, binop(Iop_QAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
   10112         if (isT)
   10113            putIRegT( regD, mkexpr(res_q), condT );
   10114         else
   10115            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10116 
   10117         DIP("qadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10118         return True;
   10119      }
   10120      /* fall through */
   10121    }
   10122 
   10123    /* ------------------ qsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
   10124    {
   10125      UInt regD = 99, regN = 99, regM = 99;
   10126      Bool gate = False;
   10127 
   10128       if (isT) {
   10129         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10130            regN = INSNT0(3,0);
   10131            regD = INSNT1(11,8);
   10132            regM = INSNT1(3,0);
   10133            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10134               gate = True;
   10135         }
   10136      } else {
   10137         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10138             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10139             INSNA(7,4)   == BITS4(0,1,1,1)) {
   10140            regD = INSNA(15,12);
   10141            regN = INSNA(19,16);
   10142            regM = INSNA(3,0);
   10143            if (regD != 15 && regN != 15 && regM != 15)
   10144              gate = True;
   10145         }
   10146      }
   10147 
   10148      if (gate) {
   10149         IRTemp rNt   = newTemp(Ity_I32);
   10150         IRTemp rMt   = newTemp(Ity_I32);
   10151         IRTemp res_q = newTemp(Ity_I32);
   10152 
   10153         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10154         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10155 
   10156         assign(res_q, binop(Iop_QSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
   10157         if (isT)
   10158            putIRegT( regD, mkexpr(res_q), condT );
   10159         else
   10160            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10161 
   10162         DIP("qsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10163         return True;
   10164      }
   10165      /* fall through */
   10166    }
   10167 
   10168    /* ------------------- qsax<c> <Rd>,<Rn>,<Rm> ------------------- */
   10169    /* note: the hardware seems to construct the result differently
   10170       from wot the manual says. */
   10171    {
   10172      UInt regD = 99, regN = 99, regM = 99;
   10173      Bool gate = False;
   10174 
   10175      if (isT) {
   10176         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10177            regN = INSNT0(3,0);
   10178            regD = INSNT1(11,8);
   10179            regM = INSNT1(3,0);
   10180            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10181               gate = True;
   10182         }
   10183      } else {
   10184         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10185             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10186             INSNA(7,4)   == BITS4(0,1,0,1)) {
   10187            regD = INSNA(15,12);
   10188            regN = INSNA(19,16);
   10189            regM = INSNA(3,0);
   10190            if (regD != 15 && regN != 15 && regM != 15)
   10191               gate = True;
   10192         }
   10193      }
   10194 
   10195      if (gate) {
   10196         IRTemp irt_regN     = newTemp(Ity_I32);
   10197         IRTemp irt_regM     = newTemp(Ity_I32);
   10198         IRTemp irt_sum      = newTemp(Ity_I32);
   10199         IRTemp irt_diff     = newTemp(Ity_I32);
   10200         IRTemp irt_sum_res  = newTemp(Ity_I32);
   10201         IRTemp irt_diff_res = newTemp(Ity_I32);
   10202 
   10203         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10204         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10205 
   10206         assign( irt_diff,
   10207                 binop( Iop_Sub32,
   10208                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10209                        binop( Iop_Sar32,
   10210                               binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
   10211                               mkU8(16) ) ) );
   10212         armSignedSatQ( irt_diff, 0x10, &irt_diff_res, NULL);
   10213 
   10214         assign( irt_sum,
   10215                 binop( Iop_Add32,
   10216                        binop( Iop_Sar32,
   10217                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10218                               mkU8(16) ),
   10219                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) )) );
   10220         armSignedSatQ( irt_sum, 0x10, &irt_sum_res, NULL );
   10221 
   10222         IRExpr* ire_result = binop( Iop_Or32,
   10223                                     binop( Iop_Shl32, mkexpr(irt_diff_res),
   10224                                            mkU8(16) ),
   10225                                     binop( Iop_And32, mkexpr(irt_sum_res),
   10226                                            mkU32(0xFFFF)) );
   10227 
   10228         if (isT)
   10229            putIRegT( regD, ire_result, condT );
   10230         else
   10231            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10232 
   10233         DIP( "qsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10234         return True;
   10235      }
   10236      /* fall through */
   10237    }
   10238 
   10239    /* ------------------- qasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   10240    {
             /* QASX decoder.  If the instruction word matches, IR is emitted
                that writes Rd, a trace line is printed through DIP, and True
                is returned.  If it does not match, nothing is emitted and
                control leaves this scope for the decoder that follows. */
   10241      UInt regD = 99, regN = 99, regM = 99;
   10242      Bool gate = False;
   10243 
   10244      if (isT) {
                /* isT form: first halfword is 0xFAA:Rn; the second halfword
                   must have 0xF in bits 15:12 and 0x1 in bits 7:4, with Rd in
                   bits 11:8 and Rm in bits 3:0. */
   10245         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10246            regN = INSNT0(3,0);
   10247            regD = INSNT1(11,8);
   10248            regM = INSNT1(3,0);
   10249            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10250               gate = True;
   10251         }
   10252      } else {
                /* Non-isT form: bits 27:20 = 01100010, bits 11:8 = 1111,
                   bits 7:4 = 0011.  Register number 15 is rejected for all
                   three operands. */
   10253         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10254             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10255             INSNA(7,4)   == BITS4(0,0,1,1)) {
   10256            regD = INSNA(15,12);
   10257            regN = INSNA(19,16);
   10258            regM = INSNA(3,0);
   10259            if (regD != 15 && regN != 15 && regM != 15)
   10260               gate = True;
   10261         }
   10262      }
   10263 
   10264      if (gate) {
   10265         IRTemp irt_regN     = newTemp(Ity_I32);
   10266         IRTemp irt_regM     = newTemp(Ity_I32);
   10267         IRTemp irt_sum      = newTemp(Ity_I32);
   10268         IRTemp irt_diff     = newTemp(Ity_I32);
   10269         IRTemp irt_res_sum  = newTemp(Ity_I32);
   10270         IRTemp irt_res_diff = newTemp(Ity_I32);
   10271 
   10272         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10273         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10274 
                /* diff = sign-extended Rn[15:0] minus sign-extended Rm[31:16].
                   (Shl by 16 followed by Sar by 16 sign-extends the low half;
                   a bare Sar by 16 yields the sign-extended high half.) */
   10275         assign( irt_diff,
   10276                 binop( Iop_Sub32,
   10277                        binop( Iop_Sar32,
   10278                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10279                               mkU8(16) ),
   10280                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
                /* NOTE(review): presumably saturates to a signed 16-bit range
                   (0x10 == 16) with the NULL meaning "no saturation flag
                   wanted" -- confirm against armSignedSatQ. */
   10281         armSignedSatQ( irt_diff, 0x10, &irt_res_diff, NULL );
   10282 
                /* sum = sign-extended Rn[31:16] plus sign-extended Rm[15:0]. */
   10283         assign( irt_sum,
   10284                 binop( Iop_Add32,
   10285                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10286                        binop( Iop_Sar32,
   10287                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   10288                               mkU8(16) ) ) );
   10289         armSignedSatQ( irt_sum, 0x10, &irt_res_sum, NULL );
   10290 
                /* Rd = (processed sum << 16) | (processed diff & 0xFFFF). */
   10291         IRExpr* ire_result
   10292           = binop( Iop_Or32,
   10293                    binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
   10294                    binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
   10295 
   10296         if (isT)
   10297            putIRegT( regD, ire_result, condT );
   10298         else
   10299            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10300 
   10301         DIP( "qasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10302         return True;
   10303      }
   10304      /* fall through */
   10305    }
   10306 
   10307    /* ------------------- sasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   10308    {
             /* SASX decoder.  Same halfword exchange arithmetic as the QASX
                case (sum in the top half, difference in the bottom half) but
                the 32-bit sum and difference are used directly, and the four
                GE flags are written from them.  Returns True on a match. */
   10309      UInt regD = 99, regN = 99, regM = 99;
   10310      Bool gate = False;
   10311 
   10312      if (isT) {
                /* isT form: 0xFAA:Rn, then 0xF in bits 15:12 and 0x0 in
                   bits 7:4 of the second halfword. */
   10313         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   10314            regN = INSNT0(3,0);
   10315            regD = INSNT1(11,8);
   10316            regM = INSNT1(3,0);
   10317            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10318               gate = True;
   10319         }
   10320      } else {
                /* Non-isT form: bits 27:20 = 01100001, bits 11:8 = 1111,
                   bits 7:4 = 0011; no operand may be register 15. */
   10321         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   10322             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10323             INSNA(7,4)   == BITS4(0,0,1,1)) {
   10324            regD = INSNA(15,12);
   10325            regN = INSNA(19,16);
   10326            regM = INSNA(3,0);
   10327            if (regD != 15 && regN != 15 && regM != 15)
   10328               gate = True;
   10329         }
   10330      }
   10331 
   10332      if (gate) {
   10333         IRTemp irt_regN = newTemp(Ity_I32);
   10334         IRTemp irt_regM = newTemp(Ity_I32);
   10335         IRTemp irt_sum  = newTemp(Ity_I32);
   10336         IRTemp irt_diff = newTemp(Ity_I32);
   10337 
   10338         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10339         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10340 
                /* diff = sign-extended Rn[15:0] minus sign-extended Rm[31:16]. */
   10341         assign( irt_diff,
   10342                 binop( Iop_Sub32,
   10343                        binop( Iop_Sar32,
   10344                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10345                               mkU8(16) ),
   10346                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
   10347 
                /* sum = sign-extended Rn[31:16] plus sign-extended Rm[15:0]. */
   10348         assign( irt_sum,
   10349                 binop( Iop_Add32,
   10350                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10351                        binop( Iop_Sar32,
   10352                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   10353                               mkU8(16) ) ) );
   10354 
                /* Rd = (sum << 16) | (diff & 0xFFFF); no saturation here. */
   10355         IRExpr* ire_result
   10356           = binop( Iop_Or32,
   10357                    binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
   10358                    binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
   10359 
                /* GE flags 0 and 1 are written from the bitwise complement of
                   diff.  NOTE(review): the literal 31 presumably selects the
                   sign bit, so the flags end up meaning "diff >= 0" --
                   confirm against put_GEFLAG32. */
   10360         IRTemp ge10 = newTemp(Ity_I32);
   10361         assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
   10362         put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
   10363         put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
   10364 
                /* GE flags 2 and 3 are written the same way from sum. */
   10365         IRTemp ge32 = newTemp(Ity_I32);
   10366         assign(ge32, unop(Iop_Not32, mkexpr(irt_sum)));
   10367         put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
   10368         put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
   10369 
   10370         if (isT)
   10371            putIRegT( regD, ire_result, condT );
   10372         else
   10373            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10374 
   10375         DIP( "sasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10376         return True;
   10377      }
   10378      /* fall through */
   10379    }
   10380 
   10381    /* --------------- smuad, smuadx<c><Rd>,<Rn>,<Rm> --------------- */
   10382    /* --------------- smusd, smusdx<c><Rd>,<Rn>,<Rm> --------------- */
   10383    {
             /* Decoder for the dual 16x16 multiply with add (isAD) or
                subtract (!isAD) of the two products, optionally with the
                halves of Rm exchanged first (bitM).  The trace mnemonic is
                "smuad"/"smusd" with an "x" suffix when bitM is set.
                Returns True on a match. */
   10384      UInt regD = 99, regN = 99, regM = 99, bitM = 99;
   10385      Bool gate = False, isAD = False;
   10386 
   10387      if (isT) {
                /* isT form: 0xFB2:Rn selects the add variant, 0xFB4:Rn the
                   subtract variant.  The second halfword must have 0xF in
                   bits 15:12 and zeroes in bits 7:5; bit 4 is bitM. */
   10388         if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
   10389             && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
   10390            regN = INSNT0(3,0);
   10391            regD = INSNT1(11,8);
   10392            regM = INSNT1(3,0);
   10393            bitM = INSNT1(4,4);
   10394            isAD = INSNT0(15,4) == 0xFB2;
   10395            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10396               gate = True;
   10397         }
   10398      } else {
                /* Non-isT form: bits 27:20 = 01110000, bits 15:12 = 1111,
                   bit 7 = 0 and bit 4 = 1.  Bit 6 clear selects the add
                   variant; bit 5 is bitM. */
   10399         if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
   10400             INSNA(15,12) == BITS4(1,1,1,1)         &&
   10401             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1) ) {
   10402            regD = INSNA(19,16);
   10403            regN = INSNA(3,0);
   10404            regM = INSNA(11,8);
   10405            bitM = INSNA(5,5);
   10406            isAD = INSNA(6,6) == 0;
   10407            if (regD != 15 && regN != 15 && regM != 15)
   10408               gate = True;
   10409         }
   10410      }
   10411 
   10412      if (gate) {
   10413         IRTemp irt_regN    = newTemp(Ity_I32);
   10414         IRTemp irt_regM    = newTemp(Ity_I32);
   10415         IRTemp irt_prod_lo = newTemp(Ity_I32);
   10416         IRTemp irt_prod_hi = newTemp(Ity_I32);
   10417         IRTemp tmpM        = newTemp(Ity_I32);
   10418 
   10419         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10420 
                /* Rotate Rm by 16 when bitM is set (swapping its halfwords),
                   by 0 otherwise. */
   10421         assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
   10422         assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
   10423 
                /* prod_lo = sext(Rn[15:0]) * sext(Rm'[15:0]) */
   10424         assign( irt_prod_lo,
   10425                 binop( Iop_Mul32,
   10426                        binop( Iop_Sar32,
   10427                               binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
   10428                               mkU8(16) ),
   10429                        binop( Iop_Sar32,
   10430                               binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
   10431                               mkU8(16) ) ) );
                /* prod_hi = sext(Rn[31:16]) * sext(Rm'[31:16]) */
   10432         assign( irt_prod_hi, binop(Iop_Mul32,
   10433                                    binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)),
   10434                                    binop(Iop_Sar32, mkexpr(irt_regM), mkU8(16))) );
                /* Rd = prod_lo + prod_hi (add variant) or prod_lo - prod_hi. */
   10435         IRExpr* ire_result
   10436            = binop( isAD ? Iop_Add32 : Iop_Sub32,
   10437                     mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
   10438 
   10439         if (isT)
   10440            putIRegT( regD, ire_result, condT );
   10441         else
   10442            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10443 
                /* Only the add variant feeds the Q flag, using the overflow
                   indication computed for prod_lo + prod_hi. */
   10444         if (isAD) {
   10445            or_into_QFLAG32(
   10446               signed_overflow_after_Add32( ire_result,
   10447                                            irt_prod_lo, irt_prod_hi ),
   10448               condT
   10449            );
   10450         }
   10451 
   10452         DIP("smu%cd%s%s r%u, r%u, r%u\n",
   10453             isAD ? 'a' : 's',
   10454             bitM ? "x" : "", nCC(conq), regD, regN, regM);
   10455         return True;
   10456      }
   10457      /* fall through */
   10458    }
   10459 
   10460    /* --------------- smlad{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
   10461    /* --------------- smlsd{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
   10462    {
             /* Decoder for the accumulating dual 16x16 multiply: the two
                halfword products are added (isAD) or subtracted (!isAD) and
                Ra is then added to give Rd.  bitM exchanges the halves of Rm
                first.  Returns True on a match. */
   10463      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
   10464      Bool gate = False, isAD = False;
   10465 
   10466      if (isT) {
                /* isT form: 0xFB2:Rn (add) or 0xFB4:Rn (subtract); second
                   halfword is Ra:Rd:000:bitM:Rm.  isBadRegT(regA) filters the
                   accumulator register as well. */
   10467        if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
   10468            && INSNT1(7,5) == BITS3(0,0,0)) {
   10469            regN = INSNT0(3,0);
   10470            regD = INSNT1(11,8);
   10471            regM = INSNT1(3,0);
   10472            regA = INSNT1(15,12);
   10473            bitM = INSNT1(4,4);
   10474            isAD = INSNT0(15,4) == 0xFB2;
   10475            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
   10476                && !isBadRegT(regA))
   10477               gate = True;
   10478         }
   10479      } else {
                /* Non-isT form: bits 27:20 = 01110000, bit 7 = 0, bit 4 = 1.
                   Ra is in bits 15:12 and must not be 15 here. */
   10480         if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
   10481             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
   10482            regD = INSNA(19,16);
   10483            regA = INSNA(15,12);
   10484            regN = INSNA(3,0);
   10485            regM = INSNA(11,8);
   10486            bitM = INSNA(5,5);
   10487            isAD = INSNA(6,6) == 0;
   10488            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
   10489               gate = True;
   10490         }
   10491      }
   10492 
   10493      if (gate) {
   10494         IRTemp irt_regN    = newTemp(Ity_I32);
   10495         IRTemp irt_regM    = newTemp(Ity_I32);
   10496         IRTemp irt_regA    = newTemp(Ity_I32);
   10497         IRTemp irt_prod_lo = newTemp(Ity_I32);
   10498         IRTemp irt_prod_hi = newTemp(Ity_I32);
   10499         IRTemp irt_sum     = newTemp(Ity_I32);
   10500         IRTemp tmpM        = newTemp(Ity_I32);
   10501 
   10502         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10503         assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
   10504 
                /* Rotate Rm by 16 when bitM is set, by 0 otherwise. */
   10505         assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
   10506         assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
   10507 
                /* prod_lo = sext(Rn[15:0]) * sext(Rm'[15:0]) */
   10508         assign( irt_prod_lo,
   10509                 binop(Iop_Mul32,
   10510                       binop(Iop_Sar32,
   10511                             binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10512                             mkU8(16)),
   10513                       binop(Iop_Sar32,
   10514                             binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   10515                             mkU8(16))) );
                /* prod_hi = sext(Rn[31:16]) * sext(Rm'[31:16]) */
   10516         assign( irt_prod_hi,
   10517                 binop( Iop_Mul32,
   10518                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10519                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
                /* irt_sum = prod_lo +/- prod_hi; Rd = irt_sum + Ra. */
   10520         assign( irt_sum, binop( isAD ? Iop_Add32 : Iop_Sub32,
   10521                                 mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) ) );
   10522 
   10523         IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_sum), mkexpr(irt_regA));
   10524 
   10525         if (isT)
   10526            putIRegT( regD, ire_result, condT );
   10527         else
   10528            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10529 
                /* The Q flag is fed from two independent 32-bit overflow
                   checks: prod_lo + prod_hi (add variant only) and then
                   irt_sum + Ra.
                   NOTE(review): if the architecture defines Q from the
                   full-width sum of both products and Ra, an overflow in the
                   first step that the accumulate brings back into range
                   would set Q here spuriously -- confirm against the ARM ARM
                   pseudocode for SMLAD. */
   10530         if (isAD) {
   10531            or_into_QFLAG32(
   10532               signed_overflow_after_Add32( mkexpr(irt_sum),
   10533                                            irt_prod_lo, irt_prod_hi ),
   10534               condT
   10535            );
   10536         }
   10537 
   10538         or_into_QFLAG32(
   10539            signed_overflow_after_Add32( ire_result, irt_sum, irt_regA ),
   10540            condT
   10541         );
   10542 
   10543         DIP("sml%cd%s%s r%u, r%u, r%u, r%u\n",
   10544             isAD ? 'a' : 's',
   10545             bitM ? "x" : "", nCC(conq), regD, regN, regM, regA);
   10546         return True;
   10547      }
   10548      /* fall through */
   10549    }
   10550 
   10551    /* ----- smlabb, smlabt, smlatb, smlatt <Rd>,<Rn>,<Rm>,<Ra> ----- */
   10552    {
             /* Decoder for SMLA<x><y>: one signed 16x16 multiply of a
                selected halfword of Rn by a selected halfword of Rm, plus
                Ra.  bitN / bitM pick the top ('t') or bottom ('b') halfword.
                Returns True on a match. */
   10553      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99, bitN = 99;
   10554      Bool gate = False;
   10555 
   10556      if (isT) {
                /* isT form: 0xFB1:Rn, then Ra:Rd:00:bitN:bitM:Rm. */
   10557         if (INSNT0(15,4) == 0xFB1 && INSNT1(7,6) == BITS2(0,0)) {
   10558            regN = INSNT0(3,0);
   10559            regD = INSNT1(11,8);
   10560            regM = INSNT1(3,0);
   10561            regA = INSNT1(15,12);
   10562            bitM = INSNT1(4,4);
   10563            bitN = INSNT1(5,5);
   10564            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
   10565                && !isBadRegT(regA))
   10566               gate = True;
   10567         }
   10568      } else {
                /* Non-isT form: bits 27:20 = 00010000, bit 7 = 1, bit 4 = 0;
                   bit 6 is bitM and bit 5 is bitN. */
   10569         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
   10570             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
   10571            regD = INSNA(19,16);
   10572            regN = INSNA(3,0);
   10573            regM = INSNA(11,8);
   10574            regA = INSNA(15,12);
   10575            bitM = INSNA(6,6);
   10576            bitN = INSNA(5,5);
   10577            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
   10578               gate = True;
   10579         }
   10580      }
   10581 
   10582      if (gate) {
   10583         IRTemp irt_regA = newTemp(Ity_I32);
   10584         IRTemp irt_prod = newTemp(Ity_I32);
   10585 
                /* Halfword selection: shifting left by 0 keeps the top half
                   in place, shifting left by 16 moves the bottom half up; the
                   arithmetic shift right by 16 then sign-extends whichever
                   half was chosen. */
   10586         assign( irt_prod,
   10587                 binop(Iop_Mul32,
   10588                       binop(Iop_Sar32,
   10589                             binop(Iop_Shl32,
   10590                                   isT ? getIRegT(regN) : getIRegA(regN),
   10591                                   mkU8(bitN ? 0 : 16)),
   10592                             mkU8(16)),
   10593                       binop(Iop_Sar32,
   10594                             binop(Iop_Shl32,
   10595                                   isT ? getIRegT(regM) : getIRegA(regM),
   10596                                   mkU8(bitM ? 0 : 16)),
   10597                             mkU8(16))) );
   10598 
   10599         assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
   10600 
                /* Rd = product + Ra (32-bit add). */
   10601         IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_prod), mkexpr(irt_regA));
   10602 
   10603         if (isT)
   10604            putIRegT( regD, ire_result, condT );
   10605         else
   10606            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10607 
                /* Fold the overflow indication of the accumulate into Q. */
   10608         or_into_QFLAG32(
   10609            signed_overflow_after_Add32( ire_result, irt_prod, irt_regA ),
   10610            condT
   10611         );
   10612 
   10613         DIP( "smla%c%c%s r%u, r%u, r%u, r%u\n",
   10614              bitN ? 't' : 'b', bitM ? 't' : 'b',
   10615              nCC(conq), regD, regN, regM, regA );
   10616         return True;
   10617      }
   10618      /* fall through */
   10619    }
   10620 
   10621    /* ----- smlalbb, smlalbt, smlaltb, smlaltt <RdLo>,<RdHi>,<Rn>,<Rm> ----- */
   10622    {
             /* Decoder for SMLAL<x><y>: one signed 16x16 multiply of a
                selected halfword of Rn by a selected halfword of Rm, added
                to the 64-bit value held in RdHi:RdLo, with the 64-bit result
                written back to the same register pair.  No Q-flag update is
                generated here.  Returns True on a match. */
   10623      UInt regDHi = 99, regN = 99, regM = 99, regDLo = 99, bitM = 99, bitN = 99;
   10624      Bool gate = False;
   10625 
   10626      if (isT) {
                /* isT form: 0xFBC:Rn, then RdLo:RdHi:10:bitN:bitM:Rm.  The
                   two destination registers must differ. */
   10627         if (INSNT0(15,4) == 0xFBC && INSNT1(7,6) == BITS2(1,0)) {
   10628            regN   = INSNT0(3,0);
   10629            regDHi = INSNT1(11,8);
   10630            regM   = INSNT1(3,0);
   10631            regDLo = INSNT1(15,12);
   10632            bitM   = INSNT1(4,4);
   10633            bitN   = INSNT1(5,5);
   10634            if (!isBadRegT(regDHi) && !isBadRegT(regN) && !isBadRegT(regM)
   10635                && !isBadRegT(regDLo) && regDHi != regDLo)
   10636               gate = True;
   10637         }
   10638      } else {
                /* Non-isT form: bits 27:20 = 00010100, bit 7 = 1, bit 4 = 0;
                   bit 6 is bitM and bit 5 is bitN.  No operand may be
                   register 15 and the two destinations must differ. */
   10639         if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
   10640             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
   10641            regDHi = INSNA(19,16);
   10642            regN   = INSNA(3,0);
   10643            regM   = INSNA(11,8);
   10644            regDLo = INSNA(15,12);
   10645            bitM   = INSNA(6,6);
   10646            bitN   = INSNA(5,5);
   10647            if (regDHi != 15 && regN != 15 && regM != 15 && regDLo != 15 &&
   10648                regDHi != regDLo)
   10649               gate = True;
   10650         }
   10651      }
   10652 
   10653      if (gate) {
   10654         IRTemp irt_regD  = newTemp(Ity_I64);
   10655         IRTemp irt_prod  = newTemp(Ity_I64);
   10656         IRTemp irt_res   = newTemp(Ity_I64);
   10657         IRTemp irt_resHi = newTemp(Ity_I32);
   10658         IRTemp irt_resLo = newTemp(Ity_I32);
   10659 
                /* 64-bit signed product of the two selected halfwords.
                   Shifting left by 0 keeps the top half, by 16 moves the
                   bottom half up; Sar by 16 sign-extends the chosen half. */
   10660         assign( irt_prod,
   10661                 binop(Iop_MullS32,
   10662                       binop(Iop_Sar32,
   10663                             binop(Iop_Shl32,
   10664                                   isT ? getIRegT(regN) : getIRegA(regN),
   10665                                   mkU8(bitN ? 0 : 16)),
   10666                             mkU8(16)),
   10667                       binop(Iop_Sar32,
   10668                             binop(Iop_Shl32,
   10669                                   isT ? getIRegT(regM) : getIRegA(regM),
   10670                                   mkU8(bitM ? 0 : 16)),
   10671                             mkU8(16))) );
   10672 
                /* Accumulate into RdHi:RdLo and split the sum back into its
                   two 32-bit halves. */
   10673         assign( irt_regD, binop(Iop_32HLto64,
   10674                                 isT ? getIRegT(regDHi) : getIRegA(regDHi),
   10675                                 isT ? getIRegT(regDLo) : getIRegA(regDLo)) );
   10676         assign( irt_res, binop(Iop_Add64, mkexpr(irt_regD), mkexpr(irt_prod)) );
   10677         assign( irt_resHi, unop(Iop_64HIto32, mkexpr(irt_res)) );
   10678         assign( irt_resLo, unop(Iop_64to32, mkexpr(irt_res)) );
   10679 
   10680         if (isT) {
   10681            putIRegT( regDHi, mkexpr(irt_resHi), condT );
   10682            putIRegT( regDLo, mkexpr(irt_resLo), condT );
   10683         } else {
   10684            putIRegA( regDHi, mkexpr(irt_resHi), condT, Ijk_Boring );
   10685            putIRegA( regDLo, mkexpr(irt_resLo), condT, Ijk_Boring );
   10686         }
   10687 
                /* Trace operands in assembler order: RdLo, RdHi, Rn, Rm. */
   10688         DIP( "smlal%c%c%s r%u, r%u, r%u, r%u\n",
   10689              bitN ? 't' : 'b', bitM ? 't' : 'b',
   10690              nCC(conq), regDLo, regDHi, regN, regM );
   10691         return True;
   10692      }
   10693      /* fall through */
   10694    }
   10695 
   10696    /* ----- smlawb, smlawt <Rd>,<Rn>,<Rm>,<Ra> ----- */
   10697    {
             /* Decoder for SMLAW<y>: signed multiply of the whole of Rn by a
                selected halfword of Rm, keeping bits 47:16 of the product,
                plus Ra.  bitM picks the top ('t') or bottom ('b') halfword of
                Rm.  Returns True on a match. */
   10698      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
   10699      Bool gate = False;
   10700 
   10701      if (isT) {
                /* isT form: 0xFB3:Rn, then Ra:Rd:000:bitM:Rm. */
   10702         if (INSNT0(15,4) == 0xFB3 && INSNT1(7,5) == BITS3(0,0,0)) {
   10703            regN = INSNT0(3,0);
   10704            regD = INSNT1(11,8);
   10705            regM = INSNT1(3,0);
   10706            regA = INSNT1(15,12);
   10707            bitM = INSNT1(4,4);
   10708            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
   10709                && !isBadRegT(regA))
   10710               gate = True;
   10711         }
   10712      } else {
                /* Non-isT form: bits 27:20 = 00010010, bit 7 = 1, bits 5 and
                   4 = 0; bit 6 is bitM. */
   10713         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
   10714             (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,0,0)) {
   10715            regD = INSNA(19,16);
   10716            regN = INSNA(3,0);
   10717            regM = INSNA(11,8);
   10718            regA = INSNA(15,12);
   10719            bitM = INSNA(6,6);
   10720            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
   10721               gate = True;
   10722         }
   10723      }
   10724 
   10725      if (gate) {
   10726         IRTemp irt_regA = newTemp(Ity_I32);
   10727         IRTemp irt_prod = newTemp(Ity_I64);
   10728 
                /* 64-bit product of Rn and the sign-extended selected
                   halfword of Rm. */
   10729         assign( irt_prod,
   10730                 binop(Iop_MullS32,
   10731                       isT ? getIRegT(regN) : getIRegA(regN),
   10732                       binop(Iop_Sar32,
   10733                             binop(Iop_Shl32,
   10734                                   isT ? getIRegT(regM) : getIRegA(regM),
   10735                                   mkU8(bitM ? 0 : 16)),
   10736                             mkU8(16))) );
   10737 
   10738         assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
   10739 
                /* prod32 = product bits 47:16, assembled from the low 16 bits
                   of the high word and the high 16 bits of the low word. */
   10740         IRTemp prod32 = newTemp(Ity_I32);
   10741         assign(prod32,
   10742                binop(Iop_Or32,
   10743                      binop(Iop_Shl32, unop(Iop_64HIto32, mkexpr(irt_prod)), mkU8(16)),
   10744                      binop(Iop_Shr32, unop(Iop_64to32, mkexpr(irt_prod)), mkU8(16))
   10745         ));
   10746 
                /* Rd = prod32 + Ra (32-bit add). */
   10747         IRExpr* ire_result = binop(Iop_Add32, mkexpr(prod32), mkexpr(irt_regA));
   10748 
   10749         if (isT)
   10750            putIRegT( regD, ire_result, condT );
   10751         else
   10752            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10753 
                /* Fold the overflow indication of the accumulate into Q. */
   10754         or_into_QFLAG32(
   10755            signed_overflow_after_Add32( ire_result, prod32, irt_regA ),
   10756            condT
   10757         );
   10758 
   10759         DIP( "smlaw%c%s r%u, r%u, r%u, r%u\n",
   10760              bitM ? 't' : 'b',
   10761              nCC(conq), regD, regN, regM, regA );
   10762         return True;
   10763      }
   10764      /* fall through */
   10765    }
   10766 
   10767    /* ------------------- sel<c> <Rd>,<Rn>,<Rm> -------------------- */
   10768    /* fixme: fix up the test in v6media.c so that we can pass the ge
   10769       flags as part of the test. */
   10770    {
             /* SEL decoder.  Builds a 32-bit byte mask from the four GE flag
                values and uses it to take each result byte from Rn (mask byte
                all ones) or from Rm (mask byte zero).  Returns True on a
                match. */
   10771      UInt regD = 99, regN = 99, regM = 99;
   10772      Bool gate = False;
   10773 
   10774      if (isT) {
                /* isT form: 0xFAA:Rn, then 0xF in bits 15:12 and 0x8 in
                   bits 7:4 of the second halfword. */
   10775         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
   10776            regN = INSNT0(3,0);
   10777            regD = INSNT1(11,8);
   10778            regM = INSNT1(3,0);
   10779            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10780               gate = True;
   10781         }
   10782      } else {
                /* Non-isT form: bits 27:20 = 01101000, bits 11:8 = 1111,
                   bits 7:4 = 1011; no operand may be register 15. */
   10783         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
   10784             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10785             INSNA(7,4)   == BITS4(1,0,1,1)) {
   10786            regD = INSNA(15,12);
   10787            regN = INSNA(19,16);
   10788            regM = INSNA(3,0);
   10789            if (regD != 15 && regN != 15 && regM != 15)
   10790               gate = True;
   10791         }
   10792      }
   10793 
   10794      if (gate) {
   10795         IRTemp irt_ge_flag0 = newTemp(Ity_I32);
   10796         IRTemp irt_ge_flag1 = newTemp(Ity_I32);
   10797         IRTemp irt_ge_flag2 = newTemp(Ity_I32);
   10798         IRTemp irt_ge_flag3 = newTemp(Ity_I32);
   10799 
   10800         assign( irt_ge_flag0, get_GEFLAG32(0) );
   10801         assign( irt_ge_flag1, get_GEFLAG32(1) );
   10802         assign( irt_ge_flag2, get_GEFLAG32(2) );
   10803         assign( irt_ge_flag3, get_GEFLAG32(3) );
   10804 
                /* x | (0 - x) has its top bit set exactly when x is nonzero,
                   so each flag value is treated as "set" if it is any nonzero
                   32-bit value. */
   10805         IRExpr* ire_ge_flag0_or
   10806           = binop(Iop_Or32, mkexpr(irt_ge_flag0),
   10807                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag0)));
   10808         IRExpr* ire_ge_flag1_or
   10809           = binop(Iop_Or32, mkexpr(irt_ge_flag1),
   10810                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag1)));
   10811         IRExpr* ire_ge_flag2_or
   10812           = binop(Iop_Or32, mkexpr(irt_ge_flag2),
   10813                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag2)));
   10814         IRExpr* ire_ge_flag3_or
   10815           = binop(Iop_Or32, mkexpr(irt_ge_flag3),
   10816                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag3)));
   10817 
                /* Smear each top bit across the word with an arithmetic shift
                   by 31, then keep only the byte lane belonging to that flag:
                   flag 0 -> bits 7:0, flag 1 -> 15:8, flag 2 -> 23:16,
                   flag 3 -> 31:24. */
   10818         IRExpr* ire_ge_flags
   10819           = binop( Iop_Or32,
   10820                    binop(Iop_Or32,
   10821                          binop(Iop_And32,
   10822                                binop(Iop_Sar32, ire_ge_flag0_or, mkU8(31)),
   10823                                mkU32(0x000000ff)),
   10824                          binop(Iop_And32,
   10825                                binop(Iop_Sar32, ire_ge_flag1_or, mkU8(31)),
   10826                                mkU32(0x0000ff00))),
   10827                    binop(Iop_Or32,
   10828                          binop(Iop_And32,
   10829                                binop(Iop_Sar32, ire_ge_flag2_or, mkU8(31)),
   10830                                mkU32(0x00ff0000)),
   10831                          binop(Iop_And32,
   10832                                binop(Iop_Sar32, ire_ge_flag3_or, mkU8(31)),
   10833                                mkU32(0xff000000))) );
   10834 
                /* Rd = (Rn & mask) | (Rm & ~mask). */
   10835         IRExpr* ire_result
   10836           = binop(Iop_Or32,
   10837                   binop(Iop_And32,
   10838                         isT ? getIRegT(regN) : getIRegA(regN),
   10839                         ire_ge_flags ),
   10840                   binop(Iop_And32,
   10841                         isT ? getIRegT(regM) : getIRegA(regM),
   10842                         unop(Iop_Not32, ire_ge_flags)));
   10843 
   10844         if (isT)
   10845            putIRegT( regD, ire_result, condT );
   10846         else
   10847            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10848 
   10849         DIP("sel%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10850         return True;
   10851      }
   10852      /* fall through */
   10853    }
   10854 
   10855    /* ----------------- uxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
   10856    {
             /* UXTAB16 decoder.  Rm is rotated right by 8 * rotate bits,
                bytes 0 and 2 of the rotated value are zero-extended to
                halfwords, and each is added to the corresponding halfword of
                Rn.  Returns True on a match. */
   10857      UInt regD = 99, regN = 99, regM = 99, rotate = 99;
   10858      Bool gate = False;
   10859 
   10860      if (isT) {
                /* isT form: 0xFA3:Rn, then 0xF in bits 15:12 and 10 in
                   bits 7:6 of the second halfword; bits 5:4 give rotate. */
   10861         if (INSNT0(15,4) == 0xFA3 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
   10862            regN   = INSNT0(3,0);
   10863            regD   = INSNT1(11,8);
   10864            regM   = INSNT1(3,0);
   10865            rotate = INSNT1(5,4);
   10866            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10867               gate = True;
   10868         }
   10869      } else {
                /* Non-isT form: bits 27:20 = 01101100, bits 9:4 = 000111;
                   bits 11:10 give rotate.  No operand may be register 15. */
   10870         if (INSNA(27,20) == BITS8(0,1,1,0,1,1,0,0) &&
   10871             INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
   10872            regD   = INSNA(15,12);
   10873            regN   = INSNA(19,16);
   10874            regM   = INSNA(3,0);
   10875            rotate = INSNA(11,10);
   10876            if (regD != 15 && regN != 15 && regM != 15)
   10877              gate = True;
   10878         }
   10879      }
   10880 
   10881      if (gate) {
   10882         IRTemp irt_regN = newTemp(Ity_I32);
   10883         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10884 
   10885         IRTemp irt_regM = newTemp(Ity_I32);
   10886         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10887 
                /* irt_rot holds bytes 0 and 2 of the rotated Rm, each in the
                   low byte of its 16-bit lane with the high byte cleared. */
   10888         IRTemp irt_rot = newTemp(Ity_I32);
   10889         assign( irt_rot, binop(Iop_And32,
   10890                                genROR32(irt_regM, 8 * rotate),
   10891                                mkU32(0x00FF00FF)) );
   10892 
                /* Low lane: full 32-bit add, then mask to 16 bits so any
                   carry out of bit 15 is discarded. */
   10893         IRExpr* resLo
   10894            = binop(Iop_And32,
   10895                    binop(Iop_Add32, mkexpr(irt_regN), mkexpr(irt_rot)),
   10896                    mkU32(0x0000FFFF));
   10897 
                /* High lane: both operands are masked to bits 31:16 before
                   the add, so the low lane cannot carry into it. */
   10898         IRExpr* resHi
   10899            = binop(Iop_Add32,
   10900                    binop(Iop_And32, mkexpr(irt_regN), mkU32(0xFFFF0000)),
   10901                    binop(Iop_And32, mkexpr(irt_rot),  mkU32(0xFFFF0000)));
   10902 
   10903         IRExpr* ire_result
   10904            = binop( Iop_Or32, resHi, resLo );
   10905 
   10906         if (isT)
   10907            putIRegT( regD, ire_result, condT );
   10908         else
   10909            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10910 
   10911         DIP( "uxtab16%s r%u, r%u, r%u, ROR #%u\n",
   10912              nCC(conq), regD, regN, regM, 8 * rotate );
   10913         return True;
   10914      }
   10915      /* fall through */
   10916    }
   10917 
   10918    /* --------------- usad8  Rd,Rn,Rm    ---------------- */
   10919    /* --------------- usada8 Rd,Rn,Rm,Ra ---------------- */
   10920    {
             /* Decoder shared by USAD8 and USADA8.  Rd receives
                Iop_Sad8Ux4(Rn, Rm) plus an accumulator; an rA field of 15
                denotes the non-accumulating form, for which the accumulator
                is the constant 0.  Returns True on a match.
                NOTE(review): Iop_Sad8Ux4 is presumably the sum of absolute
                differences of the four unsigned byte lanes -- confirm
                against the IR op definition. */
   10921      UInt rD = 99, rN = 99, rM = 99, rA = 99;
   10922      Bool gate = False;
   10923 
   10924      if (isT) {
                /* isT form: 0xFB7:Rn, then Ra:Rd:0000:Rm.  rA is not passed
                   through isBadRegT; only the value 13 is rejected, so 15
                   remains available as the "no accumulator" marker. */
   10925        if (INSNT0(15,4) == 0xFB7 && INSNT1(7,4) == BITS4(0,0,0,0)) {
   10926            rN = INSNT0(3,0);
   10927            rA = INSNT1(15,12);
   10928            rD = INSNT1(11,8);
   10929            rM = INSNT1(3,0);
   10930            if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && rA != 13)
   10931               gate = True;
   10932         }
   10933      } else {
                /* Non-isT form: bits 27:20 = 01111000, bits 7:4 = 0001. */
   10934         if (INSNA(27,20) == BITS8(0,1,1,1,1,0,0,0) &&
   10935             INSNA(7,4)   == BITS4(0,0,0,1) ) {
   10936            rD = INSNA(19,16);
   10937            rA = INSNA(15,12);
   10938            rM = INSNA(11,8);
   10939            rN = INSNA(3,0);
   10940            if (rD != 15 && rN != 15 && rM != 15 /* but rA can be 15 */)
   10941               gate = True;
   10942         }
   10943      }
   10944      /* We allow rA == 15, to denote the usad8 (no accumulator) case. */
   10945 
   10946      if (gate) {
   10947         IRExpr* rNe = isT ? getIRegT(rN) : getIRegA(rN);
   10948         IRExpr* rMe = isT ? getIRegT(rM) : getIRegA(rM);
                /* Accumulator operand: literal zero for usad8, else Ra. */
   10949         IRExpr* rAe = rA == 15 ? mkU32(0)
   10950                                : (isT ? getIRegT(rA) : getIRegA(rA));
   10951         IRExpr* res = binop(Iop_Add32,
   10952                             binop(Iop_Sad8Ux4, rNe, rMe),
   10953                             rAe);
   10954         if (isT)
   10955            putIRegT( rD, res, condT );
   10956         else
   10957            putIRegA( rD, res, condT, Ijk_Boring );
   10958 
                /* The trace mnemonic follows the same rA == 15 distinction. */
   10959         if (rA == 15) {
   10960            DIP( "usad8%s r%u, r%u, r%u\n",
   10961                 nCC(conq), rD, rN, rM );
   10962         } else {
   10963            DIP( "usada8%s r%u, r%u, r%u, r%u\n",
   10964                 nCC(conq), rD, rN, rM, rA );
   10965         }
   10966         return True;
   10967      }
   10968      /* fall through */
   10969    }
   10970 
   10971    /* ------------------ qadd<c> <Rd>,<Rn>,<Rm> ------------------- */
   10972    {
   10973      UInt regD = 99, regN = 99, regM = 99;
   10974      Bool gate = False;
   10975 
   10976      if (isT) {
   10977         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
   10978            regN = INSNT0(3,0);
   10979            regD = INSNT1(11,8);
   10980            regM = INSNT1(3,0);
   10981            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10982               gate = True;
   10983         }
   10984      } else {
   10985         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
   10986             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   10987             INSNA(7,4)   == BITS4(0,1,0,1)) {
   10988            regD = INSNA(15,12);
   10989            regN = INSNA(19,16);
   10990            regM = INSNA(3,0);
   10991            if (regD != 15 && regN != 15 && regM != 15)
   10992               gate = True;
   10993         }
   10994      }
   10995 
   10996      if (gate) {
   10997         IRTemp rNt   = newTemp(Ity_I32);
   10998         IRTemp rMt   = newTemp(Ity_I32);
   10999         IRTemp res_q = newTemp(Ity_I32);
   11000 
   11001         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11002         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11003 
   11004         assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt)));
   11005         if (isT)
   11006            putIRegT( regD, mkexpr(res_q), condT );
   11007         else
   11008            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11009 
   11010         or_into_QFLAG32(
   11011            signed_overflow_after_Add32(
   11012               binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
   11013            condT
   11014         );
   11015 
   11016         DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11017         return True;
   11018      }
   11019      /* fall through */
   11020    }
   11021 
   11022    /* ------------------ qdadd<c> <Rd>,<Rm>,<Rn> ------------------- */
   11023    {
   11024      UInt regD = 99, regN = 99, regM = 99;
   11025      Bool gate = False;
   11026 
   11027      if (isT) {
   11028         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF090) {
   11029            regN = INSNT0(3,0);
   11030            regD = INSNT1(11,8);
   11031            regM = INSNT1(3,0);
   11032            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11033               gate = True;
   11034         }
   11035      } else {
   11036         if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
   11037             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   11038             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11039            regD = INSNA(15,12);
   11040            regN = INSNA(19,16);
   11041            regM = INSNA(3,0);
   11042            if (regD != 15 && regN != 15 && regM != 15)
   11043               gate = True;
   11044         }
   11045      }
   11046 
   11047      if (gate) {
   11048         IRTemp rNt   = newTemp(Ity_I32);
   11049         IRTemp rMt   = newTemp(Ity_I32);
   11050         IRTemp rN_d  = newTemp(Ity_I32);
   11051         IRTemp res_q = newTemp(Ity_I32);
   11052 
   11053         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11054         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11055 
   11056         or_into_QFLAG32(
   11057            signed_overflow_after_Add32(
   11058               binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
   11059            condT
   11060         );
   11061 
   11062         assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
   11063         assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rN_d)));
   11064         if (isT)
   11065            putIRegT( regD, mkexpr(res_q), condT );
   11066         else
   11067            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11068 
   11069         or_into_QFLAG32(
   11070            signed_overflow_after_Add32(
   11071               binop(Iop_Add32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
   11072            condT
   11073         );
   11074 
   11075         DIP("qdadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11076         return True;
   11077      }
   11078      /* fall through */
   11079    }
   11080 
   11081    /* ------------------ qsub<c> <Rd>,<Rn>,<Rm> ------------------- */
   11082    {
   11083      UInt regD = 99, regN = 99, regM = 99;
   11084      Bool gate = False;
   11085 
   11086      if (isT) {
   11087         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) {
   11088            regN = INSNT0(3,0);
   11089            regD = INSNT1(11,8);
   11090            regM = INSNT1(3,0);
   11091            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11092               gate = True;
   11093         }
   11094      } else {
   11095         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
   11096             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   11097             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11098            regD = INSNA(15,12);
   11099            regN = INSNA(19,16);
   11100            regM = INSNA(3,0);
   11101            if (regD != 15 && regN != 15 && regM != 15)
   11102               gate = True;
   11103         }
   11104      }
   11105 
   11106      if (gate) {
   11107         IRTemp rNt   = newTemp(Ity_I32);
   11108         IRTemp rMt   = newTemp(Ity_I32);
   11109         IRTemp res_q = newTemp(Ity_I32);
   11110 
   11111         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11112         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11113 
   11114         assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt)));
   11115         if (isT)
   11116            putIRegT( regD, mkexpr(res_q), condT );
   11117         else
   11118            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11119 
   11120         or_into_QFLAG32(
   11121            signed_overflow_after_Sub32(
   11122               binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
   11123            condT
   11124         );
   11125 
   11126         DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11127         return True;
   11128      }
   11129      /* fall through */
   11130    }
   11131 
   11132    /* ------------------ qdsub<c> <Rd>,<Rm>,<Rn> ------------------- */
   11133    {
   11134      UInt regD = 99, regN = 99, regM = 99;
   11135      Bool gate = False;
   11136 
   11137      if (isT) {
   11138         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0B0) {
   11139            regN = INSNT0(3,0);
   11140            regD = INSNT1(11,8);
   11141            regM = INSNT1(3,0);
   11142            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11143               gate = True;
   11144         }
   11145      } else {
   11146         if (INSNA(27,20) == BITS8(0,0,0,1,0,1,1,0) &&
   11147             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   11148             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11149            regD = INSNA(15,12);
   11150            regN = INSNA(19,16);
   11151            regM = INSNA(3,0);
   11152            if (regD != 15 && regN != 15 && regM != 15)
   11153               gate = True;
   11154         }
   11155      }
   11156 
   11157      if (gate) {
   11158         IRTemp rNt   = newTemp(Ity_I32);
   11159         IRTemp rMt   = newTemp(Ity_I32);
   11160         IRTemp rN_d  = newTemp(Ity_I32);
   11161         IRTemp res_q = newTemp(Ity_I32);
   11162 
   11163         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11164         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11165 
   11166         or_into_QFLAG32(
   11167            signed_overflow_after_Add32(
   11168               binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
   11169            condT
   11170         );
   11171 
   11172         assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
   11173         assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rN_d)));
   11174         if (isT)
   11175            putIRegT( regD, mkexpr(res_q), condT );
   11176         else
   11177            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11178 
   11179         or_into_QFLAG32(
   11180            signed_overflow_after_Sub32(
   11181               binop(Iop_Sub32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
   11182            condT
   11183         );
   11184 
   11185         DIP("qdsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11186         return True;
   11187      }
   11188      /* fall through */
   11189    }
   11190 
   11191    /* ------------------ uqsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
   11192    {
   11193      UInt regD = 99, regN = 99, regM = 99;
   11194      Bool gate = False;
   11195 
   11196      if (isT) {
   11197         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11198            regN = INSNT0(3,0);
   11199            regD = INSNT1(11,8);
   11200            regM = INSNT1(3,0);
   11201            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11202               gate = True;
   11203         }
   11204      } else {
   11205         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11206             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11207             INSNA(7,4)   == BITS4(0,1,1,1)) {
   11208            regD = INSNA(15,12);
   11209            regN = INSNA(19,16);
   11210            regM = INSNA(3,0);
   11211            if (regD != 15 && regN != 15 && regM != 15)
   11212              gate = True;
   11213         }
   11214      }
   11215 
   11216      if (gate) {
   11217         IRTemp rNt   = newTemp(Ity_I32);
   11218         IRTemp rMt   = newTemp(Ity_I32);
   11219         IRTemp res_q = newTemp(Ity_I32);
   11220 
   11221         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11222         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11223 
   11224         assign(res_q, binop(Iop_QSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
   11225         if (isT)
   11226            putIRegT( regD, mkexpr(res_q), condT );
   11227         else
   11228            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11229 
   11230         DIP("uqsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11231         return True;
   11232      }
   11233      /* fall through */
   11234    }
   11235 
   11236    /* ----------------- shadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
   11237    {
   11238      UInt regD = 99, regN = 99, regM = 99;
   11239      Bool gate = False;
   11240 
   11241      if (isT) {
   11242         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   11243            regN = INSNT0(3,0);
   11244            regD = INSNT1(11,8);
   11245            regM = INSNT1(3,0);
   11246            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11247               gate = True;
   11248         }
   11249      } else {
   11250         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   11251             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11252             INSNA(7,4)   == BITS4(0,0,0,1)) {
   11253            regD = INSNA(15,12);
   11254            regN = INSNA(19,16);
   11255            regM = INSNA(3,0);
   11256            if (regD != 15 && regN != 15 && regM != 15)
   11257               gate = True;
   11258         }
   11259      }
   11260 
   11261      if (gate) {
   11262         IRTemp rNt   = newTemp(Ity_I32);
   11263         IRTemp rMt   = newTemp(Ity_I32);
   11264         IRTemp res_q = newTemp(Ity_I32);
   11265 
   11266         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11267         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11268 
   11269         assign(res_q, binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
   11270         if (isT)
   11271            putIRegT( regD, mkexpr(res_q), condT );
   11272         else
   11273            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11274 
   11275         DIP("shadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11276         return True;
   11277      }
   11278      /* fall through */
   11279    }
   11280 
   11281    /* ----------------- uhsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
   11282    {
   11283      UInt regD = 99, regN = 99, regM = 99;
   11284      Bool gate = False;
   11285 
   11286      if (isT) {
   11287         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   11288            regN = INSNT0(3,0);
   11289            regD = INSNT1(11,8);
   11290            regM = INSNT1(3,0);
   11291            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11292               gate = True;
   11293         }
   11294      } else {
   11295         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   11296             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11297             INSNA(7,4)   == BITS4(1,1,1,1)) {
   11298            regD = INSNA(15,12);
   11299            regN = INSNA(19,16);
   11300            regM = INSNA(3,0);
   11301            if (regD != 15 && regN != 15 && regM != 15)
   11302               gate = True;
   11303         }
   11304      }
   11305 
   11306      if (gate) {
   11307         IRTemp rNt   = newTemp(Ity_I32);
   11308         IRTemp rMt   = newTemp(Ity_I32);
   11309         IRTemp res_q = newTemp(Ity_I32);
   11310 
   11311         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11312         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11313 
   11314         assign(res_q, binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
   11315         if (isT)
   11316            putIRegT( regD, mkexpr(res_q), condT );
   11317         else
   11318            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11319 
   11320         DIP("uhsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11321         return True;
   11322      }
   11323      /* fall through */
   11324    }
   11325 
   11326    /* ----------------- uhsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
   11327    {
   11328      UInt regD = 99, regN = 99, regM = 99;
   11329      Bool gate = False;
   11330 
   11331      if (isT) {
   11332         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   11333            regN = INSNT0(3,0);
   11334            regD = INSNT1(11,8);
   11335            regM = INSNT1(3,0);
   11336            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11337               gate = True;
   11338         }
   11339      } else {
   11340         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   11341             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11342             INSNA(7,4)   == BITS4(0,1,1,1)) {
   11343            regD = INSNA(15,12);
   11344            regN = INSNA(19,16);
   11345            regM = INSNA(3,0);
   11346            if (regD != 15 && regN != 15 && regM != 15)
   11347               gate = True;
   11348         }
   11349      }
   11350 
   11351      if (gate) {
   11352         IRTemp rNt   = newTemp(Ity_I32);
   11353         IRTemp rMt   = newTemp(Ity_I32);
   11354         IRTemp res_q = newTemp(Ity_I32);
   11355 
   11356         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11357         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11358 
   11359         assign(res_q, binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
   11360         if (isT)
   11361            putIRegT( regD, mkexpr(res_q), condT );
   11362         else
   11363            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11364 
   11365         DIP("uhsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11366         return True;
   11367      }
   11368      /* fall through */
   11369    }
   11370 
   11371    /* ------------------ uqadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
   11372    {
   11373      UInt regD = 99, regN = 99, regM = 99;
   11374      Bool gate = False;
   11375 
   11376      if (isT) {
   11377         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11378            regN = INSNT0(3,0);
   11379            regD = INSNT1(11,8);
   11380            regM = INSNT1(3,0);
   11381            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11382               gate = True;
   11383         }
   11384      } else {
   11385         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11386             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11387             INSNA(7,4)   == BITS4(0,0,0,1)) {
   11388            regD = INSNA(15,12);
   11389            regN = INSNA(19,16);
   11390            regM = INSNA(3,0);
   11391            if (regD != 15 && regN != 15 && regM != 15)
   11392               gate = True;
   11393         }
   11394      }
   11395 
   11396      if (gate) {
   11397         IRTemp rNt   = newTemp(Ity_I32);
   11398         IRTemp rMt   = newTemp(Ity_I32);
   11399         IRTemp res_q = newTemp(Ity_I32);
   11400 
   11401         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11402         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11403 
   11404         assign(res_q, binop(Iop_QAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
   11405         if (isT)
   11406            putIRegT( regD, mkexpr(res_q), condT );
   11407         else
   11408            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11409 
   11410         DIP("uqadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11411         return True;
   11412      }
   11413      /* fall through */
   11414    }
   11415 
   11416    /* ------------------- uqsax<c> <Rd>,<Rn>,<Rm> ------------------- */
   11417    {
   11418      UInt regD = 99, regN = 99, regM = 99;
   11419      Bool gate = False;
   11420 
   11421      if (isT) {
   11422         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11423            regN = INSNT0(3,0);
   11424            regD = INSNT1(11,8);
   11425            regM = INSNT1(3,0);
   11426            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11427               gate = True;
   11428         }
   11429      } else {
   11430         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11431             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11432             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11433            regD = INSNA(15,12);
   11434            regN = INSNA(19,16);
   11435            regM = INSNA(3,0);
   11436            if (regD != 15 && regN != 15 && regM != 15)
   11437               gate = True;
   11438         }
   11439      }
   11440 
   11441      if (gate) {
   11442         IRTemp irt_regN     = newTemp(Ity_I32);
   11443         IRTemp irt_regM     = newTemp(Ity_I32);
   11444         IRTemp irt_sum      = newTemp(Ity_I32);
   11445         IRTemp irt_diff     = newTemp(Ity_I32);
   11446         IRTemp irt_sum_res  = newTemp(Ity_I32);
   11447         IRTemp irt_diff_res = newTemp(Ity_I32);
   11448 
   11449         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11450         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11451 
   11452         assign( irt_diff,
   11453                 binop( Iop_Sub32,
   11454                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11455                        binop( Iop_Shr32,
   11456                               binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
   11457                               mkU8(16) ) ) );
   11458         armUnsignedSatQ( &irt_diff_res, NULL, irt_diff, 0x10);
   11459 
   11460         assign( irt_sum,
   11461                 binop( Iop_Add32,
   11462                        binop( Iop_Shr32,
   11463                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   11464                               mkU8(16) ),
   11465                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) )) );
   11466         armUnsignedSatQ( &irt_sum_res, NULL, irt_sum, 0x10 );
   11467 
   11468         IRExpr* ire_result = binop( Iop_Or32,
   11469                                     binop( Iop_Shl32, mkexpr(irt_diff_res),
   11470                                            mkU8(16) ),
   11471                                     binop( Iop_And32, mkexpr(irt_sum_res),
   11472                                            mkU32(0xFFFF)) );
   11473 
   11474         if (isT)
   11475            putIRegT( regD, ire_result, condT );
   11476         else
   11477            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11478 
   11479         DIP( "uqsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11480         return True;
   11481      }
   11482      /* fall through */
   11483    }
   11484 
   11485    /* ------------------- uqasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   11486    {
   11487      UInt regD = 99, regN = 99, regM = 99;
   11488      Bool gate = False;
   11489 
   11490      if (isT) {
   11491         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11492            regN = INSNT0(3,0);
   11493            regD = INSNT1(11,8);
   11494            regM = INSNT1(3,0);
   11495            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11496               gate = True;
   11497         }
   11498      } else {
   11499         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11500             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11501             INSNA(7,4)   == BITS4(0,0,1,1)) {
   11502            regD = INSNA(15,12);
   11503            regN = INSNA(19,16);
   11504            regM = INSNA(3,0);
   11505            if (regD != 15 && regN != 15 && regM != 15)
   11506               gate = True;
   11507         }
   11508      }
   11509 
   11510      if (gate) {
   11511         IRTemp irt_regN     = newTemp(Ity_I32);
   11512         IRTemp irt_regM     = newTemp(Ity_I32);
   11513         IRTemp irt_sum      = newTemp(Ity_I32);
   11514         IRTemp irt_diff     = newTemp(Ity_I32);
   11515         IRTemp irt_res_sum  = newTemp(Ity_I32);
   11516         IRTemp irt_res_diff = newTemp(Ity_I32);
   11517 
   11518         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11519         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11520 
   11521         assign( irt_diff,
   11522                 binop( Iop_Sub32,
   11523                        binop( Iop_Shr32,
   11524                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   11525                               mkU8(16) ),
   11526                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
   11527         armUnsignedSatQ( &irt_res_diff, NULL, irt_diff, 0x10 );
   11528 
   11529         assign( irt_sum,
   11530                 binop( Iop_Add32,
   11531                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11532                        binop( Iop_Shr32,
   11533                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   11534                               mkU8(16) ) ) );
   11535         armUnsignedSatQ( &irt_res_sum, NULL, irt_sum, 0x10 );
   11536 
   11537         IRExpr* ire_result
   11538           = binop( Iop_Or32,
   11539                    binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
   11540                    binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
   11541 
   11542         if (isT)
   11543            putIRegT( regD, ire_result, condT );
   11544         else
   11545            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11546 
   11547         DIP( "uqasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11548         return True;
   11549      }
   11550      /* fall through */
   11551    }
   11552 
   11553    /* ------------------- usax<c> <Rd>,<Rn>,<Rm> ------------------- */
   11554    {
   11555      UInt regD = 99, regN = 99, regM = 99;
   11556      Bool gate = False;
   11557 
   11558      if (isT) {
   11559         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   11560            regN = INSNT0(3,0);
   11561            regD = INSNT1(11,8);
   11562            regM = INSNT1(3,0);
   11563            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11564               gate = True;
   11565         }
   11566      } else {
   11567         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   11568             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11569             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11570            regD = INSNA(15,12);
   11571            regN = INSNA(19,16);
   11572            regM = INSNA(3,0);
   11573            if (regD != 15 && regN != 15 && regM != 15)
   11574               gate = True;
   11575         }
   11576      }
   11577 
   11578      if (gate) {
   11579         IRTemp irt_regN = newTemp(Ity_I32);
   11580         IRTemp irt_regM = newTemp(Ity_I32);
   11581         IRTemp irt_sum  = newTemp(Ity_I32);
   11582         IRTemp irt_diff = newTemp(Ity_I32);
   11583 
   11584         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11585         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11586 
   11587         assign( irt_sum,
   11588                 binop( Iop_Add32,
   11589                        unop( Iop_16Uto32,
   11590                              unop( Iop_32to16, mkexpr(irt_regN) )
   11591                        ),
   11592                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
   11593 
   11594         assign( irt_diff,
   11595                 binop( Iop_Sub32,
   11596                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11597                        unop( Iop_16Uto32,
   11598                              unop( Iop_32to16, mkexpr(irt_regM) )
   11599                        )
   11600                 )
   11601         );
   11602 
   11603         IRExpr* ire_result
   11604           = binop( Iop_Or32,
   11605                    binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
   11606                    binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
   11607 
   11608         IRTemp ge10 = newTemp(Ity_I32);
   11609         assign( ge10, IRExpr_ITE( binop( Iop_CmpLE32U,
   11610                                          mkU32(0x10000), mkexpr(irt_sum) ),
   11611                                   mkU32(1), mkU32(0) ) );
   11612         put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
   11613         put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
   11614 
   11615         IRTemp ge32 = newTemp(Ity_I32);
   11616         assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
   11617         put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
   11618         put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
   11619 
   11620         if (isT)
   11621            putIRegT( regD, ire_result, condT );
   11622         else
   11623            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11624 
   11625         DIP( "usax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11626         return True;
   11627      }
   11628      /* fall through */
   11629    }
   11630 
   11631    /* ------------------- uasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   11632    {
   11633      UInt regD = 99, regN = 99, regM = 99;
   11634      Bool gate = False;
   11635 
   11636      if (isT) {
   11637         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   11638            regN = INSNT0(3,0);
   11639            regD = INSNT1(11,8);
   11640            regM = INSNT1(3,0);
   11641            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11642               gate = True;
   11643         }
   11644      } else {
   11645         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   11646             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11647             INSNA(7,4)   == BITS4(0,0,1,1)) {
   11648            regD = INSNA(15,12);
   11649            regN = INSNA(19,16);
   11650            regM = INSNA(3,0);
   11651            if (regD != 15 && regN != 15 && regM != 15)
   11652               gate = True;
   11653         }
   11654      }
   11655 
   11656      if (gate) {
   11657         IRTemp irt_regN = newTemp(Ity_I32);
   11658         IRTemp irt_regM = newTemp(Ity_I32);
   11659         IRTemp irt_sum  = newTemp(Ity_I32);
   11660         IRTemp irt_diff = newTemp(Ity_I32);
   11661 
   11662         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11663         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11664 
   11665         assign( irt_diff,
   11666                 binop( Iop_Sub32,
   11667                        unop( Iop_16Uto32,
   11668                              unop( Iop_32to16, mkexpr(irt_regN) )
   11669                        ),
   11670                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
   11671 
   11672         assign( irt_sum,
   11673                 binop( Iop_Add32,
   11674                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11675                        unop( Iop_16Uto32,
   11676                              unop( Iop_32to16, mkexpr(irt_regM) )
   11677                        ) ) );
   11678 
   11679         IRExpr* ire_result
   11680           = binop( Iop_Or32,
   11681                    binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
   11682                    binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
   11683 
   11684         IRTemp ge10 = newTemp(Ity_I32);
   11685         assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
   11686         put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
   11687         put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
   11688 
   11689         IRTemp ge32 = newTemp(Ity_I32);
   11690         assign( ge32, IRExpr_ITE( binop( Iop_CmpLE32U,
   11691                                          mkU32(0x10000), mkexpr(irt_sum) ),
   11692                                   mkU32(1), mkU32(0) ) );
   11693         put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
   11694         put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
   11695 
   11696         if (isT)
   11697            putIRegT( regD, ire_result, condT );
   11698         else
   11699            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11700 
   11701         DIP( "uasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11702         return True;
   11703      }
   11704      /* fall through */
   11705    }
   11706 
   11707    /* ------------------- ssax<c> <Rd>,<Rn>,<Rm> ------------------- */
   11708    {
   11709      UInt regD = 99, regN = 99, regM = 99;
   11710      Bool gate = False;
   11711 
   11712      if (isT) {
   11713         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   11714            regN = INSNT0(3,0);
   11715            regD = INSNT1(11,8);
   11716            regM = INSNT1(3,0);
   11717            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11718               gate = True;
   11719         }
   11720      } else {
   11721         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   11722             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11723             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11724            regD = INSNA(15,12);
   11725            regN = INSNA(19,16);
   11726            regM = INSNA(3,0);
   11727            if (regD != 15 && regN != 15 && regM != 15)
   11728               gate = True;
   11729         }
   11730      }
   11731 
   11732      if (gate) {
   11733         IRTemp irt_regN = newTemp(Ity_I32);
   11734         IRTemp irt_regM = newTemp(Ity_I32);
   11735         IRTemp irt_sum  = newTemp(Ity_I32);
   11736         IRTemp irt_diff = newTemp(Ity_I32);
   11737 
   11738         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11739         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11740 
   11741         assign( irt_sum,
   11742                 binop( Iop_Add32,
   11743                        binop( Iop_Sar32,
   11744                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   11745                               mkU8(16) ),
   11746                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
   11747 
   11748         assign( irt_diff,
   11749                 binop( Iop_Sub32,
   11750                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   11751                        binop( Iop_Sar32,
   11752                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   11753                               mkU8(16) ) ) );
   11754 
   11755         IRExpr* ire_result
   11756           = binop( Iop_Or32,
   11757                    binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
   11758                    binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
   11759 
   11760         IRTemp ge10 = newTemp(Ity_I32);
   11761         assign(ge10, unop(Iop_Not32, mkexpr(irt_sum)));
   11762         put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
   11763         put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
   11764 
   11765         IRTemp ge32 = newTemp(Ity_I32);
   11766         assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
   11767         put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
   11768         put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
   11769 
   11770         if (isT)
   11771            putIRegT( regD, ire_result, condT );
   11772         else
   11773            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11774 
   11775         DIP( "ssax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11776         return True;
   11777      }
   11778      /* fall through */
   11779    }
   11780 
   11781    /* ----------------- shsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
   11782    {
   11783      UInt regD = 99, regN = 99, regM = 99;
   11784      Bool gate = False;
   11785 
   11786      if (isT) {
   11787         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   11788            regN = INSNT0(3,0);
   11789            regD = INSNT1(11,8);
   11790            regM = INSNT1(3,0);
   11791            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11792               gate = True;
   11793         }
   11794      } else {
   11795         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   11796             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11797             INSNA(7,4)   == BITS4(1,1,1,1)) {
   11798            regD = INSNA(15,12);
   11799            regN = INSNA(19,16);
   11800            regM = INSNA(3,0);
   11801            if (regD != 15 && regN != 15 && regM != 15)
   11802               gate = True;
   11803         }
   11804      }
   11805 
   11806      if (gate) {
   11807         IRTemp rNt   = newTemp(Ity_I32);
   11808         IRTemp rMt   = newTemp(Ity_I32);
   11809         IRTemp res_q = newTemp(Ity_I32);
   11810 
   11811         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11812         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11813 
   11814         assign(res_q, binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
   11815         if (isT)
   11816            putIRegT( regD, mkexpr(res_q), condT );
   11817         else
   11818            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11819 
   11820         DIP("shsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11821         return True;
   11822      }
   11823      /* fall through */
   11824    }
   11825 
   11826    /* ----------------- sxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
   11827    {
   11828      UInt regD = 99, regN = 99, regM = 99, rotate = 99;
   11829      Bool gate = False;
   11830 
   11831      if (isT) {
   11832         if (INSNT0(15,4) == 0xFA2 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
   11833            regN   = INSNT0(3,0);
   11834            regD   = INSNT1(11,8);
   11835            regM   = INSNT1(3,0);
   11836            rotate = INSNT1(5,4);
   11837            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11838               gate = True;
   11839         }
   11840      } else {
   11841         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
   11842             INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
   11843            regD   = INSNA(15,12);
   11844            regN   = INSNA(19,16);
   11845            regM   = INSNA(3,0);
   11846            rotate = INSNA(11,10);
   11847            if (regD != 15 && regN != 15 && regM != 15)
   11848              gate = True;
   11849         }
   11850      }
   11851 
   11852      if (gate) {
   11853         IRTemp irt_regN = newTemp(Ity_I32);
   11854         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11855 
   11856         IRTemp irt_regM = newTemp(Ity_I32);
   11857         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11858 
   11859         IRTemp irt_rot = newTemp(Ity_I32);
   11860         assign( irt_rot, genROR32(irt_regM, 8 * rotate) );
   11861 
   11862         /* FIXME Maybe we can write this arithmetic in shorter form. */
   11863         IRExpr* resLo
   11864            = binop(Iop_And32,
   11865                    binop(Iop_Add32,
   11866                          mkexpr(irt_regN),
   11867                          unop(Iop_16Uto32,
   11868                               unop(Iop_8Sto16,
   11869                                    unop(Iop_32to8, mkexpr(irt_rot))))),
   11870                    mkU32(0x0000FFFF));
   11871 
   11872         IRExpr* resHi
   11873            = binop(Iop_And32,
   11874                    binop(Iop_Add32,
   11875                          mkexpr(irt_regN),
   11876                          binop(Iop_Shl32,
   11877                                unop(Iop_16Uto32,
   11878                                     unop(Iop_8Sto16,
   11879                                          unop(Iop_32to8,
   11880                                               binop(Iop_Shr32,
   11881                                                     mkexpr(irt_rot),
   11882                                                     mkU8(16))))),
   11883                                mkU8(16))),
   11884                    mkU32(0xFFFF0000));
   11885 
   11886         IRExpr* ire_result
   11887            = binop( Iop_Or32, resHi, resLo );
   11888 
   11889         if (isT)
   11890            putIRegT( regD, ire_result, condT );
   11891         else
   11892            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11893 
   11894         DIP( "sxtab16%s r%u, r%u, r%u, ROR #%u\n",
   11895              nCC(conq), regD, regN, regM, 8 * rotate );
   11896         return True;
   11897      }
   11898      /* fall through */
   11899    }
   11900 
   11901    /* ----------------- shasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   11902    {
   11903      UInt regD = 99, regN = 99, regM = 99;
   11904      Bool gate = False;
   11905 
   11906      if (isT) {
   11907         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   11908            regN = INSNT0(3,0);
   11909            regD = INSNT1(11,8);
   11910            regM = INSNT1(3,0);
   11911            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11912               gate = True;
   11913         }
   11914      } else {
   11915         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   11916             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11917             INSNA(7,4)   == BITS4(0,0,1,1)) {
   11918            regD = INSNA(15,12);
   11919            regN = INSNA(19,16);
   11920            regM = INSNA(3,0);
   11921            if (regD != 15 && regN != 15 && regM != 15)
   11922               gate = True;
   11923         }
   11924      }
   11925 
   11926      if (gate) {
   11927         IRTemp rNt   = newTemp(Ity_I32);
   11928         IRTemp rMt   = newTemp(Ity_I32);
   11929         IRTemp irt_diff  = newTemp(Ity_I32);
   11930         IRTemp irt_sum   = newTemp(Ity_I32);
   11931         IRTemp res_q = newTemp(Ity_I32);
   11932 
   11933         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11934         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11935 
   11936         assign( irt_diff,
   11937                 binop(Iop_Sub32,
   11938                       unop(Iop_16Sto32,
   11939                            unop(Iop_32to16,
   11940                                 mkexpr(rNt)
   11941                            )
   11942                       ),
   11943                       unop(Iop_16Sto32,
   11944                            unop(Iop_32to16,
   11945                                 binop(Iop_Shr32,
   11946                                       mkexpr(rMt), mkU8(16)
   11947                                 )
   11948                            )
   11949                       )
   11950                 )
   11951         );
   11952 
   11953         assign( irt_sum,
   11954                 binop(Iop_Add32,
   11955                       unop(Iop_16Sto32,
   11956                            unop(Iop_32to16,
   11957                                 binop(Iop_Shr32,
   11958                                       mkexpr(rNt), mkU8(16)
   11959                                 )
   11960                            )
   11961                       ),
   11962                       unop(Iop_16Sto32,
   11963                            unop(Iop_32to16, mkexpr(rMt)
   11964                            )
   11965                       )
   11966                 )
   11967         );
   11968 
   11969         assign( res_q,
   11970                 binop(Iop_Or32,
   11971                       unop(Iop_16Uto32,
   11972                            unop(Iop_32to16,
   11973                                 binop(Iop_Shr32,
   11974                                       mkexpr(irt_diff), mkU8(1)
   11975                                 )
   11976                            )
   11977                       ),
   11978                       binop(Iop_Shl32,
   11979                             binop(Iop_Shr32,
   11980                                   mkexpr(irt_sum), mkU8(1)
   11981                             ),
   11982                             mkU8(16)
   11983                      )
   11984                 )
   11985         );
   11986 
   11987         if (isT)
   11988            putIRegT( regD, mkexpr(res_q), condT );
   11989         else
   11990            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11991 
   11992         DIP("shasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11993         return True;
   11994      }
   11995      /* fall through */
   11996    }
   11997 
   11998    /* ----------------- uhasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   11999    {
   12000      UInt regD = 99, regN = 99, regM = 99;
   12001      Bool gate = False;
   12002 
   12003      if (isT) {
   12004         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   12005            regN = INSNT0(3,0);
   12006            regD = INSNT1(11,8);
   12007            regM = INSNT1(3,0);
   12008            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12009               gate = True;
   12010         }
   12011      } else {
   12012         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   12013             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12014             INSNA(7,4)   == BITS4(0,0,1,1)) {
   12015            regD = INSNA(15,12);
   12016            regN = INSNA(19,16);
   12017            regM = INSNA(3,0);
   12018            if (regD != 15 && regN != 15 && regM != 15)
   12019               gate = True;
   12020         }
   12021      }
   12022 
   12023      if (gate) {
   12024         IRTemp rNt   = newTemp(Ity_I32);
   12025         IRTemp rMt   = newTemp(Ity_I32);
   12026         IRTemp irt_diff  = newTemp(Ity_I32);
   12027         IRTemp irt_sum   = newTemp(Ity_I32);
   12028         IRTemp res_q = newTemp(Ity_I32);
   12029 
   12030         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12031         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12032 
   12033         assign( irt_diff,
   12034                 binop(Iop_Sub32,
   12035                       unop(Iop_16Uto32,
   12036                            unop(Iop_32to16,
   12037                                 mkexpr(rNt)
   12038                            )
   12039                       ),
   12040                       unop(Iop_16Uto32,
   12041                            unop(Iop_32to16,
   12042                                 binop(Iop_Shr32,
   12043                                       mkexpr(rMt), mkU8(16)
   12044                                 )
   12045                            )
   12046                       )
   12047                 )
   12048         );
   12049 
   12050         assign( irt_sum,
   12051                 binop(Iop_Add32,
   12052                       unop(Iop_16Uto32,
   12053                            unop(Iop_32to16,
   12054                                 binop(Iop_Shr32,
   12055                                       mkexpr(rNt), mkU8(16)
   12056                                 )
   12057                            )
   12058                       ),
   12059                       unop(Iop_16Uto32,
   12060                            unop(Iop_32to16, mkexpr(rMt)
   12061                            )
   12062                       )
   12063                 )
   12064         );
   12065 
   12066         assign( res_q,
   12067                 binop(Iop_Or32,
   12068                       unop(Iop_16Uto32,
   12069                            unop(Iop_32to16,
   12070                                 binop(Iop_Shr32,
   12071                                       mkexpr(irt_diff), mkU8(1)
   12072                                 )
   12073                            )
   12074                       ),
   12075                       binop(Iop_Shl32,
   12076                             binop(Iop_Shr32,
   12077                                   mkexpr(irt_sum), mkU8(1)
   12078                             ),
   12079                             mkU8(16)
   12080                      )
   12081                 )
   12082         );
   12083 
   12084         if (isT)
   12085            putIRegT( regD, mkexpr(res_q), condT );
   12086         else
   12087            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12088 
   12089         DIP("uhasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12090         return True;
   12091      }
   12092      /* fall through */
   12093    }
   12094 
   12095    /* ----------------- shsax<c> <Rd>,<Rn>,<Rm> ------------------- */
   12096    {
   12097      UInt regD = 99, regN = 99, regM = 99;
   12098      Bool gate = False;
   12099 
   12100      if (isT) {
   12101         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   12102            regN = INSNT0(3,0);
   12103            regD = INSNT1(11,8);
   12104            regM = INSNT1(3,0);
   12105            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12106               gate = True;
   12107         }
   12108      } else {
   12109         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   12110             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12111             INSNA(7,4)   == BITS4(0,1,0,1)) {
   12112            regD = INSNA(15,12);
   12113            regN = INSNA(19,16);
   12114            regM = INSNA(3,0);
   12115            if (regD != 15 && regN != 15 && regM != 15)
   12116               gate = True;
   12117         }
   12118      }
   12119 
   12120      if (gate) {
   12121         IRTemp rNt   = newTemp(Ity_I32);
   12122         IRTemp rMt   = newTemp(Ity_I32);
   12123         IRTemp irt_diff  = newTemp(Ity_I32);
   12124         IRTemp irt_sum   = newTemp(Ity_I32);
   12125         IRTemp res_q = newTemp(Ity_I32);
   12126 
   12127         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12128         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12129 
   12130         assign( irt_sum,
   12131                 binop(Iop_Add32,
   12132                       unop(Iop_16Sto32,
   12133                            unop(Iop_32to16,
   12134                                 mkexpr(rNt)
   12135                            )
   12136                       ),
   12137                       unop(Iop_16Sto32,
   12138                            unop(Iop_32to16,
   12139                                 binop(Iop_Shr32,
   12140                                       mkexpr(rMt), mkU8(16)
   12141                                 )
   12142                            )
   12143                       )
   12144                 )
   12145         );
   12146 
   12147         assign( irt_diff,
   12148                 binop(Iop_Sub32,
   12149                       unop(Iop_16Sto32,
   12150                            unop(Iop_32to16,
   12151                                 binop(Iop_Shr32,
   12152                                       mkexpr(rNt), mkU8(16)
   12153                                 )
   12154                            )
   12155                       ),
   12156                       unop(Iop_16Sto32,
   12157                            unop(Iop_32to16, mkexpr(rMt)
   12158                            )
   12159                       )
   12160                 )
   12161         );
   12162 
   12163         assign( res_q,
   12164                 binop(Iop_Or32,
   12165                       unop(Iop_16Uto32,
   12166                            unop(Iop_32to16,
   12167                                 binop(Iop_Shr32,
   12168                                       mkexpr(irt_sum), mkU8(1)
   12169                                 )
   12170                            )
   12171                       ),
   12172                       binop(Iop_Shl32,
   12173                             binop(Iop_Shr32,
   12174                                   mkexpr(irt_diff), mkU8(1)
   12175                             ),
   12176                             mkU8(16)
   12177                      )
   12178                 )
   12179         );
   12180 
   12181         if (isT)
   12182            putIRegT( regD, mkexpr(res_q), condT );
   12183         else
   12184            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12185 
   12186         DIP("shsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12187         return True;
   12188      }
   12189      /* fall through */
   12190    }
   12191 
   12192    /* ----------------- uhsax<c> <Rd>,<Rn>,<Rm> ------------------- */
   12193    {
   12194      UInt regD = 99, regN = 99, regM = 99;
   12195      Bool gate = False;
   12196 
   12197      if (isT) {
   12198         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   12199            regN = INSNT0(3,0);
   12200            regD = INSNT1(11,8);
   12201            regM = INSNT1(3,0);
   12202            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12203               gate = True;
   12204         }
   12205      } else {
   12206         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   12207             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12208             INSNA(7,4)   == BITS4(0,1,0,1)) {
   12209            regD = INSNA(15,12);
   12210            regN = INSNA(19,16);
   12211            regM = INSNA(3,0);
   12212            if (regD != 15 && regN != 15 && regM != 15)
   12213               gate = True;
   12214         }
   12215      }
   12216 
   12217      if (gate) {
   12218         IRTemp rNt   = newTemp(Ity_I32);
   12219         IRTemp rMt   = newTemp(Ity_I32);
   12220         IRTemp irt_diff  = newTemp(Ity_I32);
   12221         IRTemp irt_sum   = newTemp(Ity_I32);
   12222         IRTemp res_q = newTemp(Ity_I32);
   12223 
   12224         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12225         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12226 
   12227         assign( irt_sum,
   12228                 binop(Iop_Add32,
   12229                       unop(Iop_16Uto32,
   12230                            unop(Iop_32to16,
   12231                                 mkexpr(rNt)
   12232                            )
   12233                       ),
   12234                       unop(Iop_16Uto32,
   12235                            unop(Iop_32to16,
   12236                                 binop(Iop_Shr32,
   12237                                       mkexpr(rMt), mkU8(16)
   12238                                 )
   12239                            )
   12240                       )
   12241                 )
   12242         );
   12243 
   12244         assign( irt_diff,
   12245                 binop(Iop_Sub32,
   12246                       unop(Iop_16Uto32,
   12247                            unop(Iop_32to16,
   12248                                 binop(Iop_Shr32,
   12249                                       mkexpr(rNt), mkU8(16)
   12250                                 )
   12251                            )
   12252                       ),
   12253                       unop(Iop_16Uto32,
   12254                            unop(Iop_32to16, mkexpr(rMt)
   12255                            )
   12256                       )
   12257                 )
   12258         );
   12259 
   12260         assign( res_q,
   12261                 binop(Iop_Or32,
   12262                       unop(Iop_16Uto32,
   12263                            unop(Iop_32to16,
   12264                                 binop(Iop_Shr32,
   12265                                       mkexpr(irt_sum), mkU8(1)
   12266                                 )
   12267                            )
   12268                       ),
   12269                       binop(Iop_Shl32,
   12270                             binop(Iop_Shr32,
   12271                                   mkexpr(irt_diff), mkU8(1)
   12272                             ),
   12273                             mkU8(16)
   12274                      )
   12275                 )
   12276         );
   12277 
   12278         if (isT)
   12279            putIRegT( regD, mkexpr(res_q), condT );
   12280         else
   12281            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12282 
   12283         DIP("uhsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12284         return True;
   12285      }
   12286      /* fall through */
   12287    }
   12288 
   12289    /* ----------------- shsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
   12290    {
   12291      UInt regD = 99, regN = 99, regM = 99;
   12292      Bool gate = False;
   12293 
   12294      if (isT) {
   12295         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   12296            regN = INSNT0(3,0);
   12297            regD = INSNT1(11,8);
   12298            regM = INSNT1(3,0);
   12299            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12300               gate = True;
   12301         }
   12302      } else {
   12303         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   12304             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12305             INSNA(7,4)   == BITS4(0,1,1,1)) {
   12306            regD = INSNA(15,12);
   12307            regN = INSNA(19,16);
   12308            regM = INSNA(3,0);
   12309            if (regD != 15 && regN != 15 && regM != 15)
   12310               gate = True;
   12311         }
   12312      }
   12313 
   12314      if (gate) {
   12315         IRTemp rNt   = newTemp(Ity_I32);
   12316         IRTemp rMt   = newTemp(Ity_I32);
   12317         IRTemp res_q = newTemp(Ity_I32);
   12318 
   12319         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12320         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12321 
   12322         assign(res_q, binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
   12323         if (isT)
   12324            putIRegT( regD, mkexpr(res_q), condT );
   12325         else
   12326            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12327 
   12328         DIP("shsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12329         return True;
   12330      }
   12331      /* fall through */
   12332    }
   12333 
   12334    /* ----------------- smmls{r}<c> <Rd>,<Rn>,<Rm>,<Ra> ------------------- */
   12335    {
   12336      UInt rD = 99, rN = 99, rM = 99, rA = 99;
   12337      Bool round  = False;
   12338      Bool gate   = False;
   12339 
   12340      if (isT) {
   12341         if (INSNT0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
   12342             && INSNT0(6,4) == BITS3(1,1,0)
   12343             && INSNT1(7,5) == BITS3(0,0,0)) {
   12344            round = INSNT1(4,4);
   12345            rA    = INSNT1(15,12);
   12346            rD    = INSNT1(11,8);
   12347            rM    = INSNT1(3,0);
   12348            rN    = INSNT0(3,0);
   12349            if (!isBadRegT(rD)
   12350                && !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rA))
   12351               gate = True;
   12352         }
   12353      } else {
   12354         if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,1)
   12355             && INSNA(15,12) != BITS4(1,1,1,1)
   12356             && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(1,1,0,1)) {
   12357            round = INSNA(5,5);
   12358            rD    = INSNA(19,16);
   12359            rA    = INSNA(15,12);
   12360            rM    = INSNA(11,8);
   12361            rN    = INSNA(3,0);
   12362            if (rD != 15 && rM != 15 && rN != 15)
   12363               gate = True;
   12364         }
   12365      }
   12366      if (gate) {
   12367         IRTemp irt_rA   = newTemp(Ity_I32);
   12368         IRTemp irt_rN   = newTemp(Ity_I32);
   12369         IRTemp irt_rM   = newTemp(Ity_I32);
   12370         assign( irt_rA, isT ? getIRegT(rA) : getIRegA(rA) );
   12371         assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
   12372         assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
   12373         IRExpr* res
   12374         = unop(Iop_64HIto32,
   12375                binop(Iop_Add64,
   12376                      binop(Iop_Sub64,
   12377                            binop(Iop_32HLto64, mkexpr(irt_rA), mkU32(0)),
   12378                            binop(Iop_MullS32, mkexpr(irt_rN), mkexpr(irt_rM))),
   12379                      mkU64(round ? 0x80000000ULL : 0ULL)));
   12380         if (isT)
   12381            putIRegT( rD, res, condT );
   12382         else
   12383            putIRegA(rD, res, condT, Ijk_Boring);
   12384         DIP("smmls%s%s r%u, r%u, r%u, r%u\n",
   12385             round ? "r" : "", nCC(conq), rD, rN, rM, rA);
   12386         return True;
   12387      }
   12388      /* fall through */
   12389    }
   12390 
   12391    /* -------------- smlald{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
   12392    {
   12393      UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
   12394      Bool m_swap = False;
   12395      Bool gate   = False;
   12396 
   12397      if (isT) {
   12398         if (INSNT0(15,4) == 0xFBC &&
   12399             (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0)) {
   12400            rN     = INSNT0(3,0);
   12401            rDlo   = INSNT1(15,12);
   12402            rDhi   = INSNT1(11,8);
   12403            rM     = INSNT1(3,0);
   12404            m_swap = (INSNT1(4,4) & 1) == 1;
   12405            if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
   12406                && !isBadRegT(rM) && rDhi != rDlo)
   12407               gate = True;
   12408         }
   12409      } else {
   12410         if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0)
   12411             && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
   12412            rN     = INSNA(3,0);
   12413            rDlo   = INSNA(15,12);
   12414            rDhi   = INSNA(19,16);
   12415            rM     = INSNA(11,8);
   12416            m_swap = ( INSNA(5,5) & 1 ) == 1;
   12417            if (rDlo != 15 && rDhi != 15
   12418                && rN != 15 && rM != 15 && rDlo != rDhi)
   12419               gate = True;
   12420         }
   12421      }
   12422 
   12423      if (gate) {
   12424         IRTemp irt_rM   = newTemp(Ity_I32);
   12425         IRTemp irt_rN   = newTemp(Ity_I32);
   12426         IRTemp irt_rDhi = newTemp(Ity_I32);
   12427         IRTemp irt_rDlo = newTemp(Ity_I32);
   12428         IRTemp op_2     = newTemp(Ity_I32);
   12429         IRTemp pr_1     = newTemp(Ity_I64);
   12430         IRTemp pr_2     = newTemp(Ity_I64);
   12431         IRTemp result   = newTemp(Ity_I64);
   12432         IRTemp resHi    = newTemp(Ity_I32);
   12433         IRTemp resLo    = newTemp(Ity_I32);
   12434         assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM));
   12435         assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN));
   12436         assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi));
   12437         assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo));
   12438         assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
   12439         assign( pr_1, binop(Iop_MullS32,
   12440                             unop(Iop_16Sto32,
   12441                                  unop(Iop_32to16, mkexpr(irt_rN))
   12442                             ),
   12443                             unop(Iop_16Sto32,
   12444                                  unop(Iop_32to16, mkexpr(op_2))
   12445                             )
   12446                       )
   12447         );
   12448         assign( pr_2, binop(Iop_MullS32,
   12449                             binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
   12450                             binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
   12451                       )
   12452         );
   12453         assign( result, binop(Iop_Add64,
   12454                               binop(Iop_Add64,
   12455                                     mkexpr(pr_1),
   12456                                     mkexpr(pr_2)
   12457                               ),
   12458                               binop(Iop_32HLto64,
   12459                                     mkexpr(irt_rDhi),
   12460                                     mkexpr(irt_rDlo)
   12461                               )
   12462                         )
   12463         );
   12464         assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
   12465         assign( resLo, unop(Iop_64to32, mkexpr(result)) );
   12466         if (isT) {
   12467            putIRegT( rDhi, mkexpr(resHi), condT );
   12468            putIRegT( rDlo, mkexpr(resLo), condT );
   12469         } else {
   12470            putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   12471            putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   12472         }
   12473         DIP("smlald%c%s r%u, r%u, r%u, r%u\n",
   12474             m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
   12475         return True;
   12476      }
   12477      /* fall through */
   12478    }
   12479 
   12480    /* -------------- smlsld{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
   12481    {
   12482      UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
   12483      Bool m_swap = False;
   12484      Bool gate   = False;
   12485 
   12486      if (isT) {
   12487         if ((INSNT0(15,4) == 0xFBD &&
   12488             (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0))) {
   12489            rN     = INSNT0(3,0);
   12490            rDlo   = INSNT1(15,12);
   12491            rDhi   = INSNT1(11,8);
   12492            rM     = INSNT1(3,0);
   12493            m_swap = (INSNT1(4,4) & 1) == 1;
   12494            if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN) &&
   12495                !isBadRegT(rM) && rDhi != rDlo)
   12496               gate = True;
   12497         }
   12498      } else {
   12499         if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0) &&
   12500             (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,1,0,1)) {
   12501            rN     = INSNA(3,0);
   12502            rDlo   = INSNA(15,12);
   12503            rDhi   = INSNA(19,16);
   12504            rM     = INSNA(11,8);
   12505            m_swap = (INSNA(5,5) & 1) == 1;
   12506            if (rDlo != 15 && rDhi != 15 &&
   12507                rN != 15 && rM != 15 && rDlo != rDhi)
   12508               gate = True;
   12509         }
   12510      }
   12511      if (gate) {
   12512         IRTemp irt_rM   = newTemp(Ity_I32);
   12513         IRTemp irt_rN   = newTemp(Ity_I32);
   12514         IRTemp irt_rDhi = newTemp(Ity_I32);
   12515         IRTemp irt_rDlo = newTemp(Ity_I32);
   12516         IRTemp op_2     = newTemp(Ity_I32);
   12517         IRTemp pr_1     = newTemp(Ity_I64);
   12518         IRTemp pr_2     = newTemp(Ity_I64);
   12519         IRTemp result   = newTemp(Ity_I64);
   12520         IRTemp resHi    = newTemp(Ity_I32);
   12521         IRTemp resLo    = newTemp(Ity_I32);
   12522         assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
   12523         assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
   12524         assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi) );
   12525         assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo) );
   12526         assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
   12527         assign( pr_1, binop(Iop_MullS32,
   12528                             unop(Iop_16Sto32,
   12529                                  unop(Iop_32to16, mkexpr(irt_rN))
   12530                             ),
   12531                             unop(Iop_16Sto32,
   12532                                  unop(Iop_32to16, mkexpr(op_2))
   12533                             )
   12534                       )
   12535         );
   12536         assign( pr_2, binop(Iop_MullS32,
   12537                             binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
   12538                             binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
   12539                       )
   12540         );
   12541         assign( result, binop(Iop_Add64,
   12542                               binop(Iop_Sub64,
   12543                                     mkexpr(pr_1),
   12544                                     mkexpr(pr_2)
   12545                               ),
   12546                               binop(Iop_32HLto64,
   12547                                     mkexpr(irt_rDhi),
   12548                                     mkexpr(irt_rDlo)
   12549                               )
   12550                         )
   12551         );
   12552         assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
   12553         assign( resLo, unop(Iop_64to32, mkexpr(result)) );
   12554         if (isT) {
   12555            putIRegT( rDhi, mkexpr(resHi), condT );
   12556            putIRegT( rDlo, mkexpr(resLo), condT );
   12557         } else {
   12558            putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   12559            putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   12560         }
   12561         DIP("smlsld%c%s r%u, r%u, r%u, r%u\n",
   12562             m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
   12563         return True;
   12564      }
   12565      /* fall through */
   12566    }
   12567 
   12568    /* ---------- Doesn't match anything. ---------- */
   12569    return False;
   12570 
   12571 #  undef INSNA
   12572 #  undef INSNT0
   12573 #  undef INSNT1
   12574 }
   12575 
   12576 
   12577 /*------------------------------------------------------------*/
   12578 /*--- LDMxx/STMxx helper (both ARM and Thumb32)            ---*/
   12579 /*------------------------------------------------------------*/
   12580 
   12581 /* Generate IR for LDMxx and STMxx.  This is complex.  Assumes it's
   12582    unconditional, so the caller must produce a jump-around before
   12583    calling this, if the insn is to be conditional.  Caller is
   12584    responsible for all validation of parameters.  For LDMxx, if PC is
   12585    amongst the values loaded, caller is also responsible for
   12586    generating the jump. */
   12587 static void mk_ldm_stm ( Bool arm,     /* True: ARM, False: Thumb */
   12588                          UInt rN,      /* base reg */
   12589                          UInt bINC,    /* 1: inc,  0: dec */
   12590                          UInt bBEFORE, /* 1: inc/dec before, 0: after */
   12591                          UInt bW,      /* 1: writeback to Rn */
   12592                          UInt bL,      /* 1: load, 0: store */
   12593                          UInt regList )
   12594 {
   12595    Int i, r, m, nRegs;
   12596    IRJumpKind jk = Ijk_Boring; /* jump kind handed to putIRegA for loads */
   12597 
   12598    /* Get hold of the old Rn value.  We might need to write its value
   12599       to memory during a store, and if it's also the writeback
   12600       register then we need to get its value now.  We can't treat it
   12601       exactly like the other registers we're going to transfer,
   12602       because for xxMDA and xxMDB writeback forms, the generated IR
   12603       updates Rn in the guest state before any transfers take place.
   12604       We have to do this as per comments below, in order that if Rn is
   12605       the stack pointer then it always has a value that is below or
   12606       equal to any of the transfer addresses.  Ick. */
   12607    IRTemp oldRnT = newTemp(Ity_I32);
   12608    assign(oldRnT, arm ? getIRegA(rN) : getIRegT(rN));
   12609 
   12610    IRTemp anchorT = newTemp(Ity_I32);
   12611    /* The old (Addison-Wesley) ARM ARM seems to say that LDMxx/STMxx
   12612       ignore the bottom two bits of the address.  However, Cortex-A8
   12613       doesn't seem to care.  Hence: */
   12614    /* No .. don't force alignment .. */
   12615    /* assign(anchorT, binop(Iop_And32, mkexpr(oldRnT), mkU32(~3U))); */
   12616    /* Instead, use the potentially misaligned address directly. */
   12617    assign(anchorT, mkexpr(oldRnT));
   12618 
   12619    IROp opADDorSUB = bINC ? Iop_Add32 : Iop_Sub32;
   12620    // bINC == 1:  xxMIA, xxMIB
   12621    // bINC == 0:  xxMDA, xxMDB
   12622 
   12623    // For xxMDA and xxMDB, update Rn first if necessary.  We have
   12624    // to do this first so that, for the common idiom of the transfers
   12625    // faulting because we're pushing stuff onto a stack and the stack
   12626    // is growing down onto allocate-on-fault pages (as Valgrind simulates),
   12627    // we have the SP up-to-date "covering" (pointing below) the
   12628    // transfer area.  For the same reason, if we are doing xxMIA or xxMIB,
   12629    // do the transfer first, and then update rN afterwards.
   12630    nRegs = 0;
   12631    for (i = 0; i < 16; i++) {
   12632      if ((regList & (1 << i)) != 0)
   12633          nRegs++;
   12634    }
   12635    if (bW == 1 && !bINC) {
   12636       IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
   12637       if (arm)
   12638          putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
   12639       else
   12640          putIRegT( rN, e, IRTemp_INVALID );
   12641    }
   12642 
   12643    // Make up a list of the registers to transfer, and their offsets
   12644    // in memory relative to the anchor.  If the base reg (Rn) is part
   12645    // of the transfer, then do it last for a load and first for a store.
   12646    UInt xReg[16], xOff[16];
   12647    Int  nX = 0;
   12648    m = 0;
   12649    for (i = 0; i < 16; i++) {
   12650       r = bINC ? i : (15-i);
   12651       if (0 == (regList & (1<<r)))
   12652          continue;
   12653       if (bBEFORE)
   12654          m++;
   12655       /* paranoia: check we aren't transferring the writeback
   12656          register during a load.  Should be assured by the caller's
   12657          decode-point check. */
   12658       if (bW == 1 && bL == 1)
   12659          vassert(r != rN);
   12660 
   12661       xOff[nX] = 4 * m;
   12662       xReg[nX] = r;
   12663       nX++;
   12664 
   12665       if (!bBEFORE)
   12666          m++;
   12667    }
   12668    vassert(m == nRegs);
   12669    vassert(nX == nRegs);
   12670    vassert(nX <= 16);
   12671 
   12672    if (bW == 0 && (regList & (1<<rN)) != 0) {
   12673       /* Non-writeback, and basereg is to be transferred.  Do its
   12674          transfer last for a load and first for a store.  Requires
   12675          reordering xOff/xReg. */
   12676       if (0) {
   12677          vex_printf("\nREG_LIST_PRE: (rN=%u)\n", rN);
   12678          for (i = 0; i < nX; i++)
   12679             vex_printf("reg %u   off %u\n", xReg[i], xOff[i]);
   12680          vex_printf("\n");
   12681       }
   12682 
   12683       vassert(nX > 0);
   12684       for (i = 0; i < nX; i++) {
   12685          if (xReg[i] == rN)
   12686              break;
   12687       }
   12688       vassert(i < nX); /* else we didn't find it! */
   12689       UInt tReg = xReg[i];
   12690       UInt tOff = xOff[i];
   12691       if (bL == 1) {
   12692          /* load; make this transfer happen last */
   12693          if (i < nX-1) {
   12694             for (m = i+1; m < nX; m++) {
   12695                xReg[m-1] = xReg[m];
   12696                xOff[m-1] = xOff[m];
   12697             }
   12698             vassert(m == nX);
   12699             xReg[m-1] = tReg;
   12700             xOff[m-1] = tOff;
   12701          }
   12702       } else {
   12703          /* store; make this transfer happen first */
   12704          if (i > 0) {
   12705             for (m = i-1; m >= 0; m--) {
   12706                xReg[m+1] = xReg[m];
   12707                xOff[m+1] = xOff[m];
   12708             }
   12709             vassert(m == -1);
   12710             xReg[0] = tReg;
   12711             xOff[0] = tOff;
   12712          }
   12713       }
   12714 
   12715       if (0) {
   12716          vex_printf("REG_LIST_POST:\n");
   12717          for (i = 0; i < nX; i++)
   12718             vex_printf("reg %u   off %u\n", xReg[i], xOff[i]);
   12719          vex_printf("\n");
   12720       }
   12721    }
   12722 
   12723    /* According to the Cortex A8 TRM Sec. 5.2.1, LDM(1) with r13 as the base
   12724        register and PC in the register list is a return for purposes of branch
   12725        prediction.
   12726       The ARM ARM Sec. C9.10.1 further specifies that writeback must be enabled
   12727        to be counted in event 0x0E (Procedure return).*/
   12728    if (rN == 13 && bL == 1 && bINC && !bBEFORE && bW == 1) {
   12729       jk = Ijk_Ret;
   12730    }
   12731 
   12732    /* Actually generate the transfers */
   12733    for (i = 0; i < nX; i++) {
   12734       r = xReg[i];
   12735       if (bL == 1) {
   12736          IRExpr* e = loadLE(Ity_I32,
   12737                             binop(opADDorSUB, mkexpr(anchorT),
   12738                                   mkU32(xOff[i])));
   12739          if (arm) {
   12740             putIRegA( r, e, IRTemp_INVALID, jk );
   12741          } else {
   12742             // no: putIRegT( r, e, IRTemp_INVALID );
   12743             // putIRegT refuses to write to R15.  But that might happen.
   12744             // Since this is uncond, and we need to be able to
   12745             // write the PC, just use the low level put:
   12746             llPutIReg( r, e );
   12747          }
   12748       } else {
   12749          /* if we're storing Rn, make sure we use the correct
   12750             value, as per extensive comments above */
   12751          storeLE( binop(opADDorSUB, mkexpr(anchorT), mkU32(xOff[i])),
   12752                   r == rN ? mkexpr(oldRnT)
   12753                           : (arm ? getIRegA(r) : getIRegT(r) ) );
   12754       }
   12755    }
   12756 
   12757    // If we are doing xxMIA or xxMIB,
   12758    // do the transfer first, and then update rN afterwards.
   12759    if (bW == 1 && bINC) {
   12760       IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
   12761       if (arm)
   12762          putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
   12763       else
   12764          putIRegT( rN, e, IRTemp_INVALID );
   12765    }
   12766 }
   12767 
   12768 
   12769 /*------------------------------------------------------------*/
   12770 /*--- VFP (CP 10 and 11) instructions                      ---*/
   12771 /*------------------------------------------------------------*/
   12772 
   12773 /* Both ARM and Thumb */
   12774 
   12775 /* Translate a CP10 or CP11 instruction.  If successful, returns
   12776    True and *dres may or may not be updated.  If failure, returns
   12777    False and doesn't change *dres nor create any IR.
   12778 
   12779    The ARM and Thumb encodings are identical for the low 28 bits of
   12780    the insn (yay!) and that's what the caller must supply, iow, insn28
   12781    has the top 4 bits masked out.  Caller is responsible for
   12782    determining whether the masked-out bits are valid for a CP10/11
   12783    insn.  The rules for the top 4 bits are:
   12784 
   12785      ARM: 0000 to 1110 allowed, and this is the gating condition.
   12786      1111 (NV) is not allowed.
   12787 
   12788      Thumb: must be 1110.  The gating condition is taken from
   12789      ITSTATE in the normal way.
   12790 
   12791    Conditionalisation:
   12792 
   12793    Caller must supply an IRTemp 'condT' holding the gating condition,
   12794    or IRTemp_INVALID indicating the insn is always executed.
   12795 
   12796    Caller must also supply an ARMCondcode 'conq'.  This is only used
   12797    for debug printing, no other purpose.  For ARM, this is simply the
   12798    top 4 bits of the original instruction.  For Thumb, the condition
   12799    is not (really) known until run time, and so ARMCondAL should be
   12800    passed, only so that printing of these instructions does not show
   12801    any condition.
   12802 
   12803    Finally, the caller must indicate whether this occurs in ARM or
   12804    Thumb code.
   12805 */
   12806 static Bool decode_CP10_CP11_instruction (
   12807                /*MOD*/DisResult* dres,
   12808                UInt              insn28,
   12809                IRTemp            condT,
   12810                ARMCondcode       conq,
   12811                Bool              isT
   12812             )
   12813 {
   12814 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn28, (_bMax), (_bMin))
   12815 
   12816    vassert(INSN(31,28) == BITS4(0,0,0,0)); // caller's obligation
   12817 
   12818    if (isT) {
   12819       vassert(conq == ARMCondAL);
   12820    } else {
   12821       vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
   12822    }
   12823 
   12824    /* ----------------------------------------------------------- */
   12825    /* -- VFP instructions -- double precision (mostly)         -- */
   12826    /* ----------------------------------------------------------- */
   12827 
   12828    /* --------------------- fldmx, fstmx --------------------- */
   12829    /*
   12830                                  31   27   23   19 15 11   7   0
   12831                                          P U WL
   12832       C4-100, C5-26  1  FSTMX    cond 1100 1000 Rn Dd 1011 offset
   12833       C4-100, C5-28  2  FSTMIAX  cond 1100 1010 Rn Dd 1011 offset
   12834       C4-100, C5-30  3  FSTMDBX  cond 1101 0010 Rn Dd 1011 offset
   12835 
   12836       C4-42, C5-26   1  FLDMX    cond 1100 1001 Rn Dd 1011 offset
   12837       C4-42, C5-28   2  FLDMIAX  cond 1100 1011 Rn Dd 1011 offset
   12838       C4-42, C5-30   3  FLDMDBX  cond 1101 0011 Rn Dd 1011 offset
   12839 
   12840       Regs transferred: Dd .. D(d + (offset-3)/2)
   12841       offset must be odd, must not imply a reg > 15
   12842       IA/DB: Rn is changed by (4 + 8 x # regs transferred)
   12843 
   12844       case coding:
   12845          1  at-Rn   (access at Rn)
   12846          2  ia-Rn   (access at Rn, then Rn += 4+8n)
   12847          3  db-Rn   (Rn -= 4+8n,   then access at Rn)
   12848    */
   12849    if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
   12850        && INSN(11,8) == BITS4(1,0,1,1)) {
   12851       UInt bP      = (insn28 >> 24) & 1;
   12852       UInt bU      = (insn28 >> 23) & 1;
   12853       UInt bW      = (insn28 >> 21) & 1;
   12854       UInt bL      = (insn28 >> 20) & 1;
   12855       UInt offset  = (insn28 >> 0) & 0xFF;
   12856       UInt rN      = INSN(19,16);
   12857       UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
   12858       UInt nRegs   = (offset - 1) / 2;
   12859       UInt summary = 0;
   12860       Int  i;
   12861 
   12862       /**/ if (bP == 0 && bU == 1 && bW == 0) {
   12863          summary = 1;
   12864       }
   12865       else if (bP == 0 && bU == 1 && bW == 1) {
   12866          summary = 2;
   12867       }
   12868       else if (bP == 1 && bU == 0 && bW == 1) {
   12869          summary = 3;
   12870       }
   12871       else goto after_vfp_fldmx_fstmx;
   12872 
   12873       /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
   12874       if (rN == 15 && (summary == 2 || summary == 3 || isT))
   12875          goto after_vfp_fldmx_fstmx;
   12876 
   12877       /* offset must be odd, and specify at least one register */
   12878       if (0 == (offset & 1) || offset < 3)
   12879          goto after_vfp_fldmx_fstmx;
   12880 
   12881       /* can't transfer regs after D15 */
   12882       if (dD + nRegs - 1 >= 32)
   12883          goto after_vfp_fldmx_fstmx;
   12884 
   12885       /* Now, we can't do a conditional load or store, since that very
   12886          likely will generate an exception.  So we have to take a side
   12887          exit at this point if the condition is false. */
   12888       if (condT != IRTemp_INVALID) {
   12889          if (isT)
   12890             mk_skip_over_T32_if_cond_is_false( condT );
   12891          else
   12892             mk_skip_over_A32_if_cond_is_false( condT );
   12893          condT = IRTemp_INVALID;
   12894       }
   12895       /* Ok, now we're unconditional.  Do the load or store. */
   12896 
   12897       /* get the old Rn value */
   12898       IRTemp rnT = newTemp(Ity_I32);
   12899       assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
   12900                            rN == 15));
   12901 
   12902       /* make a new value for Rn, post-insn */
   12903       IRTemp rnTnew = IRTemp_INVALID;
   12904       if (summary == 2 || summary == 3) {
   12905          rnTnew = newTemp(Ity_I32);
   12906          assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
   12907                               mkexpr(rnT),
   12908                               mkU32(4 + 8 * nRegs)));
   12909       }
   12910 
   12911       /* decide on the base transfer address */
   12912       IRTemp taT = newTemp(Ity_I32);
   12913       assign(taT,  summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
   12914 
   12915       /* update Rn if necessary -- in case 3, we're moving it down, so
   12916          update before any memory reference, in order to keep Memcheck
   12917          and V's stack-extending logic (on linux) happy */
   12918       if (summary == 3) {
   12919          if (isT)
   12920             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   12921          else
   12922             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   12923       }
   12924 
   12925       /* generate the transfers */
   12926       for (i = 0; i < nRegs; i++) {
   12927          IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
   12928          if (bL) {
   12929             putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
   12930          } else {
   12931             storeLE(addr, getDReg(dD + i));
   12932          }
   12933       }
   12934 
   12935       /* update Rn if necessary -- in case 2, we're moving it up, so
   12936          update after any memory reference, in order to keep Memcheck
   12937          and V's stack-extending logic (on linux) happy */
   12938       if (summary == 2) {
   12939          if (isT)
   12940             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   12941          else
   12942             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   12943       }
   12944 
   12945       const HChar* nm = bL==1 ? "ld" : "st";
   12946       switch (summary) {
   12947          case 1:  DIP("f%smx%s r%u, {d%u-d%u}\n",
   12948                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   12949                   break;
   12950          case 2:  DIP("f%smiax%s r%u!, {d%u-d%u}\n",
   12951                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   12952                   break;
   12953          case 3:  DIP("f%smdbx%s r%u!, {d%u-d%u}\n",
   12954                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   12955                   break;
   12956          default: vassert(0);
   12957       }
   12958 
   12959       goto decode_success_vfp;
   12960       /* FIXME alignment constraints? */
   12961    }
   12962 
   12963   after_vfp_fldmx_fstmx:
   12964 
   12965    /* --------------------- fldmd, fstmd --------------------- */
   12966    /*
   12967                                  31   27   23   19 15 11   7   0
   12968                                          P U WL
   12969       C4-96, C5-26   1  FSTMD    cond 1100 1000 Rn Dd 1011 offset
   12970       C4-96, C5-28   2  FSTMDIA  cond 1100 1010 Rn Dd 1011 offset
   12971       C4-96, C5-30   3  FSTMDDB  cond 1101 0010 Rn Dd 1011 offset
   12972 
   12973       C4-38, C5-26   1  FLDMD    cond 1100 1001 Rn Dd 1011 offset
   12974       C4-38, C5-28   2  FLDMIAD  cond 1100 1011 Rn Dd 1011 offset
   12975       C4-38, C5-30   3  FLDMDBD  cond 1101 0011 Rn Dd 1011 offset
   12976 
   12977       Regs transferred: Dd .. D(d + (offset-2)/2)
   12978       offset must be even, must not imply a reg > 15
   12979       IA/DB: Rn is changed by (8 x # regs transferred)
   12980 
   12981       case coding:
   12982          1  at-Rn   (access at Rn)
   12983          2  ia-Rn   (access at Rn, then Rn += 8n)
   12984          3  db-Rn   (Rn -= 8n,     then access at Rn)
   12985    */
   12986    if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
   12987        && INSN(11,8) == BITS4(1,0,1,1)) {
   12988       UInt bP      = (insn28 >> 24) & 1;
   12989       UInt bU      = (insn28 >> 23) & 1;
   12990       UInt bW      = (insn28 >> 21) & 1;
   12991       UInt bL      = (insn28 >> 20) & 1;
   12992       UInt offset  = (insn28 >> 0) & 0xFF;
   12993       UInt rN      = INSN(19,16);
   12994       UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
   12995       UInt nRegs   = offset / 2;
   12996       UInt summary = 0;
   12997       Int  i;
   12998 
   12999       /**/ if (bP == 0 && bU == 1 && bW == 0) {
   13000          summary = 1;
   13001       }
   13002       else if (bP == 0 && bU == 1 && bW == 1) {
   13003          summary = 2;
   13004       }
   13005       else if (bP == 1 && bU == 0 && bW == 1) {
   13006          summary = 3;
   13007       }
   13008       else goto after_vfp_fldmd_fstmd;
   13009 
   13010       /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
   13011       if (rN == 15 && (summary == 2 || summary == 3 || isT))
   13012          goto after_vfp_fldmd_fstmd;
   13013 
   13014       /* offset must be even, and specify at least one register */
   13015       if (1 == (offset & 1) || offset < 2)
   13016          goto after_vfp_fldmd_fstmd;
   13017 
   13018       /* can't transfer regs after D15 */
   13019       if (dD + nRegs - 1 >= 32)
   13020          goto after_vfp_fldmd_fstmd;
   13021 
   13022       /* Now, we can't do a conditional load or store, since that very
   13023          likely will generate an exception.  So we have to take a side
   13024          exit at this point if the condition is false. */
   13025       if (condT != IRTemp_INVALID) {
   13026          if (isT)
   13027             mk_skip_over_T32_if_cond_is_false( condT );
   13028          else
   13029             mk_skip_over_A32_if_cond_is_false( condT );
   13030          condT = IRTemp_INVALID;
   13031       }
   13032       /* Ok, now we're unconditional.  Do the load or store. */
   13033 
   13034       /* get the old Rn value */
   13035       IRTemp rnT = newTemp(Ity_I32);
   13036       assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
   13037                            rN == 15));
   13038 
   13039       /* make a new value for Rn, post-insn */
   13040       IRTemp rnTnew = IRTemp_INVALID;
   13041       if (summary == 2 || summary == 3) {
   13042          rnTnew = newTemp(Ity_I32);
   13043          assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
   13044                               mkexpr(rnT),
   13045                               mkU32(8 * nRegs)));
   13046       }
   13047 
   13048       /* decide on the base transfer address */
   13049       IRTemp taT = newTemp(Ity_I32);
   13050       assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
   13051 
   13052       /* update Rn if necessary -- in case 3, we're moving it down, so
   13053          update before any memory reference, in order to keep Memcheck
   13054          and V's stack-extending logic (on linux) happy */
   13055       if (summary == 3) {
   13056          if (isT)
   13057             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   13058          else
   13059             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   13060       }
   13061 
   13062       /* generate the transfers */
   13063       for (i = 0; i < nRegs; i++) {
   13064          IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
   13065          if (bL) {
   13066             putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
   13067          } else {
   13068             storeLE(addr, getDReg(dD + i));
   13069          }
   13070       }
   13071 
   13072       /* update Rn if necessary -- in case 2, we're moving it up, so
   13073          update after any memory reference, in order to keep Memcheck
   13074          and V's stack-extending logic (on linux) happy */
   13075       if (summary == 2) {
   13076          if (isT)
   13077             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   13078          else
   13079             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   13080       }
   13081 
   13082       const HChar* nm = bL==1 ? "ld" : "st";
   13083       switch (summary) {
   13084          case 1:  DIP("f%smd%s r%u, {d%u-d%u}\n",
   13085                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   13086                   break;
   13087          case 2:  DIP("f%smiad%s r%u!, {d%u-d%u}\n",
   13088                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   13089                   break;
   13090          case 3:  DIP("f%smdbd%s r%u!, {d%u-d%u}\n",
   13091                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   13092                   break;
   13093          default: vassert(0);
   13094       }
   13095 
   13096       goto decode_success_vfp;
   13097       /* FIXME alignment constraints? */
   13098    }
   13099 
   13100   after_vfp_fldmd_fstmd:
   13101 
   13102    /* ------------------- fmrx, fmxr ------------------- */
   13103    if (BITS8(1,1,1,0,1,1,1,1) == INSN(27,20)
   13104        && BITS4(1,0,1,0) == INSN(11,8)
   13105        && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
   13106       UInt rD  = INSN(15,12);
   13107       UInt reg = INSN(19,16);
   13108       if (reg == BITS4(0,0,0,1)) {
   13109          if (rD == 15) {
   13110             IRTemp nzcvT = newTemp(Ity_I32);
   13111             /* When rD is 15, we are copying the top 4 bits of FPSCR
   13112                into CPSR.  That is, set the flags thunk to COPY and
   13113                install FPSCR[31:28] as the value to copy. */
   13114             assign(nzcvT, binop(Iop_And32,
   13115                                 IRExpr_Get(OFFB_FPSCR, Ity_I32),
   13116                                 mkU32(0xF0000000)));
   13117             setFlags_D1(ARMG_CC_OP_COPY, nzcvT, condT);
   13118             DIP("fmstat%s\n", nCC(conq));
   13119          } else {
   13120             /* Otherwise, merely transfer FPSCR to r0 .. r14. */
   13121             IRExpr* e = IRExpr_Get(OFFB_FPSCR, Ity_I32);
   13122             if (isT)
   13123                putIRegT(rD, e, condT);
   13124             else
   13125                putIRegA(rD, e, condT, Ijk_Boring);
   13126             DIP("fmrx%s r%u, fpscr\n", nCC(conq), rD);
   13127          }
   13128          goto decode_success_vfp;
   13129       }
   13130       /* fall through */
   13131    }
   13132 
   13133    if (BITS8(1,1,1,0,1,1,1,0) == INSN(27,20)
   13134        && BITS4(1,0,1,0) == INSN(11,8)
   13135        && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
   13136       UInt rD  = INSN(15,12);
   13137       UInt reg = INSN(19,16);
   13138       if (reg == BITS4(0,0,0,1)) {
   13139          putMiscReg32(OFFB_FPSCR,
   13140                       isT ? getIRegT(rD) : getIRegA(rD), condT);
   13141          DIP("fmxr%s fpscr, r%u\n", nCC(conq), rD);
   13142          goto decode_success_vfp;
   13143       }
   13144       /* fall through */
   13145    }
   13146 
   13147    /* --------------------- vmov --------------------- */
   13148    // VMOV dM, rD, rN
   13149    if (0x0C400B10 == (insn28 & 0x0FF00FD0)) {
   13150       UInt dM = INSN(3,0) | (INSN(5,5) << 4);
   13151       UInt rD = INSN(15,12); /* lo32 */
   13152       UInt rN = INSN(19,16); /* hi32 */
   13153       if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))) {
   13154          /* fall through */
   13155       } else {
   13156          putDReg(dM,
   13157                  unop(Iop_ReinterpI64asF64,
   13158                       binop(Iop_32HLto64,
   13159                             isT ? getIRegT(rN) : getIRegA(rN),
   13160                             isT ? getIRegT(rD) : getIRegA(rD))),
   13161                  condT);
   13162          DIP("vmov%s d%u, r%u, r%u\n", nCC(conq), dM, rD, rN);
   13163          goto decode_success_vfp;
   13164       }
   13165       /* fall through */
   13166    }
   13167 
   13168    // VMOV rD, rN, dM
   13169    if (0x0C500B10 == (insn28 & 0x0FF00FD0)) {
   13170       UInt dM = INSN(3,0) | (INSN(5,5) << 4);
   13171       UInt rD = INSN(15,12); /* lo32 */
   13172       UInt rN = INSN(19,16); /* hi32 */
   13173       if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))
   13174           || rD == rN) {
   13175          /* fall through */
   13176       } else {
   13177          IRTemp i64 = newTemp(Ity_I64);
   13178          assign(i64, unop(Iop_ReinterpF64asI64, getDReg(dM)));
   13179          IRExpr* hi32 = unop(Iop_64HIto32, mkexpr(i64));
   13180          IRExpr* lo32 = unop(Iop_64to32,   mkexpr(i64));
   13181          if (isT) {
   13182             putIRegT(rN, hi32, condT);
   13183             putIRegT(rD, lo32, condT);
   13184          } else {
   13185             putIRegA(rN, hi32, condT, Ijk_Boring);
   13186             putIRegA(rD, lo32, condT, Ijk_Boring);
   13187          }
   13188          DIP("vmov%s r%u, r%u, d%u\n", nCC(conq), rD, rN, dM);
   13189          goto decode_success_vfp;
   13190       }
   13191       /* fall through */
   13192    }
   13193 
   13194    // VMOV sD, sD+1, rN, rM
   13195    if (0x0C400A10 == (insn28 & 0x0FF00FD0)) {
   13196       UInt sD = (INSN(3,0) << 1) | INSN(5,5);
   13197       UInt rN = INSN(15,12);
   13198       UInt rM = INSN(19,16);
   13199       if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
   13200           || sD == 31) {
   13201          /* fall through */
   13202       } else {
   13203          putFReg(sD,
   13204                  unop(Iop_ReinterpI32asF32, isT ? getIRegT(rN) : getIRegA(rN)),
   13205                  condT);
   13206          putFReg(sD+1,
   13207                  unop(Iop_ReinterpI32asF32, isT ? getIRegT(rM) : getIRegA(rM)),
   13208                  condT);
   13209          DIP("vmov%s, s%u, s%u, r%u, r%u\n",
   13210               nCC(conq), sD, sD + 1, rN, rM);
   13211          goto decode_success_vfp;
   13212       }
   13213    }
   13214 
   13215    // VMOV rN, rM, sD, sD+1
   13216    if (0x0C500A10 == (insn28 & 0x0FF00FD0)) {
   13217       UInt sD = (INSN(3,0) << 1) | INSN(5,5);
   13218       UInt rN = INSN(15,12);
   13219       UInt rM = INSN(19,16);
   13220       if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
   13221           || sD == 31 || rN == rM) {
   13222          /* fall through */
   13223       } else {
   13224          IRExpr* res0 = unop(Iop_ReinterpF32asI32, getFReg(sD));
   13225          IRExpr* res1 = unop(Iop_ReinterpF32asI32, getFReg(sD+1));
   13226          if (isT) {
   13227             putIRegT(rN, res0, condT);
   13228             putIRegT(rM, res1, condT);
   13229          } else {
   13230             putIRegA(rN, res0, condT, Ijk_Boring);
   13231             putIRegA(rM, res1, condT, Ijk_Boring);
   13232          }
   13233          DIP("vmov%s, r%u, r%u, s%u, s%u\n",
   13234              nCC(conq), rN, rM, sD, sD + 1);
   13235          goto decode_success_vfp;
   13236       }
   13237    }
   13238 
   13239    // VMOV dD[x], rT  (ARM core register to scalar; D-reg number is in 'rD')
   13240    if (0x0E000B10 == (insn28 & 0x0F900F1F)) {
   13241       UInt rD  = (INSN(7,7) << 4) | INSN(19,16);
   13242       UInt rT  = INSN(15,12);
   13243       UInt opc = (INSN(22,21) << 2) | INSN(6,5);
   13244       UInt index;
   13245       if (rT == 15 || (isT && rT == 13)) {
   13246          /* fall through */
   13247       } else {
   13248          if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
   13249             index = opc & 7;
   13250             putDRegI64(rD, triop(Iop_SetElem8x8,
   13251                                  getDRegI64(rD),
   13252                                  mkU8(index),
   13253                                  unop(Iop_32to8,
   13254                                       isT ? getIRegT(rT) : getIRegA(rT))),
   13255                            condT);
   13256             DIP("vmov%s.8 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
   13257             goto decode_success_vfp;
   13258          }
   13259          else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
   13260             index = (opc >> 1) & 3;
   13261             putDRegI64(rD, triop(Iop_SetElem16x4,
   13262                                  getDRegI64(rD),
   13263                                  mkU8(index),
   13264                                  unop(Iop_32to16,
   13265                                       isT ? getIRegT(rT) : getIRegA(rT))),
   13266                            condT);
   13267             DIP("vmov%s.16 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
   13268             goto decode_success_vfp;
   13269          }
   13270          else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0)) {
   13271             index = (opc >> 2) & 1;
   13272             putDRegI64(rD, triop(Iop_SetElem32x2,
   13273                                  getDRegI64(rD),
   13274                                  mkU8(index),
   13275                                  isT ? getIRegT(rT) : getIRegA(rT)),
   13276                            condT);
   13277             DIP("vmov%s.32 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
   13278             goto decode_success_vfp;
   13279          } else {
   13280             /* fall through */
   13281          }
   13282       }
   13283    }
   13284 
   13285    // VMOV (scalar to ARM core register)
   13286    // VMOV rT, dN[x]  (D-reg number is in 'rN')
   13287    if (0x0E100B10 == (insn28 & 0x0F100F1F)) {
   13288       UInt rN  = (INSN(7,7) << 4) | INSN(19,16);
   13289       UInt rT  = INSN(15,12);
   13290       UInt U   = INSN(23,23);
   13291       UInt opc = (INSN(22,21) << 2) | INSN(6,5);
   13292       UInt index;
   13293       if (rT == 15 || (isT && rT == 13)) {
   13294          /* fall through */
   13295       } else {
   13296          if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
   13297             index = opc & 7;
   13298             IRExpr* e = unop(U ? Iop_8Uto32 : Iop_8Sto32,
   13299                              binop(Iop_GetElem8x8,
   13300                                    getDRegI64(rN),
   13301                                    mkU8(index)));
   13302             if (isT)
   13303                putIRegT(rT, e, condT);
   13304             else
   13305                putIRegA(rT, e, condT, Ijk_Boring);
   13306             DIP("vmov%s.%c8 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
   13307                   rT, rN, index);
   13308             goto decode_success_vfp;
   13309          }
   13310          else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
   13311             index = (opc >> 1) & 3;
   13312             IRExpr* e = unop(U ? Iop_16Uto32 : Iop_16Sto32,
   13313                              binop(Iop_GetElem16x4,
   13314                                    getDRegI64(rN),
   13315                                    mkU8(index)));
   13316             if (isT)
   13317                putIRegT(rT, e, condT);
   13318             else
   13319                putIRegA(rT, e, condT, Ijk_Boring);
   13320             DIP("vmov%s.%c16 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
   13321                   rT, rN, index);
   13322             goto decode_success_vfp;
   13323          }
   13324          else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0) && U == 0) {
   13325             index = (opc >> 2) & 1;
   13326             IRExpr* e = binop(Iop_GetElem32x2, getDRegI64(rN), mkU8(index));
   13327             if (isT)
   13328                putIRegT(rT, e, condT);
   13329             else
   13330                putIRegA(rT, e, condT, Ijk_Boring);
   13331             DIP("vmov%s.32 r%u, d%u[%u]\n", nCC(conq), rT, rN, index);
   13332             goto decode_success_vfp;
   13333          } else {
   13334             /* fall through */
   13335          }
   13336       }
   13337    }
   13338 
   13339    // VMOV.F32 sD, #imm
   13340    // FCONSTS sD, #imm
   13341    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13342        && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,0)) {
   13343       UInt rD   = (INSN(15,12) << 1) | INSN(22,22);
   13344       UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
   13345       UInt b    = (imm8 >> 6) & 1;
   13346       UInt imm;
   13347       imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,(imm8 >> 5) & 1) << 8)
   13348              | ((imm8 & 0x1f) << 3);
   13349       imm <<= 16;
   13350       putFReg(rD, unop(Iop_ReinterpI32asF32, mkU32(imm)), condT);
   13351       DIP("fconsts%s s%u #%u", nCC(conq), rD, imm8);
   13352       goto decode_success_vfp;
   13353    }
   13354 
   13355    // VMOV.F64 dD, #imm
   13356    // FCONSTD dD, #imm
   13357    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13358        && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,1)) {
   13359       UInt rD   = INSN(15,12) | (INSN(22,22) << 4);
   13360       UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
   13361       UInt b    = (imm8 >> 6) & 1;
   13362       ULong imm;
   13363       imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,b) << 8)
   13364              | BITS8(b,b,0,0,0,0,0,0) | (imm8 & 0x3f);
   13365       imm <<= 48;
   13366       putDReg(rD, unop(Iop_ReinterpI64asF64, mkU64(imm)), condT);
   13367       DIP("fconstd%s d%u #%u", nCC(conq), rD, imm8);
   13368       goto decode_success_vfp;
   13369    }
   13370 
   13371    /* ---------------------- vdup ------------------------- */
   13372    // VDUP dD, rT
   13373    // VDUP qD, rT
   13374    if (BITS8(1,1,1,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,1))
   13375        && BITS4(1,0,1,1) == INSN(11,8) && INSN(6,6) == 0 && INSN(4,4) == 1) {
   13376       UInt rD   = (INSN(7,7) << 4) | INSN(19,16);
   13377       UInt rT   = INSN(15,12);
   13378       UInt Q    = INSN(21,21);
   13379       UInt size = (INSN(22,22) << 1) | INSN(5,5);
   13380       if (rT == 15 || (isT && rT == 13) || size == 3 || (Q && (rD & 1))) {
   13381          /* fall through */
   13382       } else {
   13383          IRExpr* e = isT ? getIRegT(rT) : getIRegA(rT);
   13384          if (Q) {
   13385             rD >>= 1;
   13386             switch (size) {
   13387                case 0:
   13388                   putQReg(rD, unop(Iop_Dup32x4, e), condT);
   13389                   break;
   13390                case 1:
   13391                   putQReg(rD, unop(Iop_Dup16x8, unop(Iop_32to16, e)),
   13392                               condT);
   13393                   break;
   13394                case 2:
   13395                   putQReg(rD, unop(Iop_Dup8x16, unop(Iop_32to8, e)),
   13396                               condT);
   13397                   break;
   13398                default:
   13399                   vassert(0);
   13400             }
   13401             DIP("vdup.%d q%u, r%u\n", 32 / (1<<size), rD, rT);
   13402          } else {
   13403             switch (size) {
   13404                case 0:
   13405                   putDRegI64(rD, unop(Iop_Dup32x2, e), condT);
   13406                   break;
   13407                case 1:
   13408                   putDRegI64(rD, unop(Iop_Dup16x4, unop(Iop_32to16, e)),
   13409                                condT);
   13410                   break;
   13411                case 2:
   13412                   putDRegI64(rD, unop(Iop_Dup8x8, unop(Iop_32to8, e)),
   13413                                condT);
   13414                   break;
   13415                default:
   13416                   vassert(0);
   13417             }
   13418             DIP("vdup.%d d%u, r%u\n", 32 / (1<<size), rD, rT);
   13419          }
   13420          goto decode_success_vfp;
   13421       }
   13422    }
   13423 
   13424    /* --------------------- f{ld,st}d --------------------- */
   13425    // FLDD, FSTD
   13426    if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
   13427        && BITS4(1,0,1,1) == INSN(11,8)) {
   13428       UInt dD     = INSN(15,12) | (INSN(22,22) << 4);
   13429       UInt rN     = INSN(19,16);
   13430       UInt offset = (insn28 & 0xFF) << 2;
   13431       UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
   13432       UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
   13433       /* make unconditional */
   13434       if (condT != IRTemp_INVALID) {
   13435          if (isT)
   13436             mk_skip_over_T32_if_cond_is_false( condT );
   13437          else
   13438             mk_skip_over_A32_if_cond_is_false( condT );
   13439          condT = IRTemp_INVALID;
   13440       }
   13441       IRTemp ea = newTemp(Ity_I32);
   13442       assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
   13443                        align4if(isT ? getIRegT(rN) : getIRegA(rN),
   13444                                 rN == 15),
   13445                        mkU32(offset)));
   13446       if (bL) {
   13447          putDReg(dD, loadLE(Ity_F64,mkexpr(ea)), IRTemp_INVALID);
   13448       } else {
   13449          storeLE(mkexpr(ea), getDReg(dD));
   13450       }
   13451       DIP("f%sd%s d%u, [r%u, %c#%u]\n",
   13452           bL ? "ld" : "st", nCC(conq), dD, rN,
   13453           bU ? '+' : '-', offset);
   13454       goto decode_success_vfp;
   13455    }
   13456 
   13457    /* --------------------- dp insns (D) --------------------- */
   13458    if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
   13459        && BITS4(1,0,1,1) == INSN(11,8)
   13460        && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
   13461       UInt    dM  = INSN(3,0)   | (INSN(5,5) << 4);       /* argR */
   13462       UInt    dD  = INSN(15,12) | (INSN(22,22) << 4);   /* dst/acc */
   13463       UInt    dN  = INSN(19,16) | (INSN(7,7) << 4);     /* argL */
   13464       UInt    bP  = (insn28 >> 23) & 1;
   13465       UInt    bQ  = (insn28 >> 21) & 1;
   13466       UInt    bR  = (insn28 >> 20) & 1;
   13467       UInt    bS  = (insn28 >> 6) & 1;
   13468       UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
   13469       IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   13470       switch (opc) {
   13471          case BITS4(0,0,0,0): /* MAC: d + n * m */
   13472             putDReg(dD, triop(Iop_AddF64, rm,
   13473                               getDReg(dD),
   13474                               triop(Iop_MulF64, rm, getDReg(dN),
   13475                                                     getDReg(dM))),
   13476                         condT);
   13477             DIP("fmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13478             goto decode_success_vfp;
   13479          case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
   13480             putDReg(dD, triop(Iop_AddF64, rm,
   13481                               getDReg(dD),
   13482                               unop(Iop_NegF64,
   13483                                    triop(Iop_MulF64, rm, getDReg(dN),
   13484                                                          getDReg(dM)))),
   13485                         condT);
   13486             DIP("fnmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13487             goto decode_success_vfp;
   13488          case BITS4(0,0,1,0): /* MSC: - d + n * m */
   13489             putDReg(dD, triop(Iop_AddF64, rm,
   13490                               unop(Iop_NegF64, getDReg(dD)),
   13491                               triop(Iop_MulF64, rm, getDReg(dN),
   13492                                                     getDReg(dM))),
   13493                         condT);
   13494             DIP("fmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13495             goto decode_success_vfp;
   13496          case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
   13497             putDReg(dD, triop(Iop_AddF64, rm,
   13498                               unop(Iop_NegF64, getDReg(dD)),
   13499                               unop(Iop_NegF64,
   13500                                    triop(Iop_MulF64, rm, getDReg(dN),
   13501                                                          getDReg(dM)))),
   13502                         condT);
   13503             DIP("fnmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13504             goto decode_success_vfp;
   13505          case BITS4(0,1,0,0): /* MUL: n * m */
   13506             putDReg(dD, triop(Iop_MulF64, rm, getDReg(dN), getDReg(dM)),
   13507                         condT);
   13508             DIP("fmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13509             goto decode_success_vfp;
   13510          case BITS4(0,1,0,1): /* NMUL: - n * m */
   13511             putDReg(dD, unop(Iop_NegF64,
   13512                              triop(Iop_MulF64, rm, getDReg(dN),
   13513                                                    getDReg(dM))),
   13514                     condT);
   13515             DIP("fnmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13516             goto decode_success_vfp;
   13517          case BITS4(0,1,1,0): /* ADD: n + m */
   13518             putDReg(dD, triop(Iop_AddF64, rm, getDReg(dN), getDReg(dM)),
   13519                         condT);
   13520             DIP("faddd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13521             goto decode_success_vfp;
   13522          case BITS4(0,1,1,1): /* SUB: n - m */
   13523             putDReg(dD, triop(Iop_SubF64, rm, getDReg(dN), getDReg(dM)),
   13524                         condT);
   13525             DIP("fsubd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13526             goto decode_success_vfp;
   13527          case BITS4(1,0,0,0): /* DIV: n / m */
   13528             putDReg(dD, triop(Iop_DivF64, rm, getDReg(dN), getDReg(dM)),
   13529                         condT);
   13530             DIP("fdivd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13531             goto decode_success_vfp;
   13532          case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
   13533             /* XXXROUNDINGFIXME look up ARM reference for fused
   13534                multiply-add rounding */
   13535             putDReg(dD, triop(Iop_AddF64, rm,
   13536                               unop(Iop_NegF64, getDReg(dD)),
   13537                               triop(Iop_MulF64, rm,
   13538                                                 getDReg(dN),
   13539                                                 getDReg(dM))),
   13540                         condT);
   13541             DIP("vfnmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13542             goto decode_success_vfp;
   13543          case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
   13544             /* XXXROUNDINGFIXME look up ARM reference for fused
   13545                multiply-add rounding */
   13546             putDReg(dD, triop(Iop_AddF64, rm,
   13547                               unop(Iop_NegF64, getDReg(dD)),
   13548                               triop(Iop_MulF64, rm,
   13549                                                 unop(Iop_NegF64, getDReg(dN)),
   13550                                                 getDReg(dM))),
   13551                         condT);
   13552             DIP("vfnmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13553             goto decode_success_vfp;
   13554          case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
   13555             /* XXXROUNDINGFIXME look up ARM reference for fused
   13556                multiply-add rounding */
   13557             putDReg(dD, triop(Iop_AddF64, rm,
   13558                               getDReg(dD),
   13559                               triop(Iop_MulF64, rm, getDReg(dN),
   13560                                                     getDReg(dM))),
   13561                         condT);
   13562             DIP("vfmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13563             goto decode_success_vfp;
   13564          case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
   13565             /* XXXROUNDINGFIXME look up ARM reference for fused
   13566                multiply-add rounding */
   13567             putDReg(dD, triop(Iop_AddF64, rm,
   13568                               getDReg(dD),
   13569                               triop(Iop_MulF64, rm,
   13570                                     unop(Iop_NegF64, getDReg(dN)),
   13571                                     getDReg(dM))),
   13572                         condT);
   13573             DIP("vfmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13574             goto decode_success_vfp;
   13575          default:
   13576             break;
   13577       }
   13578    }
   13579 
   13580    /* --------------------- compares (D) --------------------- */
   13581    /*          31   27   23   19   15 11   7    3
   13582                  28   24   20   16 12    8    4    0
   13583       FCMPD    cond 1110 1D11 0100 Dd 1011 0100 Dm
   13584       FCMPED   cond 1110 1D11 0100 Dd 1011 1100 Dm
   13585       FCMPZD   cond 1110 1D11 0101 Dd 1011 0100 0000
   13586       FCMPZED  cond 1110 1D11 0101 Dd 1011 1100 0000
   13587                                  Z         N
   13588 
   13589       Z=0 Compare Dd vs Dm     and set FPSCR 31:28 accordingly
   13590       Z=1 Compare Dd vs zero
   13591 
   13592       N=1 generates Invalid Operation exn if either arg is any kind of NaN
   13593       N=0 generates Invalid Operation exn if either arg is a signalling NaN
   13594       (Not that we pay any attention to N here)
   13595    */
   13596    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13597        && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   13598        && BITS4(1,0,1,1) == INSN(11,8)
   13599        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   13600       UInt bZ = (insn28 >> 16) & 1;
   13601       UInt bN = (insn28 >> 7) & 1;
   13602       UInt dD = INSN(15,12) | (INSN(22,22) << 4);
   13603       UInt dM = INSN(3,0) | (INSN(5,5) << 4);
   13604       if (bZ && INSN(3,0) != 0) {
   13605          /* does not decode; fall through */
   13606       } else {
   13607          IRTemp argL = newTemp(Ity_F64);
   13608          IRTemp argR = newTemp(Ity_F64);
   13609          IRTemp irRes = newTemp(Ity_I32);
   13610          assign(argL, getDReg(dD));
   13611          assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0)) : getDReg(dM));
   13612          assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
   13613 
   13614          IRTemp nzcv     = IRTemp_INVALID;
   13615          IRTemp oldFPSCR = newTemp(Ity_I32);
   13616          IRTemp newFPSCR = newTemp(Ity_I32);
   13617 
   13618          /* This is where the fun starts.  We have to convert 'irRes'
   13619             from an IR-convention return result (IRCmpF64Result) to an
   13620             ARM-encoded (N,Z,C,V) group.  The final result is in the
   13621             bottom 4 bits of 'nzcv'. */
   13622          /* Map compare result from IR to ARM(nzcv) */
   13623          /*
   13624             FP cmp result | IR   | ARM(nzcv)
   13625             --------------------------------
   13626             UN              0x45   0011
   13627             LT              0x01   1000
   13628             GT              0x00   0010
   13629             EQ              0x40   0110
   13630          */
   13631          nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
   13632 
   13633          /* And update FPSCR accordingly */
   13634          assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
   13635          assign(newFPSCR,
   13636                 binop(Iop_Or32,
   13637                       binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
   13638                       binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
   13639 
   13640          putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
   13641 
   13642          if (bZ) {
   13643             DIP("fcmpz%sd%s d%u\n", bN ? "e" : "", nCC(conq), dD);
   13644          } else {
   13645             DIP("fcmp%sd%s d%u, d%u\n", bN ? "e" : "", nCC(conq), dD, dM);
   13646          }
   13647          goto decode_success_vfp;
   13648       }
   13649       /* fall through */
   13650    }
   13651 
   13652    /* --------------------- unary (D) --------------------- */
   13653    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13654        && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   13655        && BITS4(1,0,1,1) == INSN(11,8)
   13656        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   13657       UInt dD  = INSN(15,12) | (INSN(22,22) << 4);
   13658       UInt dM  = INSN(3,0) | (INSN(5,5) << 4);
   13659       UInt b16 = (insn28 >> 16) & 1;
   13660       UInt b7  = (insn28 >> 7) & 1;
   13661       /**/ if (b16 == 0 && b7 == 0) {
   13662          // FCPYD
   13663          putDReg(dD, getDReg(dM), condT);
   13664          DIP("fcpyd%s d%u, d%u\n", nCC(conq), dD, dM);
   13665          goto decode_success_vfp;
   13666       }
   13667       else if (b16 == 0 && b7 == 1) {
   13668          // FABSD
   13669          putDReg(dD, unop(Iop_AbsF64, getDReg(dM)), condT);
   13670          DIP("fabsd%s d%u, d%u\n", nCC(conq), dD, dM);
   13671          goto decode_success_vfp;
   13672       }
   13673       else if (b16 == 1 && b7 == 0) {
   13674          // FNEGD
   13675          putDReg(dD, unop(Iop_NegF64, getDReg(dM)), condT);
   13676          DIP("fnegd%s d%u, d%u\n", nCC(conq), dD, dM);
   13677          goto decode_success_vfp;
   13678       }
   13679       else if (b16 == 1 && b7 == 1) {
   13680          // FSQRTD
   13681          IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   13682          putDReg(dD, binop(Iop_SqrtF64, rm, getDReg(dM)), condT);
   13683          DIP("fsqrtd%s d%u, d%u\n", nCC(conq), dD, dM);
   13684          goto decode_success_vfp;
   13685       }
   13686       else
   13687          vassert(0);
   13688 
   13689       /* fall through */
   13690    }
   13691 
   13692    /* ----------------- I <-> D conversions ----------------- */
   13693 
   13694    // F{S,U}ITOD dD, fM
   13695    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13696        && BITS4(1,0,0,0) == (INSN(19,16) & BITS4(1,1,1,1))
   13697        && BITS4(1,0,1,1) == INSN(11,8)
   13698        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   13699       UInt bM    = (insn28 >> 5) & 1;
   13700       UInt fM    = (INSN(3,0) << 1) | bM;
   13701       UInt dD    = INSN(15,12) | (INSN(22,22) << 4);
   13702       UInt syned = (insn28 >> 7) & 1;
   13703       if (syned) {
   13704          // FSITOD
   13705          putDReg(dD, unop(Iop_I32StoF64,
   13706                           unop(Iop_ReinterpF32asI32, getFReg(fM))),
   13707                  condT);
   13708          DIP("fsitod%s d%u, s%u\n", nCC(conq), dD, fM);
   13709       } else {
   13710          // FUITOD
   13711          putDReg(dD, unop(Iop_I32UtoF64,
   13712                           unop(Iop_ReinterpF32asI32, getFReg(fM))),
   13713                  condT);
   13714          DIP("fuitod%s d%u, s%u\n", nCC(conq), dD, fM);
   13715       }
   13716       goto decode_success_vfp;
   13717    }
   13718 
   13719    // FTO{S,U}ID fD, dM
   13720    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13721        && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   13722        && BITS4(1,0,1,1) == INSN(11,8)
   13723        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   13724       UInt   bD    = (insn28 >> 22) & 1;
   13725       UInt   fD    = (INSN(15,12) << 1) | bD;
   13726       UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
   13727       UInt   bZ    = (insn28 >> 7) & 1;
   13728       UInt   syned = (insn28 >> 16) & 1;
   13729       IRTemp rmode = newTemp(Ity_I32);
   13730       assign(rmode, bZ ? mkU32(Irrm_ZERO)
   13731                        : mkexpr(mk_get_IR_rounding_mode()));
   13732       if (syned) {
   13733          // FTOSID
   13734          putFReg(fD, unop(Iop_ReinterpI32asF32,
   13735                           binop(Iop_F64toI32S, mkexpr(rmode),
   13736                                 getDReg(dM))),
   13737                  condT);
   13738          DIP("ftosi%sd%s s%u, d%u\n", bZ ? "z" : "",
   13739              nCC(conq), fD, dM);
   13740       } else {
   13741          // FTOUID
   13742          putFReg(fD, unop(Iop_ReinterpI32asF32,
   13743                           binop(Iop_F64toI32U, mkexpr(rmode),
   13744                                 getDReg(dM))),
   13745                  condT);
   13746          DIP("ftoui%sd%s s%u, d%u\n", bZ ? "z" : "",
   13747              nCC(conq), fD, dM);
   13748       }
   13749       goto decode_success_vfp;
   13750    }
   13751 
   13752    /* ----------------------------------------------------------- */
   13753    /* -- VFP instructions -- single precision                  -- */
   13754    /* ----------------------------------------------------------- */
   13755 
   13756    /* --------------------- fldms, fstms --------------------- */
   13757    /*
   13758                                  31   27   23   19 15 11   7   0
   13759                                          P UDWL
   13760       C4-98, C5-26   1  FSTMS    cond 1100 1x00 Rn Fd 1010 offset
   13761       C4-98, C5-28   2  FSTMIAS  cond 1100 1x10 Rn Fd 1010 offset
   13762       C4-98, C5-30   3  FSTMDBS  cond 1101 0x10 Rn Fd 1010 offset
   13763 
   13764       C4-40, C5-26   1  FLDMS    cond 1100 1x01 Rn Fd 1010 offset
   13765       C4-40, C5-26   2  FLDMIAS  cond 1100 1x11 Rn Fd 1010 offset
   13766       C4-40, C5-26   3  FLDMDBS  cond 1101 0x11 Rn Fd 1010 offset
   13767 
   13768       Regs transferred: F(Fd:D) .. F(Fd:D + offset - 1)
   13769       offset must not imply a reg > 31 (no transfers past S31)
   13770       IA/DB: Rn is changed by (4 x # regs transferred)
   13771 
   13772       case coding:
   13773          1  at-Rn   (access at Rn)
   13774          2  ia-Rn   (access at Rn, then Rn += 4n)
   13775          3  db-Rn   (Rn -= 4n,     then access at Rn)
   13776    */
   13777    if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
   13778        && INSN(11,8) == BITS4(1,0,1,0)) {
   13779       UInt bP      = (insn28 >> 24) & 1;
   13780       UInt bU      = (insn28 >> 23) & 1;
   13781       UInt bW      = (insn28 >> 21) & 1;
   13782       UInt bL      = (insn28 >> 20) & 1;
   13783       UInt bD      = (insn28 >> 22) & 1;
   13784       UInt offset  = (insn28 >> 0) & 0xFF;
   13785       UInt rN      = INSN(19,16);
   13786       UInt fD      = (INSN(15,12) << 1) | bD;
   13787       UInt nRegs   = offset;
   13788       UInt summary = 0;
   13789       Int  i;
   13790 
   13791       /**/ if (bP == 0 && bU == 1 && bW == 0) {
   13792          summary = 1;
   13793       }
   13794       else if (bP == 0 && bU == 1 && bW == 1) {
   13795          summary = 2;
   13796       }
   13797       else if (bP == 1 && bU == 0 && bW == 1) {
   13798          summary = 3;
   13799       }
   13800       else goto after_vfp_fldms_fstms;
   13801 
   13802       /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
   13803       if (rN == 15 && (summary == 2 || summary == 3 || isT))
   13804          goto after_vfp_fldms_fstms;
   13805 
   13806       /* offset must specify at least one register */
   13807       if (offset < 1)
   13808          goto after_vfp_fldms_fstms;
   13809 
   13810       /* can't transfer regs after S31 */
   13811       if (fD + nRegs - 1 >= 32)
   13812          goto after_vfp_fldms_fstms;
   13813 
   13814       /* Now, we can't do a conditional load or store, since that very
   13815          likely will generate an exception.  So we have to take a side
   13816          exit at this point if the condition is false. */
   13817       if (condT != IRTemp_INVALID) {
   13818          if (isT)
   13819             mk_skip_over_T32_if_cond_is_false( condT );
   13820          else
   13821             mk_skip_over_A32_if_cond_is_false( condT );
   13822          condT = IRTemp_INVALID;
   13823       }
   13824       /* Ok, now we're unconditional.  Do the load or store. */
   13825 
   13826       /* get the old Rn value */
   13827       IRTemp rnT = newTemp(Ity_I32);
   13828       assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
   13829                            rN == 15));
   13830 
   13831       /* make a new value for Rn, post-insn */
   13832       IRTemp rnTnew = IRTemp_INVALID;
   13833       if (summary == 2 || summary == 3) {
   13834          rnTnew = newTemp(Ity_I32);
   13835          assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
   13836                               mkexpr(rnT),
   13837                               mkU32(4 * nRegs)));
   13838       }
   13839 
   13840       /* decide on the base transfer address */
   13841       IRTemp taT = newTemp(Ity_I32);
   13842       assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
   13843 
   13844       /* update Rn if necessary -- in case 3, we're moving it down, so
   13845          update before any memory reference, in order to keep Memcheck
   13846          and V's stack-extending logic (on linux) happy */
   13847       if (summary == 3) {
   13848          if (isT)
   13849             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   13850          else
   13851             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   13852       }
   13853 
   13854       /* generate the transfers */
   13855       for (i = 0; i < nRegs; i++) {
   13856          IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(4*i));
   13857          if (bL) {
   13858             putFReg(fD + i, loadLE(Ity_F32, addr), IRTemp_INVALID);
   13859          } else {
   13860             storeLE(addr, getFReg(fD + i));
   13861          }
   13862       }
   13863 
   13864       /* update Rn if necessary -- in case 2, we're moving it up, so
   13865          update after any memory reference, in order to keep Memcheck
   13866          and V's stack-extending logic (on linux) happy */
   13867       if (summary == 2) {
   13868          if (isT)
   13869             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   13870          else
   13871             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   13872       }
   13873 
   13874       const HChar* nm = bL==1 ? "ld" : "st";
   13875       switch (summary) {
   13876          case 1:  DIP("f%sms%s r%u, {s%u-s%u}\n",
   13877                       nm, nCC(conq), rN, fD, fD + nRegs - 1);
   13878                   break;
   13879          case 2:  DIP("f%smias%s r%u!, {s%u-s%u}\n",
   13880                       nm, nCC(conq), rN, fD, fD + nRegs - 1);
   13881                   break;
   13882          case 3:  DIP("f%smdbs%s r%u!, {s%u-s%u}\n",
   13883                       nm, nCC(conq), rN, fD, fD + nRegs - 1);
   13884                   break;
   13885          default: vassert(0);
   13886       }
   13887 
   13888       goto decode_success_vfp;
   13889       /* FIXME alignment constraints? */
   13890    }
   13891 
   13892   after_vfp_fldms_fstms:
   13893 
   13894    /* --------------------- fmsr, fmrs --------------------- */
   13895    if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
   13896        && BITS4(1,0,1,0) == INSN(11,8)
   13897        && BITS4(0,0,0,0) == INSN(3,0)
   13898        && BITS4(0,0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
   13899       UInt rD  = INSN(15,12);
   13900       UInt b7  = (insn28 >> 7) & 1;
   13901       UInt fN  = (INSN(19,16) << 1) | b7;
   13902       UInt b20 = (insn28 >> 20) & 1;
   13903       if (rD == 15) {
   13904          /* fall through */
   13905          /* Let's assume that no sane person would want to do
   13906             floating-point transfers to or from the program counter,
   13907             and simply decline to decode the instruction.  The ARM ARM
   13908             doesn't seem to explicitly disallow this case, though. */
   13909       } else {
   13910          if (b20) {
   13911             IRExpr* res = unop(Iop_ReinterpF32asI32, getFReg(fN));
   13912             if (isT)
   13913                putIRegT(rD, res, condT);
   13914             else
   13915                putIRegA(rD, res, condT, Ijk_Boring);
   13916             DIP("fmrs%s r%u, s%u\n", nCC(conq), rD, fN);
   13917          } else {
   13918             putFReg(fN, unop(Iop_ReinterpI32asF32,
   13919                              isT ? getIRegT(rD) : getIRegA(rD)),
   13920                         condT);
   13921             DIP("fmsr%s s%u, r%u\n", nCC(conq), fN, rD);
   13922          }
   13923          goto decode_success_vfp;
   13924       }
   13925       /* fall through */
   13926    }
   13927 
   13928    /* --------------------- f{ld,st}s --------------------- */
   13929    // FLDS, FSTS
   13930    if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
   13931        && BITS4(1,0,1,0) == INSN(11,8)) {
   13932       UInt bD     = (insn28 >> 22) & 1;
   13933       UInt fD     = (INSN(15,12) << 1) | bD;
   13934       UInt rN     = INSN(19,16);
   13935       UInt offset = (insn28 & 0xFF) << 2;
   13936       UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
   13937       UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
   13938       /* make unconditional */
   13939       if (condT != IRTemp_INVALID) {
   13940          if (isT)
   13941             mk_skip_over_T32_if_cond_is_false( condT );
   13942          else
   13943             mk_skip_over_A32_if_cond_is_false( condT );
   13944          condT = IRTemp_INVALID;
   13945       }
   13946       IRTemp ea = newTemp(Ity_I32);
   13947       assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
   13948                        align4if(isT ? getIRegT(rN) : getIRegA(rN),
   13949                                 rN == 15),
   13950                        mkU32(offset)));
   13951       if (bL) {
   13952          putFReg(fD, loadLE(Ity_F32,mkexpr(ea)), IRTemp_INVALID);
   13953       } else {
   13954          storeLE(mkexpr(ea), getFReg(fD));
   13955       }
   13956       DIP("f%ss%s s%u, [r%u, %c#%u]\n",
   13957           bL ? "ld" : "st", nCC(conq), fD, rN,
   13958           bU ? '+' : '-', offset);
   13959       goto decode_success_vfp;
   13960    }
   13961 
   13962    /* --------------------- dp insns (F) --------------------- */
   13963    if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
   13964        && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
   13965        && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
   13966       UInt    bM  = (insn28 >> 5) & 1;
   13967       UInt    bD  = (insn28 >> 22) & 1;
   13968       UInt    bN  = (insn28 >> 7) & 1;
   13969       UInt    fM  = (INSN(3,0) << 1) | bM;   /* argR */
   13970       UInt    fD  = (INSN(15,12) << 1) | bD; /* dst/acc */
   13971       UInt    fN  = (INSN(19,16) << 1) | bN; /* argL */
   13972       UInt    bP  = (insn28 >> 23) & 1;
   13973       UInt    bQ  = (insn28 >> 21) & 1;
   13974       UInt    bR  = (insn28 >> 20) & 1;
   13975       UInt    bS  = (insn28 >> 6) & 1;
   13976       UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
   13977       IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   13978       switch (opc) {
   13979          case BITS4(0,0,0,0): /* MAC: d + n * m */
   13980             putFReg(fD, triop(Iop_AddF32, rm,
   13981                               getFReg(fD),
   13982                               triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
   13983                         condT);
   13984             DIP("fmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   13985             goto decode_success_vfp;
   13986          case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
   13987             putFReg(fD, triop(Iop_AddF32, rm,
   13988                               getFReg(fD),
   13989                               unop(Iop_NegF32,
   13990                                    triop(Iop_MulF32, rm, getFReg(fN),
   13991                                                          getFReg(fM)))),
   13992                         condT);
   13993             DIP("fnmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   13994             goto decode_success_vfp;
   13995          case BITS4(0,0,1,0): /* MSC: - d + n * m */
   13996             putFReg(fD, triop(Iop_AddF32, rm,
   13997                               unop(Iop_NegF32, getFReg(fD)),
   13998                               triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
   13999                         condT);
   14000             DIP("fmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14001             goto decode_success_vfp;
   14002          case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
   14003             putFReg(fD, triop(Iop_AddF32, rm,
   14004                               unop(Iop_NegF32, getFReg(fD)),
   14005                               unop(Iop_NegF32,
   14006                                    triop(Iop_MulF32, rm,
   14007                                                      getFReg(fN),
   14008                                                     getFReg(fM)))),
   14009                         condT);
   14010             DIP("fnmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14011             goto decode_success_vfp;
   14012          case BITS4(0,1,0,0): /* MUL: n * m */
   14013             putFReg(fD, triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM)),
   14014                         condT);
   14015             DIP("fmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14016             goto decode_success_vfp;
   14017          case BITS4(0,1,0,1): /* NMUL: - n * m */
   14018             putFReg(fD, unop(Iop_NegF32,
   14019                              triop(Iop_MulF32, rm, getFReg(fN),
   14020                                                    getFReg(fM))),
   14021                     condT);
   14022             DIP("fnmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14023             goto decode_success_vfp;
   14024          case BITS4(0,1,1,0): /* ADD: n + m */
   14025             putFReg(fD, triop(Iop_AddF32, rm, getFReg(fN), getFReg(fM)),
   14026                         condT);
   14027             DIP("fadds%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14028             goto decode_success_vfp;
   14029          case BITS4(0,1,1,1): /* SUB: n - m */
   14030             putFReg(fD, triop(Iop_SubF32, rm, getFReg(fN), getFReg(fM)),
   14031                         condT);
   14032             DIP("fsubs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14033             goto decode_success_vfp;
   14034          case BITS4(1,0,0,0): /* DIV: n / m */
   14035             putFReg(fD, triop(Iop_DivF32, rm, getFReg(fN), getFReg(fM)),
   14036                         condT);
   14037             DIP("fdivs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14038             goto decode_success_vfp;
   14039          case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
   14040             /* XXXROUNDINGFIXME look up ARM reference for fused
   14041                multiply-add rounding */
   14042             putFReg(fD, triop(Iop_AddF32, rm,
   14043                               unop(Iop_NegF32, getFReg(fD)),
   14044                               triop(Iop_MulF32, rm,
   14045                                                 getFReg(fN),
   14046                                                 getFReg(fM))),
   14047                         condT);
   14048             DIP("vfnmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14049             goto decode_success_vfp;
   14050          case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
   14051             /* XXXROUNDINGFIXME look up ARM reference for fused
   14052                multiply-add rounding */
   14053             putFReg(fD, triop(Iop_AddF32, rm,
   14054                               unop(Iop_NegF32, getFReg(fD)),
   14055                               triop(Iop_MulF32, rm,
   14056                                                 unop(Iop_NegF32, getFReg(fN)),
   14057                                                 getFReg(fM))),
   14058                         condT);
   14059             DIP("vfnmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14060             goto decode_success_vfp;
   14061          case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
   14062             /* XXXROUNDINGFIXME look up ARM reference for fused
   14063                multiply-add rounding */
   14064             putFReg(fD, triop(Iop_AddF32, rm,
   14065                               getFReg(fD),
   14066                               triop(Iop_MulF32, rm, getFReg(fN),
   14067                                                     getFReg(fM))),
   14068                         condT);
   14069             DIP("vfmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14070             goto decode_success_vfp;
   14071          case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
   14072             /* XXXROUNDINGFIXME look up ARM reference for fused
   14073                multiply-add rounding */
   14074             putFReg(fD, triop(Iop_AddF32, rm,
   14075                               getFReg(fD),
   14076                               triop(Iop_MulF32, rm,
   14077                                     unop(Iop_NegF32, getFReg(fN)),
   14078                                     getFReg(fM))),
   14079                         condT);
   14080             DIP("vfmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14081             goto decode_success_vfp;
   14082          default:
   14083             break;
   14084       }
   14085    }
   14086 
   14087    /* --------------------- compares (S) --------------------- */
   14088    /*          31   27   23   19   15 11   7    3
   14089                  28   24   20   16 12    8    4    0
   14090       FCMPS    cond 1110 1D11 0100 Fd 1010 01M0 Fm
   14091       FCMPES   cond 1110 1D11 0100 Fd 1010 11M0 Fm
   14092       FCMPZS   cond 1110 1D11 0101 Fd 1010 0100 0000
   14093       FCMPEZS  cond 1110 1D11 0101 Fd 1010 1100 0000
   14094                                  Z         N
   14095 
   14096       Z=0 Compare Fd:D vs Fm:M     and set FPSCR 31:28 accordingly
   14097       Z=1 Compare Fd:D vs zero
   14098 
   14099       N=1 generates Invalid Operation exn if either arg is any kind of NaN
   14100       N=0 generates Invalid Operation exn if either arg is a signalling NaN
   14101       (Not that we pay any attention to N here)
   14102    */
   14103    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14104        && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   14105        && BITS4(1,0,1,0) == INSN(11,8)
   14106        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   14107       UInt bZ = (insn28 >> 16) & 1;
   14108       UInt bN = (insn28 >> 7) & 1;
   14109       UInt bD = (insn28 >> 22) & 1;
   14110       UInt bM = (insn28 >> 5) & 1;
   14111       UInt fD = (INSN(15,12) << 1) | bD;
   14112       UInt fM = (INSN(3,0) << 1) | bM;
   14113       if (bZ && (INSN(3,0) != 0 || (INSN(7,4) & 3) != 0)) {
   14114          /* does not decode; fall through */
   14115       } else {
   14116          IRTemp argL = newTemp(Ity_F64);
   14117          IRTemp argR = newTemp(Ity_F64);
   14118          IRTemp irRes = newTemp(Ity_I32);
   14119 
   14120          assign(argL, unop(Iop_F32toF64, getFReg(fD)));
   14121          assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0))
   14122                          : unop(Iop_F32toF64, getFReg(fM)));
   14123          assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
   14124 
   14125          IRTemp nzcv     = IRTemp_INVALID;
   14126          IRTemp oldFPSCR = newTemp(Ity_I32);
   14127          IRTemp newFPSCR = newTemp(Ity_I32);
   14128 
   14129          /* This is where the fun starts.  We have to convert 'irRes'
   14130             from an IR-convention return result (IRCmpF64Result) to an
   14131             ARM-encoded (N,Z,C,V) group.  The final result is in the
   14132             bottom 4 bits of 'nzcv'. */
   14133          /* Map compare result from IR to ARM(nzcv) */
   14134          /*
   14135             FP cmp result | IR   | ARM(nzcv)
   14136             --------------------------------
   14137             UN              0x45   0011
   14138             LT              0x01   1000
   14139             GT              0x00   0010
   14140             EQ              0x40   0110
   14141          */
   14142          nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
   14143 
   14144          /* And update FPSCR accordingly */
   14145          assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
   14146          assign(newFPSCR,
   14147                 binop(Iop_Or32,
   14148                       binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
   14149                       binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
   14150 
   14151          putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
   14152 
   14153          if (bZ) {
   14154             DIP("fcmpz%ss%s s%u\n", bN ? "e" : "", nCC(conq), fD);
   14155          } else {
   14156             DIP("fcmp%ss%s s%u, s%u\n", bN ? "e" : "",
   14157                 nCC(conq), fD, fM);
   14158          }
   14159          goto decode_success_vfp;
   14160       }
   14161       /* fall through */
   14162    }
   14163 
   14164    /* --------------------- unary (S) --------------------- */
   14165    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14166        && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   14167        && BITS4(1,0,1,0) == INSN(11,8)
   14168        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   14169       UInt bD = (insn28 >> 22) & 1;
   14170       UInt bM = (insn28 >> 5) & 1;
   14171       UInt fD  = (INSN(15,12) << 1) | bD;
   14172       UInt fM  = (INSN(3,0) << 1) | bM;
   14173       UInt b16 = (insn28 >> 16) & 1;
   14174       UInt b7  = (insn28 >> 7) & 1;
   14175       /**/ if (b16 == 0 && b7 == 0) {
   14176          // FCPYS
   14177          putFReg(fD, getFReg(fM), condT);
   14178          DIP("fcpys%s s%u, s%u\n", nCC(conq), fD, fM);
   14179          goto decode_success_vfp;
   14180       }
   14181       else if (b16 == 0 && b7 == 1) {
   14182          // FABSS
   14183          putFReg(fD, unop(Iop_AbsF32, getFReg(fM)), condT);
   14184          DIP("fabss%s s%u, s%u\n", nCC(conq), fD, fM);
   14185          goto decode_success_vfp;
   14186       }
   14187       else if (b16 == 1 && b7 == 0) {
   14188          // FNEGS
   14189          putFReg(fD, unop(Iop_NegF32, getFReg(fM)), condT);
   14190          DIP("fnegs%s s%u, s%u\n", nCC(conq), fD, fM);
   14191          goto decode_success_vfp;
   14192       }
   14193       else if (b16 == 1 && b7 == 1) {
   14194          // FSQRTS
   14195          IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   14196          putFReg(fD, binop(Iop_SqrtF32, rm, getFReg(fM)), condT);
   14197          DIP("fsqrts%s s%u, s%u\n", nCC(conq), fD, fM);
   14198          goto decode_success_vfp;
   14199       }
   14200       else
   14201          vassert(0);
   14202 
   14203       /* fall through */
   14204    }
   14205 
   14206    /* ----------------- I <-> S conversions ----------------- */
   14207 
   14208    // F{S,U}ITOS fD, fM
   14209    /* These are more complex than FSITOD/FUITOD.  In the D cases, a 32
   14210       bit int will always fit within the 53 bit mantissa, so there's
   14211       no possibility of a loss of precision, but that's obviously not
   14212       the case here.  Hence this case possibly requires rounding, and
   14213       so it drags in the current rounding mode. */
   14214    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14215        && BITS4(1,0,0,0) == INSN(19,16)
   14216        && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
   14217        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   14218       UInt bM    = (insn28 >> 5) & 1;
   14219       UInt bD    = (insn28 >> 22) & 1;
   14220       UInt fM    = (INSN(3,0) << 1) | bM;
   14221       UInt fD    = (INSN(15,12) << 1) | bD;
   14222       UInt syned = (insn28 >> 7) & 1;
   14223       IRTemp rmode = newTemp(Ity_I32);
   14224       assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
   14225       if (syned) {
   14226          // FSITOS
   14227          putFReg(fD, binop(Iop_F64toF32,
   14228                            mkexpr(rmode),
   14229                            unop(Iop_I32StoF64,
   14230                                 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
   14231                  condT);
   14232          DIP("fsitos%s s%u, s%u\n", nCC(conq), fD, fM);
   14233       } else {
   14234          // FUITOS
   14235          putFReg(fD, binop(Iop_F64toF32,
   14236                            mkexpr(rmode),
   14237                            unop(Iop_I32UtoF64,
   14238                                 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
   14239                  condT);
   14240          DIP("fuitos%s s%u, s%u\n", nCC(conq), fD, fM);
   14241       }
   14242       goto decode_success_vfp;
   14243    }
   14244 
   14245    // FTO{S,U}IS fD, fM
   14246    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14247        && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   14248        && BITS4(1,0,1,0) == INSN(11,8)
   14249        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   14250       UInt   bM    = (insn28 >> 5) & 1;
   14251       UInt   bD    = (insn28 >> 22) & 1;
   14252       UInt   fD    = (INSN(15,12) << 1) | bD;
   14253       UInt   fM    = (INSN(3,0) << 1) | bM;
   14254       UInt   bZ    = (insn28 >> 7) & 1;
   14255       UInt   syned = (insn28 >> 16) & 1;
   14256       IRTemp rmode = newTemp(Ity_I32);
   14257       assign(rmode, bZ ? mkU32(Irrm_ZERO)
   14258                        : mkexpr(mk_get_IR_rounding_mode()));
   14259       if (syned) {
   14260          // FTOSIS
   14261          putFReg(fD, unop(Iop_ReinterpI32asF32,
   14262                           binop(Iop_F64toI32S, mkexpr(rmode),
   14263                                 unop(Iop_F32toF64, getFReg(fM)))),
   14264                  condT);
   14265          DIP("ftosi%ss%s s%u, d%u\n", bZ ? "z" : "",
   14266              nCC(conq), fD, fM);
   14267          goto decode_success_vfp;
   14268       } else {
   14269          // FTOUIS
   14270          putFReg(fD, unop(Iop_ReinterpI32asF32,
   14271                           binop(Iop_F64toI32U, mkexpr(rmode),
   14272                                 unop(Iop_F32toF64, getFReg(fM)))),
   14273                  condT);
   14274          DIP("ftoui%ss%s s%u, d%u\n", bZ ? "z" : "",
   14275              nCC(conq), fD, fM);
   14276          goto decode_success_vfp;
   14277       }
   14278    }
   14279 
   14280    /* ----------------- S <-> D conversions ----------------- */
   14281 
   14282    // FCVTDS
   14283    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14284        && BITS4(0,1,1,1) == INSN(19,16)
   14285        && BITS4(1,0,1,0) == INSN(11,8)
   14286        && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
   14287       UInt dD = INSN(15,12) | (INSN(22,22) << 4);
   14288       UInt bM = (insn28 >> 5) & 1;
   14289       UInt fM = (INSN(3,0) << 1) | bM;
   14290       putDReg(dD, unop(Iop_F32toF64, getFReg(fM)), condT);
   14291       DIP("fcvtds%s d%u, s%u\n", nCC(conq), dD, fM);
   14292       goto decode_success_vfp;
   14293    }
   14294 
   14295    // FCVTSD
   14296    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14297        && BITS4(0,1,1,1) == INSN(19,16)
   14298        && BITS4(1,0,1,1) == INSN(11,8)
   14299        && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
   14300       UInt   bD    = (insn28 >> 22) & 1;
   14301       UInt   fD    = (INSN(15,12) << 1) | bD;
   14302       UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
   14303       IRTemp rmode = newTemp(Ity_I32);
   14304       assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
   14305       putFReg(fD, binop(Iop_F64toF32, mkexpr(rmode), getDReg(dM)),
   14306                   condT);
   14307       DIP("fcvtsd%s s%u, d%u\n", nCC(conq), fD, dM);
   14308       goto decode_success_vfp;
   14309    }
   14310 
   14311    /* --------------- VCVT fixed<->floating, VFP --------------- */
   14312    /*          31   27   23   19   15 11   7    3
   14313                  28   24   20   16 12    8    4    0
   14314 
   14315                cond 1110 1D11 1p1U Vd 101f x1i0 imm4
   14316 
   14317       VCVT<c>.<Td>.F64 <Dd>, <Dd>, #fbits
   14318       VCVT<c>.<Td>.F32 <Sd>, <Sd>, #fbits
   14319       VCVT<c>.F64.<Td> <Dd>, <Dd>, #fbits
   14320       VCVT<c>.F32.<Td> <Sd>, <Sd>, #fbits
   14321       are of this form.  We only handle a subset of the cases though.
   14322    */
   14323    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14324        && BITS4(1,0,1,0) == (INSN(19,16) & BITS4(1,0,1,0))
   14325        && BITS3(1,0,1) == INSN(11,9)
   14326        && BITS3(1,0,0) == (INSN(6,4) & BITS3(1,0,1))) {
   14327       UInt bD        = INSN(22,22);
   14328       UInt bOP       = INSN(18,18);
   14329       UInt bU        = INSN(16,16);
   14330       UInt Vd        = INSN(15,12);
   14331       UInt bSF       = INSN(8,8);
   14332       UInt bSX       = INSN(7,7);
   14333       UInt bI        = INSN(5,5);
   14334       UInt imm4      = INSN(3,0);
   14335       Bool to_fixed  = bOP == 1;
   14336       Bool dp_op     = bSF == 1;
   14337       Bool unsyned   = bU == 1;
   14338       UInt size      = bSX == 0 ? 16 : 32;
   14339       Int  frac_bits = size - ((imm4 << 1) | bI);
   14340       UInt d         = dp_op  ? ((bD << 4) | Vd)  : ((Vd << 1) | bD);
   14341 
   14342       IRExpr* rm     = mkU32(Irrm_NEAREST);
   14343       IRTemp  scale  = newTemp(Ity_F64);
   14344       assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
   14345 
   14346       if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && !dp_op
   14347                                             && size == 32) {
   14348          /* VCVT.F32.{S,U}32 S[d], S[d], #frac_bits */
   14349          /* This generates really horrible code.  We could potentially
   14350             do much better. */
   14351          IRTemp rmode = newTemp(Ity_I32);
   14352          assign(rmode, mkU32(Irrm_NEAREST)); // per the spec
   14353          IRTemp src32 = newTemp(Ity_I32);
   14354          assign(src32,  unop(Iop_ReinterpF32asI32, getFReg(d)));
   14355          IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
   14356                                 mkexpr(src32 ) );
   14357          IRExpr* resF64 = triop(Iop_DivF64,
   14358                                 rm, as_F64,
   14359                                 triop(Iop_AddF64, rm, mkexpr(scale),
   14360                                                       mkexpr(scale)));
   14361          IRExpr* resF32 = binop(Iop_F64toF32, mkexpr(rmode), resF64);
   14362          putFReg(d, resF32, condT);
   14363          DIP("vcvt.f32.%c32, s%u, s%u, #%d\n",
   14364              unsyned ? 'u' : 's', d, d, frac_bits);
   14365          goto decode_success_vfp;
   14366       }
   14367       if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && dp_op
   14368                                             && size == 32) {
   14369          /* VCVT.F64.{S,U}32 D[d], D[d], #frac_bits */
   14370          /* This generates really horrible code.  We could potentially
   14371             do much better. */
   14372          IRTemp src32 = newTemp(Ity_I32);
   14373          assign(src32, unop(Iop_64to32, getDRegI64(d)));
   14374          IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
   14375                                 mkexpr(src32 ) );
   14376          IRExpr* resF64 = triop(Iop_DivF64,
   14377                                 rm, as_F64,
   14378                                 triop(Iop_AddF64, rm, mkexpr(scale),
   14379                                                       mkexpr(scale)));
   14380          putDReg(d, resF64, condT);
   14381          DIP("vcvt.f64.%c32, d%u, d%u, #%d\n",
   14382              unsyned ? 'u' : 's', d, d, frac_bits);
   14383          goto decode_success_vfp;
   14384       }
   14385       if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && dp_op
   14386                                             && size == 32) {
   14387          /* VCVT.{S,U}32.F64 D[d], D[d], #frac_bits */
   14388          IRTemp srcF64 = newTemp(Ity_F64);
   14389          assign(srcF64, getDReg(d));
   14390          IRTemp scaledF64 = newTemp(Ity_F64);
   14391          assign(scaledF64, triop(Iop_MulF64,
   14392                                  rm, mkexpr(srcF64),
   14393                                  triop(Iop_AddF64, rm, mkexpr(scale),
   14394                                                        mkexpr(scale))));
   14395          IRTemp rmode = newTemp(Ity_I32);
   14396          assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
   14397          IRTemp asI32 = newTemp(Ity_I32);
   14398          assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
   14399                              mkexpr(rmode), mkexpr(scaledF64)));
   14400          putDRegI64(d, unop(unsyned ? Iop_32Uto64 : Iop_32Sto64,
   14401                             mkexpr(asI32)), condT);
   14402 
   14403          DIP("vcvt.%c32.f64, d%u, d%u, #%d\n",
   14404              unsyned ? 'u' : 's', d, d, frac_bits);
   14405          goto decode_success_vfp;
   14406       }
   14407       if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && !dp_op
   14408                                             && size == 32) {
   14409          /* VCVT.{S,U}32.F32 S[d], S[d], #frac_bits */
   14410          IRTemp srcF32 = newTemp(Ity_F32);
   14411          assign(srcF32, getFReg(d));
   14412          IRTemp scaledF64 = newTemp(Ity_F64);
   14413          assign(scaledF64, triop(Iop_MulF64,
   14414                                  rm, unop(Iop_F32toF64, mkexpr(srcF32)),
   14415                                  triop(Iop_AddF64, rm, mkexpr(scale),
   14416                                                        mkexpr(scale))));
   14417          IRTemp rmode = newTemp(Ity_I32);
   14418          assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
   14419          IRTemp asI32 = newTemp(Ity_I32);
   14420          assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
   14421                              mkexpr(rmode), mkexpr(scaledF64)));
   14422          putFReg(d, unop(Iop_ReinterpI32asF32, mkexpr(asI32)), condT);
   14423          DIP("vcvt.%c32.f32, d%u, d%u, #%d\n",
   14424              unsyned ? 'u' : 's', d, d, frac_bits);
   14425          goto decode_success_vfp;
   14426       }
   14427       /* fall through */
   14428    }
   14429 
   14430    /* FAILURE */
   14431    return False;
   14432 
   14433   decode_success_vfp:
   14434    /* Check that any accepted insn really is a CP10 or CP11 insn, iow,
   14435       assert that we aren't accepting, in this fn, insns that actually
   14436       should be handled somewhere else. */
   14437    vassert(INSN(11,9) == BITS3(1,0,1)); // 11:8 = 1010 or 1011
   14438    return True;
   14439 
   14440 #  undef INSN
   14441 }
   14442 
   14443 
   14444 /*------------------------------------------------------------*/
   14445 /*--- Instructions in NV (never) space                     ---*/
   14446 /*------------------------------------------------------------*/
   14447 
   14448 /* ARM only */
   14449 /* Translate a NV space instruction.  If successful, returns True and
   14450    *dres may or may not be updated.  If failure, returns False and
   14451    doesn't change *dres nor create any IR.
   14452 
   14453    Note that all NEON instructions (in ARM mode) are handled through
   14454    here, since they are all in NV space.
   14455 */
   14456 static Bool decode_NV_instruction ( /*MOD*/DisResult* dres,
   14457                                     const VexArchInfo* archinfo,
   14458                                     UInt insn )
   14459 {
   14460 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   14461 #  define INSN_COND          SLICE_UInt(insn, 31, 28)
   14462 
   14463    HChar dis_buf[128];
   14464 
   14465    // Should only be called for NV instructions
   14466    vassert(BITS4(1,1,1,1) == INSN_COND);
   14467 
   14468    /* ------------------------ pld{w} ------------------------ */
   14469    if (BITS8(0,1,0,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
   14470        && BITS4(1,1,1,1) == INSN(15,12)) {
   14471       UInt rN    = INSN(19,16);
   14472       UInt imm12 = INSN(11,0);
   14473       UInt bU    = INSN(23,23);
   14474       UInt bR    = INSN(22,22);
   14475       DIP("pld%c [r%u, #%c%u]\n", bR ? ' ' : 'w', rN, bU ? '+' : '-', imm12);
   14476       return True;
   14477    }
   14478 
   14479    if (BITS8(0,1,1,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
   14480        && BITS4(1,1,1,1) == INSN(15,12)
   14481        && 0 == INSN(4,4)) {
   14482       UInt rN   = INSN(19,16);
   14483       UInt rM   = INSN(3,0);
   14484       UInt imm5 = INSN(11,7);
   14485       UInt sh2  = INSN(6,5);
   14486       UInt bU   = INSN(23,23);
   14487       UInt bR   = INSN(22,22);
   14488       if (rM != 15 && (rN != 15 || bR)) {
   14489          IRExpr* eaE = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   14490                                                        sh2, imm5, dis_buf);
   14491          IRTemp eaT = newTemp(Ity_I32);
   14492          /* Bind eaE to a temp merely for debugging-vex purposes, so we
   14493             can check it's a plausible decoding.  It will get removed
   14494             by iropt a little later on. */
   14495          vassert(eaE);
   14496          assign(eaT, eaE);
   14497          DIP("pld%c %s\n", bR ? ' ' : 'w', dis_buf);
   14498          return True;
   14499       }
   14500       /* fall through */
   14501    }
   14502 
   14503    /* ------------------------ pli ------------------------ */
   14504    if (BITS8(0,1,0,0, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
   14505        && BITS4(1,1,1,1) == INSN(15,12)) {
   14506       UInt rN    = INSN(19,16);
   14507       UInt imm12 = INSN(11,0);
   14508       UInt bU    = INSN(23,23);
   14509       DIP("pli [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
   14510       return True;
   14511    }
   14512 
   14513    /* --------------------- Interworking branches --------------------- */
   14514 
   14515    // BLX (1), viz, unconditional branch and link to R15+simm24
   14516    // and set CPSR.T = 1, that is, switch to Thumb mode
   14517    if (INSN(31,25) == BITS7(1,1,1,1,1,0,1)) {
   14518       UInt bitH   = INSN(24,24);
   14519       Int  uimm24 = INSN(23,0);
   14520       Int  simm24 = (((uimm24 << 8) >> 8) << 2) + (bitH << 1);
   14521       /* Now this is a bit tricky.  Since we're decoding an ARM insn,
   14522          it is implies that CPSR.T == 0.  Hence the current insn's
   14523          address is guaranteed to be of the form X--(30)--X00.  So, no
   14524          need to mask any bits off it.  But need to set the lowest bit
   14525          to 1 to denote we're in Thumb mode after this, since
   14526          guest_R15T has CPSR.T as the lowest bit.  And we can't chase
   14527          into the call, so end the block at this point. */
   14528       UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
   14529       putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
   14530                     IRTemp_INVALID/*because AL*/, Ijk_Boring );
   14531       llPutIReg(15, mkU32(dst));
   14532       dres->jk_StopHere = Ijk_Call;
   14533       dres->whatNext    = Dis_StopHere;
   14534       DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1);
   14535       return True;
   14536    }
   14537 
   14538    /* ------------------- v7 barrier insns ------------------- */
   14539    switch (insn) {
   14540       case 0xF57FF06F: /* ISB */
   14541          stmt( IRStmt_MBE(Imbe_Fence) );
   14542          DIP("ISB\n");
   14543          return True;
   14544       case 0xF57FF04F: /* DSB sy */
   14545       case 0xF57FF04E: /* DSB st */
   14546       case 0xF57FF04B: /* DSB ish */
   14547       case 0xF57FF04A: /* DSB ishst */
   14548       case 0xF57FF047: /* DSB nsh */
   14549       case 0xF57FF046: /* DSB nshst */
   14550       case 0xF57FF043: /* DSB osh */
   14551       case 0xF57FF042: /* DSB oshst */
   14552          stmt( IRStmt_MBE(Imbe_Fence) );
   14553          DIP("DSB\n");
   14554          return True;
   14555       case 0xF57FF05F: /* DMB sy */
   14556       case 0xF57FF05E: /* DMB st */
   14557       case 0xF57FF05B: /* DMB ish */
   14558       case 0xF57FF05A: /* DMB ishst */
   14559       case 0xF57FF057: /* DMB nsh */
   14560       case 0xF57FF056: /* DMB nshst */
   14561       case 0xF57FF053: /* DMB osh */
   14562       case 0xF57FF052: /* DMB oshst */
   14563          stmt( IRStmt_MBE(Imbe_Fence) );
   14564          DIP("DMB\n");
   14565          return True;
   14566       default:
   14567          break;
   14568    }
   14569 
   14570    /* ------------------- CLREX ------------------ */
   14571    if (insn == 0xF57FF01F) {
   14572       /* AFAICS, this simply cancels a (all?) reservations made by a
   14573          (any?) preceding LDREX(es).  Arrange to hand it through to
   14574          the back end. */
   14575       stmt( IRStmt_MBE(Imbe_CancelReservation) );
   14576       DIP("clrex\n");
   14577       return True;
   14578    }
   14579 
   14580    /* ------------------- NEON ------------------- */
   14581    if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
   14582       Bool ok_neon = decode_NEON_instruction(
   14583                         dres, insn, IRTemp_INVALID/*unconditional*/,
   14584                         False/*!isT*/
   14585                      );
   14586       if (ok_neon)
   14587          return True;
   14588    }
   14589 
   14590    // unrecognised
   14591    return False;
   14592 
   14593 #  undef INSN_COND
   14594 #  undef INSN
   14595 }
   14596 
   14597 
   14598 /*------------------------------------------------------------*/
   14599 /*--- Disassemble a single ARM instruction                 ---*/
   14600 /*------------------------------------------------------------*/
   14601 
   14602 /* Disassemble a single ARM instruction into IR.  The instruction is
   14603    located in host memory at guest_instr, and has (decoded) guest IP
   14604    of guest_R15_curr_instr_notENC, which will have been set before the
   14605    call here. */
   14606 
   14607 static
   14608 DisResult disInstr_ARM_WRK (
   14609              Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
   14610              Bool         resteerCisOk,
   14611              void*        callback_opaque,
   14612              const UChar* guest_instr,
   14613              const VexArchInfo* archinfo,
   14614              const VexAbiInfo*  abiinfo,
   14615              Bool         sigill_diag
   14616           )
   14617 {
   14618    // A macro to fish bits out of 'insn'.
   14619 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   14620 #  define INSN_COND          SLICE_UInt(insn, 31, 28)
   14621 
   14622    DisResult dres;
   14623    UInt      insn;
   14624    //Bool      allow_VFP = False;
   14625    //UInt      hwcaps = archinfo->hwcaps;
   14626    IRTemp    condT; /* :: Ity_I32 */
   14627    UInt      summary;
   14628    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
   14629 
   14630    /* What insn variants are we supporting today? */
   14631    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
   14632    // etc etc
   14633 
   14634    /* Set result defaults. */
   14635    dres.whatNext    = Dis_Continue;
   14636    dres.len         = 4;
   14637    dres.continueAt  = 0;
   14638    dres.jk_StopHere = Ijk_INVALID;
   14639 
   14640    /* Set default actions for post-insn handling of writes to r15, if
   14641       required. */
   14642    r15written = False;
   14643    r15guard   = IRTemp_INVALID; /* unconditional */
   14644    r15kind    = Ijk_Boring;
   14645 
   14646    /* At least this is simple on ARM: insns are all 4 bytes long, and
   14647       4-aligned.  So just fish the whole thing out of memory right now
   14648       and have done. */
   14649    insn = getUIntLittleEndianly( guest_instr );
   14650 
   14651    if (0) vex_printf("insn: 0x%x\n", insn);
   14652 
   14653    DIP("\t(arm) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
   14654 
   14655    vassert(0 == (guest_R15_curr_instr_notENC & 3));
   14656 
   14657    /* ----------------------------------------------------------- */
   14658 
   14659    /* Spot "Special" instructions (see comment at top of file). */
   14660    {
   14661       const UChar* code = guest_instr;
   14662       /* Spot the 16-byte preamble:
   14663 
   14664          e1a0c1ec  mov r12, r12, ROR #3
   14665          e1a0c6ec  mov r12, r12, ROR #13
   14666          e1a0ceec  mov r12, r12, ROR #29
   14667          e1a0c9ec  mov r12, r12, ROR #19
   14668       */
   14669       UInt word1 = 0xE1A0C1EC;
   14670       UInt word2 = 0xE1A0C6EC;
   14671       UInt word3 = 0xE1A0CEEC;
   14672       UInt word4 = 0xE1A0C9EC;
   14673       if (getUIntLittleEndianly(code+ 0) == word1 &&
   14674           getUIntLittleEndianly(code+ 4) == word2 &&
   14675           getUIntLittleEndianly(code+ 8) == word3 &&
   14676           getUIntLittleEndianly(code+12) == word4) {
   14677          /* Got a "Special" instruction preamble.  Which one is it? */
   14678          if (getUIntLittleEndianly(code+16) == 0xE18AA00A
   14679                                                /* orr r10,r10,r10 */) {
   14680             /* R3 = client_request ( R4 ) */
   14681             DIP("r3 = client_request ( %%r4 )\n");
   14682             llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
   14683             dres.jk_StopHere = Ijk_ClientReq;
   14684             dres.whatNext    = Dis_StopHere;
   14685             goto decode_success;
   14686          }
   14687          else
   14688          if (getUIntLittleEndianly(code+16) == 0xE18BB00B
   14689                                                /* orr r11,r11,r11 */) {
   14690             /* R3 = guest_NRADDR */
   14691             DIP("r3 = guest_NRADDR\n");
   14692             dres.len = 20;
   14693             llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
   14694             goto decode_success;
   14695          }
   14696          else
   14697          if (getUIntLittleEndianly(code+16) == 0xE18CC00C
   14698                                                /* orr r12,r12,r12 */) {
   14699             /*  branch-and-link-to-noredir R4 */
   14700             DIP("branch-and-link-to-noredir r4\n");
   14701             llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
   14702             llPutIReg(15, llGetIReg(4));
   14703             dres.jk_StopHere = Ijk_NoRedir;
   14704             dres.whatNext    = Dis_StopHere;
   14705             goto decode_success;
   14706          }
   14707          else
   14708          if (getUIntLittleEndianly(code+16) == 0xE1899009
   14709                                                /* orr r9,r9,r9 */) {
   14710             /* IR injection */
   14711             DIP("IR injection\n");
   14712             vex_inject_ir(irsb, Iend_LE);
   14713             // Invalidate the current insn. The reason is that the IRop we're
   14714             // injecting here can change. In which case the translation has to
   14715             // be redone. For ease of handling, we simply invalidate all the
   14716             // time.
   14717             stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
   14718             stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
   14719             llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
   14720             dres.whatNext    = Dis_StopHere;
   14721             dres.jk_StopHere = Ijk_InvalICache;
   14722             goto decode_success;
   14723          }
   14724          /* We don't know what it is.  Set insn so decode_failure
   14725             can print the insn following the Special-insn preamble. */
   14726          insn = getUIntLittleEndianly(code+16);
   14727          goto decode_failure;
   14728          /*NOTREACHED*/
   14729       }
   14730 
   14731    }
   14732 
   14733    /* ----------------------------------------------------------- */
   14734 
   14735    /* Main ARM instruction decoder starts here. */
   14736 
   14737    /* Deal with the condition.  Strategy is to merely generate a
   14738       condition temporary at this point (or IRTemp_INVALID, meaning
   14739       unconditional).  We leave it to lower-level instruction decoders
   14740       to decide whether they can generate straight-line code, or
   14741       whether they must generate a side exit before the instruction.
   14742       condT :: Ity_I32 and is always either zero or one. */
   14743    condT = IRTemp_INVALID;
   14744    switch ( (ARMCondcode)INSN_COND ) {
   14745       case ARMCondNV: {
   14746          // Illegal instruction prior to v5 (see ARM ARM A3-5), but
   14747          // some cases are acceptable
   14748          Bool ok = decode_NV_instruction(&dres, archinfo, insn);
   14749          if (ok)
   14750             goto decode_success;
   14751          else
   14752             goto decode_failure;
   14753       }
   14754       case ARMCondAL: // Always executed
   14755          break;
   14756       case ARMCondEQ: case ARMCondNE: case ARMCondHS: case ARMCondLO:
   14757       case ARMCondMI: case ARMCondPL: case ARMCondVS: case ARMCondVC:
   14758       case ARMCondHI: case ARMCondLS: case ARMCondGE: case ARMCondLT:
   14759       case ARMCondGT: case ARMCondLE:
   14760          condT = newTemp(Ity_I32);
   14761          assign( condT, mk_armg_calculate_condition( INSN_COND ));
   14762          break;
   14763    }
   14764 
   14765    /* ----------------------------------------------------------- */
   14766    /* -- ARMv5 integer instructions                            -- */
   14767    /* ----------------------------------------------------------- */
   14768 
   14769    /* ---------------- Data processing ops ------------------- */
   14770 
   14771    if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0))
   14772        && !(INSN(25,25) == 0 && INSN(7,7) == 1 && INSN(4,4) == 1)) {
   14773       IRTemp  shop = IRTemp_INVALID; /* shifter operand */
   14774       IRTemp  shco = IRTemp_INVALID; /* shifter carry out */
   14775       UInt    rD   = (insn >> 12) & 0xF; /* 15:12 */
   14776       UInt    rN   = (insn >> 16) & 0xF; /* 19:16 */
   14777       UInt    bitS = (insn >> 20) & 1; /* 20:20 */
   14778       IRTemp  rNt  = IRTemp_INVALID;
   14779       IRTemp  res  = IRTemp_INVALID;
   14780       IRTemp  oldV = IRTemp_INVALID;
   14781       IRTemp  oldC = IRTemp_INVALID;
   14782       const HChar*  name = NULL;
   14783       IROp    op   = Iop_INVALID;
   14784       Bool    ok;
   14785 
   14786       switch (INSN(24,21)) {
   14787 
   14788          /* --------- ADD, SUB, AND, OR --------- */
   14789          case BITS4(0,1,0,0): /* ADD:  Rd = Rn + shifter_operand */
   14790             name = "add"; op = Iop_Add32; goto rd_eq_rn_op_SO;
   14791          case BITS4(0,0,1,0): /* SUB:  Rd = Rn - shifter_operand */
   14792             name = "sub"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
   14793          case BITS4(0,0,1,1): /* RSB:  Rd = shifter_operand - Rn */
   14794             name = "rsb"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
   14795          case BITS4(0,0,0,0): /* AND:  Rd = Rn & shifter_operand */
   14796             name = "and"; op = Iop_And32; goto rd_eq_rn_op_SO;
   14797          case BITS4(1,1,0,0): /* OR:   Rd = Rn | shifter_operand */
   14798             name = "orr"; op = Iop_Or32; goto rd_eq_rn_op_SO;
   14799          case BITS4(0,0,0,1): /* EOR:  Rd = Rn ^ shifter_operand */
   14800             name = "eor"; op = Iop_Xor32; goto rd_eq_rn_op_SO;
   14801          case BITS4(1,1,1,0): /* BIC:  Rd = Rn & ~shifter_operand */
   14802             name = "bic"; op = Iop_And32; goto rd_eq_rn_op_SO;
   14803          rd_eq_rn_op_SO: {
   14804             Bool isRSB = False;
   14805             Bool isBIC = False;
   14806             switch (INSN(24,21)) {
   14807                case BITS4(0,0,1,1):
   14808                   vassert(op == Iop_Sub32); isRSB = True; break;
   14809                case BITS4(1,1,1,0):
   14810                   vassert(op == Iop_And32); isBIC = True; break;
   14811                default:
   14812                   break;
   14813             }
   14814             rNt = newTemp(Ity_I32);
   14815             assign(rNt, getIRegA(rN));
   14816             ok = mk_shifter_operand(
   14817                     INSN(25,25), INSN(11,0),
   14818                     &shop, bitS ? &shco : NULL, dis_buf
   14819                  );
   14820             if (!ok)
   14821                break;
   14822             res = newTemp(Ity_I32);
   14823             // compute the main result
   14824             if (isRSB) {
   14825                // reverse-subtract: shifter_operand - Rn
   14826                vassert(op == Iop_Sub32);
   14827                assign(res, binop(op, mkexpr(shop), mkexpr(rNt)) );
   14828             } else if (isBIC) {
   14829                // andn: Rn & ~shifter_operand
   14830                vassert(op == Iop_And32);
   14831                assign(res, binop(op, mkexpr(rNt),
   14832                                      unop(Iop_Not32, mkexpr(shop))) );
   14833             } else {
   14834                // normal: Rn op shifter_operand
   14835                assign(res, binop(op, mkexpr(rNt), mkexpr(shop)) );
   14836             }
   14837             // but don't commit it until after we've finished
   14838             // all necessary reads from the guest state
   14839             if (bitS
   14840                 && (op == Iop_And32 || op == Iop_Or32 || op == Iop_Xor32)) {
   14841                oldV = newTemp(Ity_I32);
   14842                assign( oldV, mk_armg_calculate_flag_v() );
   14843             }
   14844             // can't safely read guest state after here
   14845             // now safe to put the main result
   14846             putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   14847             // XXXX!! not safe to read any guest state after
   14848             // this point (I think the code below doesn't do that).
   14849             if (!bitS)
   14850                vassert(shco == IRTemp_INVALID);
   14851             /* Update the flags thunk if necessary */
   14852             if (bitS) {
   14853                vassert(shco != IRTemp_INVALID);
   14854                switch (op) {
   14855                   case Iop_Add32:
   14856                      setFlags_D1_D2( ARMG_CC_OP_ADD, rNt, shop, condT );
   14857                      break;
   14858                   case Iop_Sub32:
   14859                      if (isRSB) {
   14860                         setFlags_D1_D2( ARMG_CC_OP_SUB, shop, rNt, condT );
   14861                      } else {
   14862                         setFlags_D1_D2( ARMG_CC_OP_SUB, rNt, shop, condT );
   14863                      }
   14864                      break;
   14865                   case Iop_And32: /* BIC and AND set the flags the same */
   14866                   case Iop_Or32:
   14867                   case Iop_Xor32:
   14868                      // oldV has been read just above
   14869                      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
   14870                                         res, shco, oldV, condT );
   14871                      break;
   14872                   default:
   14873                      vassert(0);
   14874                }
   14875             }
   14876             DIP("%s%s%s r%u, r%u, %s\n",
   14877                 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
   14878             goto decode_success;
   14879          }
   14880 
   14881          /* --------- MOV, MVN --------- */
   14882          case BITS4(1,1,0,1):   /* MOV: Rd = shifter_operand */
   14883          case BITS4(1,1,1,1): { /* MVN: Rd = not(shifter_operand) */
   14884             Bool isMVN = INSN(24,21) == BITS4(1,1,1,1);
   14885             IRTemp jk = Ijk_Boring;
   14886             if (rN != 0)
   14887                break; /* rN must be zero */
   14888             ok = mk_shifter_operand(
   14889                     INSN(25,25), INSN(11,0),
   14890                     &shop, bitS ? &shco : NULL, dis_buf
   14891                  );
   14892             if (!ok)
   14893                break;
   14894             res = newTemp(Ity_I32);
   14895             assign( res, isMVN ? unop(Iop_Not32, mkexpr(shop))
   14896                                : mkexpr(shop) );
   14897             if (bitS) {
   14898                vassert(shco != IRTemp_INVALID);
   14899                oldV = newTemp(Ity_I32);
   14900                assign( oldV, mk_armg_calculate_flag_v() );
   14901             } else {
   14902                vassert(shco == IRTemp_INVALID);
   14903             }
   14904             /* According to the Cortex A8 TRM Sec. 5.2.1, MOV PC, r14 is a
   14905                 return for purposes of branch prediction. */
   14906             if (!isMVN && INSN(11,0) == 14) {
   14907               jk = Ijk_Ret;
   14908             }
   14909             // can't safely read guest state after here
   14910             putIRegA( rD, mkexpr(res), condT, jk );
   14911             /* Update the flags thunk if necessary */
   14912             if (bitS) {
   14913                setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
   14914                                   res, shco, oldV, condT );
   14915             }
   14916             DIP("%s%s%s r%u, %s\n",
   14917                 isMVN ? "mvn" : "mov",
   14918                 nCC(INSN_COND), bitS ? "s" : "", rD, dis_buf );
   14919             goto decode_success;
   14920          }
   14921 
   14922          /* --------- CMP --------- */
   14923          case BITS4(1,0,1,0):   /* CMP:  (void) Rn - shifter_operand */
   14924          case BITS4(1,0,1,1): { /* CMN:  (void) Rn + shifter_operand */
   14925             Bool isCMN = INSN(24,21) == BITS4(1,0,1,1);
   14926             if (rD != 0)
   14927                break; /* rD must be zero */
   14928             if (bitS == 0)
   14929                break; /* if S (bit 20) is not set, it's not CMP/CMN */
   14930             rNt = newTemp(Ity_I32);
   14931             assign(rNt, getIRegA(rN));
   14932             ok = mk_shifter_operand(
   14933                     INSN(25,25), INSN(11,0),
   14934                     &shop, NULL, dis_buf
   14935                  );
   14936             if (!ok)
   14937                break;
   14938             // can't safely read guest state after here
   14939             /* Update the flags thunk. */
   14940             setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   14941                             rNt, shop, condT );
   14942             DIP("%s%s r%u, %s\n",
   14943                 isCMN ? "cmn" : "cmp",
   14944                 nCC(INSN_COND), rN, dis_buf );
   14945             goto decode_success;
   14946          }
   14947 
   14948          /* --------- TST --------- */
   14949          case BITS4(1,0,0,0):   /* TST:  (void) Rn & shifter_operand */
   14950          case BITS4(1,0,0,1): { /* TEQ:  (void) Rn ^ shifter_operand */
   14951             Bool isTEQ = INSN(24,21) == BITS4(1,0,0,1);
   14952             if (rD != 0)
   14953                break; /* rD must be zero */
   14954             if (bitS == 0)
   14955                break; /* if S (bit 20) is not set, it's not TST/TEQ */
   14956             rNt = newTemp(Ity_I32);
   14957             assign(rNt, getIRegA(rN));
   14958             ok = mk_shifter_operand(
   14959                     INSN(25,25), INSN(11,0),
   14960                     &shop, &shco, dis_buf
   14961                  );
   14962             if (!ok)
   14963                break;
   14964             /* Update the flags thunk. */
   14965             res = newTemp(Ity_I32);
   14966             assign( res, binop(isTEQ ? Iop_Xor32 : Iop_And32,
   14967                                mkexpr(rNt), mkexpr(shop)) );
   14968             oldV = newTemp(Ity_I32);
   14969             assign( oldV, mk_armg_calculate_flag_v() );
   14970             // can't safely read guest state after here
   14971             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
   14972                                res, shco, oldV, condT );
   14973             DIP("%s%s r%u, %s\n",
   14974                 isTEQ ? "teq" : "tst",
   14975                 nCC(INSN_COND), rN, dis_buf );
   14976             goto decode_success;
   14977          }
   14978 
   14979          /* --------- ADC, SBC, RSC --------- */
   14980          case BITS4(0,1,0,1): /* ADC:  Rd = Rn + shifter_operand + oldC */
   14981             name = "adc"; goto rd_eq_rn_op_SO_op_oldC;
   14982          case BITS4(0,1,1,0): /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
   14983             name = "sbc"; goto rd_eq_rn_op_SO_op_oldC;
   14984          case BITS4(0,1,1,1): /* RSC:  Rd = shifter_operand - Rn - (oldC ^ 1) */
   14985             name = "rsc"; goto rd_eq_rn_op_SO_op_oldC;
   14986          rd_eq_rn_op_SO_op_oldC: {
   14987             // FIXME: shco isn't used for anything.  Get rid of it.
   14988             rNt = newTemp(Ity_I32);
   14989             assign(rNt, getIRegA(rN));
   14990             ok = mk_shifter_operand(
   14991                     INSN(25,25), INSN(11,0),
   14992                     &shop, bitS ? &shco : NULL, dis_buf
   14993                  );
   14994             if (!ok)
   14995                break;
   14996             oldC = newTemp(Ity_I32);
   14997             assign( oldC, mk_armg_calculate_flag_c() );
   14998             res = newTemp(Ity_I32);
   14999             // compute the main result
   15000             switch (INSN(24,21)) {
   15001                case BITS4(0,1,0,1): /* ADC */
   15002                   assign(res,
   15003                          binop(Iop_Add32,
   15004                                binop(Iop_Add32, mkexpr(rNt), mkexpr(shop)),
   15005                                mkexpr(oldC) ));
   15006                   break;
   15007                case BITS4(0,1,1,0): /* SBC */
   15008                   assign(res,
   15009                          binop(Iop_Sub32,
   15010                                binop(Iop_Sub32, mkexpr(rNt), mkexpr(shop)),
   15011                                binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   15012                   break;
   15013                case BITS4(0,1,1,1): /* RSC */
   15014                   assign(res,
   15015                          binop(Iop_Sub32,
   15016                                binop(Iop_Sub32, mkexpr(shop), mkexpr(rNt)),
   15017                                binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   15018                   break;
   15019                default:
   15020                   vassert(0);
   15021             }
   15022             // but don't commit it until after we've finished
   15023             // all necessary reads from the guest state
   15024             // now safe to put the main result
   15025             putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   15026             // XXXX!! not safe to read any guest state after
   15027             // this point (I think the code below doesn't do that).
   15028             if (!bitS)
   15029                vassert(shco == IRTemp_INVALID);
   15030             /* Update the flags thunk if necessary */
   15031             if (bitS) {
   15032                vassert(shco != IRTemp_INVALID);
   15033                switch (INSN(24,21)) {
   15034                   case BITS4(0,1,0,1): /* ADC */
   15035                      setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
   15036                                         rNt, shop, oldC, condT );
   15037                      break;
   15038                   case BITS4(0,1,1,0): /* SBC */
   15039                      setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   15040                                         rNt, shop, oldC, condT );
   15041                      break;
   15042                   case BITS4(0,1,1,1): /* RSC */
   15043                      setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   15044                                         shop, rNt, oldC, condT );
   15045                      break;
   15046                   default:
   15047                      vassert(0);
   15048                }
   15049             }
   15050             DIP("%s%s%s r%u, r%u, %s\n",
   15051                 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
   15052             goto decode_success;
   15053          }
   15054 
   15055          default:
   15056             vassert(0);
   15057       }
   15058    } /* if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0)) */
   15059 
   15060    /* --------------------- Load/store (ubyte & word) -------- */
   15061    // LDR STR LDRB STRB
   15062    /*                 31   27   23   19 15 11    6   4 3  # highest bit
   15063                         28   24   20 16 12
   15064       A5-20   1 | 16  cond 0101 UB0L Rn Rd imm12
   15065       A5-22   1 | 32  cond 0111 UB0L Rn Rd imm5  sh2 0 Rm
   15066       A5-24   2 | 16  cond 0101 UB1L Rn Rd imm12
   15067       A5-26   2 | 32  cond 0111 UB1L Rn Rd imm5  sh2 0 Rm
   15068       A5-28   3 | 16  cond 0100 UB0L Rn Rd imm12
   15069       A5-32   3 | 32  cond 0110 UB0L Rn Rd imm5  sh2 0 Rm
   15070    */
   15071    /* case coding:
   15072              1   at-ea               (access at ea)
   15073              2   at-ea-then-upd      (access at ea, then Rn = ea)
   15074              3   at-Rn-then-upd      (access at Rn, then Rn = ea)
   15075       ea coding
   15076              16  Rn +/- imm12
   15077              32  Rn +/- Rm sh2 imm5
   15078    */
   15079    /* Quickly skip over all of this for hopefully most instructions */
   15080    if ((INSN(27,24) & BITS4(1,1,0,0)) != BITS4(0,1,0,0))
   15081       goto after_load_store_ubyte_or_word;
   15082 
   15083    summary = 0;
   15084 
   15085    /**/ if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 0) {
   15086       summary = 1 | 16;
   15087    }
   15088    else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 0
   15089                                           && INSN(4,4) == 0) {
   15090       summary = 1 | 32;
   15091    }
   15092    else if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 1) {
   15093       summary = 2 | 16;
   15094    }
   15095    else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 1
   15096                                           && INSN(4,4) == 0) {
   15097       summary = 2 | 32;
   15098    }
   15099    else if (INSN(27,24) == BITS4(0,1,0,0) && INSN(21,21) == 0) {
   15100       summary = 3 | 16;
   15101    }
   15102    else if (INSN(27,24) == BITS4(0,1,1,0) && INSN(21,21) == 0
   15103                                           && INSN(4,4) == 0) {
   15104       summary = 3 | 32;
   15105    }
   15106    else goto after_load_store_ubyte_or_word;
   15107 
   15108    { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
   15109      UInt rD = (insn >> 12) & 0xF; /* 15:12 */
   15110      UInt rM = (insn >> 0)  & 0xF; /*  3:0  */
   15111      UInt bU = (insn >> 23) & 1;      /* 23 */
   15112      UInt bB = (insn >> 22) & 1;      /* 22 */
   15113      UInt bL = (insn >> 20) & 1;      /* 20 */
   15114      UInt imm12 = (insn >> 0) & 0xFFF; /* 11:0 */
   15115      UInt imm5  = (insn >> 7) & 0x1F;  /* 11:7 */
   15116      UInt sh2   = (insn >> 5) & 3;     /* 6:5 */
   15117 
   15118      /* Skip some invalid cases, which would lead to two competing
   15119         updates to the same register, or which are otherwise
   15120         disallowed by the spec. */
   15121      switch (summary) {
   15122         case 1 | 16:
   15123            break;
   15124         case 1 | 32:
   15125            if (rM == 15) goto after_load_store_ubyte_or_word;
   15126            break;
   15127         case 2 | 16: case 3 | 16:
   15128            if (rN == 15) goto after_load_store_ubyte_or_word;
   15129            if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
   15130            break;
   15131         case 2 | 32: case 3 | 32:
   15132            if (rM == 15) goto after_load_store_ubyte_or_word;
   15133            if (rN == 15) goto after_load_store_ubyte_or_word;
   15134            if (rN == rM) goto after_load_store_ubyte_or_word;
   15135            if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
   15136            break;
   15137         default:
   15138            vassert(0);
   15139      }
   15140 
   15141      /* compute the effective address.  Bind it to a tmp since we
   15142         may need to use it twice. */
   15143      IRExpr* eaE = NULL;
   15144      switch (summary & 0xF0) {
   15145         case 16:
   15146            eaE = mk_EA_reg_plusminus_imm12( rN, bU, imm12, dis_buf );
   15147            break;
   15148         case 32:
   15149            eaE = mk_EA_reg_plusminus_shifted_reg( rN, bU, rM, sh2, imm5,
   15150                                                   dis_buf );
   15151            break;
   15152      }
   15153      vassert(eaE);
   15154      IRTemp eaT = newTemp(Ity_I32);
   15155      assign(eaT, eaE);
   15156 
   15157      /* get the old Rn value */
   15158      IRTemp rnT = newTemp(Ity_I32);
   15159      assign(rnT, getIRegA(rN));
   15160 
   15161      /* decide on the transfer address */
   15162      IRTemp taT = IRTemp_INVALID;
   15163      switch (summary & 0x0F) {
   15164         case 1: case 2: taT = eaT; break;
   15165         case 3:         taT = rnT; break;
   15166      }
   15167      vassert(taT != IRTemp_INVALID);
   15168 
   15169      if (bL == 0) {
   15170        /* Store.  If necessary, update the base register before the
   15171           store itself, so that the common idiom of "str rX, [sp,
   15172           #-4]!" (store rX at sp-4, then do new sp = sp-4, a.k.a "push
   15173           rX") doesn't cause Memcheck to complain that the access is
   15174           below the stack pointer.  Also, not updating sp before the
   15175           store confuses Valgrind's dynamic stack-extending logic.  So
   15176           do it before the store.  Hence we need to snarf the store
   15177           data before doing the basereg update. */
   15178 
   15179         /* get hold of the data to be stored */
   15180         IRTemp rDt = newTemp(Ity_I32);
   15181         assign(rDt, getIRegA(rD));
   15182 
   15183         /* Update Rn if necessary. */
   15184         switch (summary & 0x0F) {
   15185            case 2: case 3:
   15186               putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   15187               break;
   15188         }
   15189 
   15190         /* generate the transfer */
   15191         if (bB == 0) { // word store
   15192            storeGuardedLE( mkexpr(taT), mkexpr(rDt), condT );
   15193         } else { // byte store
   15194            vassert(bB == 1);
   15195            storeGuardedLE( mkexpr(taT), unop(Iop_32to8, mkexpr(rDt)), condT );
   15196         }
   15197 
   15198      } else {
   15199         /* Load */
   15200         vassert(bL == 1);
   15201 
   15202         /* generate the transfer */
   15203         if (bB == 0) { // word load
   15204            IRTemp jk = Ijk_Boring;
   15205            /* According to the Cortex A8 TRM Sec. 5.2.1, LDR(1) with r13 as the
   15206                base register and PC as the destination register is a return for
   15207                purposes of branch prediction.
   15208               The ARM ARM Sec. C9.10.1 further specifies that it must use a
   15209                post-increment by immediate addressing mode to be counted in
   15210                event 0x0E (Procedure return).*/
   15211            if (rN == 13 && summary == (3 | 16) && bB == 0) {
   15212               jk = Ijk_Ret;
   15213            }
   15214            IRTemp tD = newTemp(Ity_I32);
   15215            loadGuardedLE( tD, ILGop_Ident32,
   15216                           mkexpr(taT), llGetIReg(rD), condT );
   15217            /* "rD == 15 ? condT : IRTemp_INVALID": simply
   15218               IRTemp_INVALID would be correct in all cases here, and
   15219               for the non-r15 case it generates better code, by
   15220               avoiding two tests of the cond (since it is already
   15221               tested by loadGuardedLE).  However, the logic at the end
   15222               of this function, that deals with writes to r15, has an
   15223               optimisation which depends on seeing whether or not the
   15224               write is conditional.  Hence in this particular case we
   15225               let it "see" the guard condition. */
   15226            putIRegA( rD, mkexpr(tD),
   15227                      rD == 15 ? condT : IRTemp_INVALID, jk );
   15228         } else { // byte load
   15229            vassert(bB == 1);
   15230            IRTemp tD = newTemp(Ity_I32);
   15231            loadGuardedLE( tD, ILGop_8Uto32, mkexpr(taT), llGetIReg(rD), condT );
   15232            /* No point in similar 3rd arg complexity here, since we
   15233               can't sanely write anything to r15 like this. */
   15234            putIRegA( rD, mkexpr(tD), IRTemp_INVALID, Ijk_Boring );
   15235         }
   15236 
   15237         /* Update Rn if necessary. */
   15238         switch (summary & 0x0F) {
   15239            case 2: case 3:
   15240               // should be assured by logic above:
   15241               if (bL == 1)
   15242                  vassert(rD != rN); /* since we just wrote rD */
   15243               putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   15244               break;
   15245         }
   15246      }
   15247 
   15248      switch (summary & 0x0F) {
   15249         case 1:  DIP("%sr%s%s r%u, %s\n",
   15250                      bL == 0 ? "st" : "ld",
   15251                      bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
   15252                  break;
   15253         case 2:  DIP("%sr%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
   15254                      bL == 0 ? "st" : "ld",
   15255                      bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
   15256                  break;
   15257         case 3:  DIP("%sr%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
   15258                      bL == 0 ? "st" : "ld",
   15259                      bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
   15260                  break;
   15261         default: vassert(0);
   15262      }
   15263 
   15264      /* XXX deal with alignment constraints */
   15265 
   15266      goto decode_success;
   15267 
   15268      /* Complications:
   15269 
   15270         For all loads: if the Amode specifies base register
   15271         writeback, and the same register is specified for Rd and Rn,
   15272         the results are UNPREDICTABLE.
   15273 
   15274         For all loads and stores: if R15 is written, branch to
   15275         that address afterwards.
   15276 
   15277         STRB: straightforward
   15278         LDRB: loaded data is zero extended
   15279         STR:  lowest 2 bits of address are ignored
   15280         LDR:  if the lowest 2 bits of the address are nonzero
   15281               then the loaded value is rotated right by 8 * the lowest 2 bits
   15282      */
   15283    }
   15284 
   15285   after_load_store_ubyte_or_word:
   15286 
   15287    /* --------------------- Load/store (sbyte & hword) -------- */
   15288    // LDRH LDRSH STRH LDRSB
   15289    /*                 31   27   23   19 15 11   7    3     # highest bit
   15290                         28   24   20 16 12    8    4    0
   15291       A5-36   1 | 16  cond 0001 U10L Rn Rd im4h 1SH1 im4l
   15292       A5-38   1 | 32  cond 0001 U00L Rn Rd 0000 1SH1 Rm
   15293       A5-40   2 | 16  cond 0001 U11L Rn Rd im4h 1SH1 im4l
   15294       A5-42   2 | 32  cond 0001 U01L Rn Rd 0000 1SH1 Rm
   15295       A5-44   3 | 16  cond 0000 U10L Rn Rd im4h 1SH1 im4l
   15296       A5-46   3 | 32  cond 0000 U00L Rn Rd 0000 1SH1 Rm
   15297    */
   15298    /* case coding:
   15299              1   at-ea               (access at ea)
   15300              2   at-ea-then-upd      (access at ea, then Rn = ea)
   15301              3   at-Rn-then-upd      (access at Rn, then Rn = ea)
   15302       ea coding
   15303              16  Rn +/- imm8
   15304              32  Rn +/- Rm
   15305    */
   15306    /* Quickly skip over all of this for hopefully most instructions */
   15307    if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
   15308       goto after_load_store_sbyte_or_hword;
   15309 
   15310    /* Check the "1SH1" thing. */
   15311    if ((INSN(7,4) & BITS4(1,0,0,1)) != BITS4(1,0,0,1))
   15312       goto after_load_store_sbyte_or_hword;
   15313 
   15314    summary = 0;
   15315 
   15316    /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,0)) {
   15317       summary = 1 | 16;
   15318    }
   15319    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,0)) {
   15320       summary = 1 | 32;
   15321    }
   15322    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,1)) {
   15323       summary = 2 | 16;
   15324    }
   15325    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,1)) {
   15326       summary = 2 | 32;
   15327    }
   15328    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(1,0)) {
   15329       summary = 3 | 16;
   15330    }
   15331    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(0,0)) {
   15332       summary = 3 | 32;
   15333    }
   15334    else goto after_load_store_sbyte_or_hword;
   15335 
   15336    { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
   15337      UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
   15338      UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
   15339      UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
   15340      UInt bL   = (insn >> 20) & 1;   /* 20 L=1 load, L=0 store */
   15341      UInt bH   = (insn >> 5) & 1;    /* H=1 halfword, H=0 byte */
   15342      UInt bS   = (insn >> 6) & 1;    /* S=1 signed, S=0 unsigned */
   15343      UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
   15344 
   15345      /* Skip combinations that are either meaningless or already
   15346         handled by main word-or-unsigned-byte load-store
   15347         instructions. */
   15348      if (bS == 0 && bH == 0) /* "unsigned byte" */
   15349         goto after_load_store_sbyte_or_hword;
   15350      if (bS == 1 && bL == 0) /* "signed store" */
   15351         goto after_load_store_sbyte_or_hword;
   15352 
   15353      /* Require 11:8 == 0 for Rn +/- Rm cases */
   15354      if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
   15355         goto after_load_store_sbyte_or_hword;
   15356 
   15357      /* Skip some invalid cases, which would lead to two competing
   15358         updates to the same register, or which are otherwise
   15359         disallowed by the spec. */
   15360      switch (summary) {
   15361         case 1 | 16:
   15362            break;
   15363         case 1 | 32:
   15364            if (rM == 15) goto after_load_store_sbyte_or_hword;
   15365            break;
   15366         case 2 | 16: case 3 | 16:
   15367            if (rN == 15) goto after_load_store_sbyte_or_hword;
   15368            if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
   15369            break;
   15370         case 2 | 32: case 3 | 32:
   15371            if (rM == 15) goto after_load_store_sbyte_or_hword;
   15372            if (rN == 15) goto after_load_store_sbyte_or_hword;
   15373            if (rN == rM) goto after_load_store_sbyte_or_hword;
   15374            if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
   15375            break;
   15376         default:
   15377            vassert(0);
   15378      }
   15379 
   15380      /* If this is a branch, make it unconditional at this point.
   15381         Doing conditional branches in-line is too complex (for now).
   15382         Note that you'd have to be insane to use any of these loads to
   15383         do a branch, since they only load 16 bits at most, but we
   15384         handle it just in case. */
   15385      if (bL == 1 && rD == 15 && condT != IRTemp_INVALID) {
   15386         // go uncond
   15387         mk_skip_over_A32_if_cond_is_false( condT );
   15388         condT = IRTemp_INVALID;
   15389         // now uncond
   15390      }
   15391 
   15392      /* compute the effective address.  Bind it to a tmp since we
   15393         may need to use it twice. */
   15394      IRExpr* eaE = NULL;
   15395      switch (summary & 0xF0) {
   15396         case 16:
   15397            eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
   15398            break;
   15399         case 32:
   15400            eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
   15401            break;
   15402      }
   15403      vassert(eaE);
   15404      IRTemp eaT = newTemp(Ity_I32);
   15405      assign(eaT, eaE);
   15406 
   15407      /* get the old Rn value */
   15408      IRTemp rnT = newTemp(Ity_I32);
   15409      assign(rnT, getIRegA(rN));
   15410 
   15411      /* decide on the transfer address */
   15412      IRTemp taT = IRTemp_INVALID;
   15413      switch (summary & 0x0F) {
   15414         case 1: case 2: taT = eaT; break;
   15415         case 3:         taT = rnT; break;
   15416      }
   15417      vassert(taT != IRTemp_INVALID);
   15418 
   15419      /* ll previous value of rD, for dealing with conditional loads */
   15420      IRTemp llOldRd = newTemp(Ity_I32);
   15421      assign(llOldRd, llGetIReg(rD));
   15422 
   15423      /* halfword store  H 1  L 0  S 0
   15424         uhalf load      H 1  L 1  S 0
   15425         shalf load      H 1  L 1  S 1
   15426         sbyte load      H 0  L 1  S 1
   15427      */
   15428      const HChar* name = NULL;
   15429      /* generate the transfer */
   15430      /**/ if (bH == 1 && bL == 0 && bS == 0) { // halfword store
   15431         storeGuardedLE( mkexpr(taT),
   15432                         unop(Iop_32to16, getIRegA(rD)), condT );
   15433         name = "strh";
   15434      }
   15435      else if (bH == 1 && bL == 1 && bS == 0) { // uhalf load
   15436         IRTemp newRd = newTemp(Ity_I32);
   15437         loadGuardedLE( newRd, ILGop_16Uto32,
   15438                        mkexpr(taT), mkexpr(llOldRd), condT );
   15439         putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
   15440         name = "ldrh";
   15441      }
   15442      else if (bH == 1 && bL == 1 && bS == 1) { // shalf load
   15443         IRTemp newRd = newTemp(Ity_I32);
   15444         loadGuardedLE( newRd, ILGop_16Sto32,
   15445                        mkexpr(taT), mkexpr(llOldRd), condT );
   15446         putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
   15447         name = "ldrsh";
   15448      }
   15449      else if (bH == 0 && bL == 1 && bS == 1) { // sbyte load
   15450         IRTemp newRd = newTemp(Ity_I32);
   15451         loadGuardedLE( newRd, ILGop_8Sto32,
   15452                        mkexpr(taT), mkexpr(llOldRd), condT );
   15453         putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
   15454         name = "ldrsb";
   15455      }
   15456      else
   15457         vassert(0); // should be assured by logic above
   15458 
   15459      /* Update Rn if necessary. */
   15460      switch (summary & 0x0F) {
   15461         case 2: case 3:
   15462            // should be assured by logic above:
   15463            if (bL == 1)
   15464               vassert(rD != rN); /* since we just wrote rD */
   15465            putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   15466            break;
   15467      }
   15468 
   15469      switch (summary & 0x0F) {
   15470         case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
   15471                  break;
   15472         case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
   15473                      name, nCC(INSN_COND), rD, dis_buf);
   15474                  break;
   15475         case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
   15476                      name, nCC(INSN_COND), rD, dis_buf);
   15477                  break;
   15478         default: vassert(0);
   15479      }
   15480 
   15481      /* XXX deal with alignment constraints */
   15482 
   15483      goto decode_success;
   15484 
   15485      /* Complications:
   15486 
   15487         For all loads: if the Amode specifies base register
   15488         writeback, and the same register is specified for Rd and Rn,
   15489         the results are UNPREDICTABLE.
   15490 
   15491         For all loads and stores: if R15 is written, branch to
   15492         that address afterwards.
   15493 
   15494         Misaligned halfword stores => Unpredictable
   15495         Misaligned halfword loads  => Unpredictable
   15496      */
   15497    }
   15498 
   15499   after_load_store_sbyte_or_hword:
   15500 
   15501    /* --------------------- Load/store multiple -------------- */
   15502    // LD/STMIA LD/STMIB LD/STMDA LD/STMDB
   15503    // Remarkably complex and difficult to get right
   15504    // match 27:20 as 100XX0WL
   15505    if (BITS8(1,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,1,0,0))) {
   15506       // A5-50 LD/STMIA  cond 1000 10WL Rn RegList
   15507       // A5-51 LD/STMIB  cond 1001 10WL Rn RegList
   15508       // A5-53 LD/STMDA  cond 1000 00WL Rn RegList
   15509       // A5-53 LD/STMDB  cond 1001 00WL Rn RegList
   15510       //                   28   24   20 16       0
   15511 
   15512       UInt bINC    = (insn >> 23) & 1;
   15513       UInt bBEFORE = (insn >> 24) & 1;
   15514 
   15515       UInt bL      = (insn >> 20) & 1;  /* load=1, store=0 */
   15516       UInt bW      = (insn >> 21) & 1;  /* Rn wback=1, no wback=0 */
   15517       UInt rN      = (insn >> 16) & 0xF;
   15518       UInt regList = insn & 0xFFFF;
   15519       /* Skip some invalid cases, which would lead to two competing
   15520          updates to the same register, or which are otherwise
   15521          disallowed by the spec.  Note the test above has required
   15522          that S == 0, since that looks like a kernel-mode only thing.
   15523          Done by forcing the real pattern, viz 100XXSWL to actually be
   15524          100XX0WL. */
   15525       if (rN == 15) goto after_load_store_multiple;
   15526       // reglist can't be empty
   15527       if (regList == 0) goto after_load_store_multiple;
   15528       // if requested to writeback Rn, and this is a load instruction,
   15529       // then Rn can't appear in RegList, since we'd have two competing
   15530       // new values for Rn.  We do however accept this case for store
   15531       // instructions.
   15532       if (bW == 1 && bL == 1 && ((1 << rN) & regList) > 0)
   15533          goto after_load_store_multiple;
   15534 
   15535       /* Now, we can't do a conditional load or store, since that very
   15536          likely will generate an exception.  So we have to take a side
   15537          exit at this point if the condition is false. */
   15538       if (condT != IRTemp_INVALID) {
   15539          mk_skip_over_A32_if_cond_is_false( condT );
   15540          condT = IRTemp_INVALID;
   15541       }
   15542 
   15543       /* Ok, now we're unconditional.  Generate the IR. */
   15544       mk_ldm_stm( True/*arm*/, rN, bINC, bBEFORE, bW, bL, regList );
   15545 
   15546       DIP("%sm%c%c%s r%u%s, {0x%04x}\n",
   15547           bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
   15548           nCC(INSN_COND),
   15549           rN, bW ? "!" : "", regList);
   15550 
   15551       goto decode_success;
   15552    }
   15553 
   15554   after_load_store_multiple:
   15555 
   15556    /* --------------------- Control flow --------------------- */
   15557    // B, BL (Branch, or Branch-and-Link, to immediate offset)
   15558    //
   15559    if (BITS8(1,0,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))) {
   15560       UInt link   = (insn >> 24) & 1;
   15561       UInt uimm24 = insn & ((1<<24)-1);
   15562       Int  simm24 = (Int)uimm24;
   15563       UInt dst    = guest_R15_curr_instr_notENC + 8
   15564                     + (((simm24 << 8) >> 8) << 2);
   15565       IRJumpKind jk = link ? Ijk_Call : Ijk_Boring;
   15566       if (link) {
   15567          putIRegA(14, mkU32(guest_R15_curr_instr_notENC + 4),
   15568                       condT, Ijk_Boring);
   15569       }
   15570       if (condT == IRTemp_INVALID) {
   15571          /* unconditional transfer to 'dst'.  See if we can simply
   15572             continue tracing at the destination. */
   15573          if (resteerOkFn( callback_opaque, dst )) {
   15574             /* yes */
   15575             dres.whatNext   = Dis_ResteerU;
   15576             dres.continueAt = dst;
   15577          } else {
   15578             /* no; terminate the SB at this point. */
   15579             llPutIReg(15, mkU32(dst));
   15580             dres.jk_StopHere = jk;
   15581             dres.whatNext    = Dis_StopHere;
   15582          }
   15583          DIP("b%s 0x%x\n", link ? "l" : "", dst);
   15584       } else {
   15585          /* conditional transfer to 'dst' */
   15586          const HChar* comment = "";
   15587 
   15588          /* First see if we can do some speculative chasing into one
   15589             arm or the other.  Be conservative and only chase if
   15590             !link, that is, this is a normal conditional branch to a
   15591             known destination. */
   15592          if (!link
   15593              && resteerCisOk
   15594              && vex_control.guest_chase_cond
   15595              && dst < guest_R15_curr_instr_notENC
   15596              && resteerOkFn( callback_opaque, dst) ) {
   15597             /* Speculation: assume this backward branch is taken.  So
   15598                we need to emit a side-exit to the insn following this
   15599                one, on the negation of the condition, and continue at
   15600                the branch target address (dst). */
   15601             stmt( IRStmt_Exit( unop(Iop_Not1,
   15602                                     unop(Iop_32to1, mkexpr(condT))),
   15603                                Ijk_Boring,
   15604                                IRConst_U32(guest_R15_curr_instr_notENC+4),
   15605                                OFFB_R15T ));
   15606             dres.whatNext   = Dis_ResteerC;
   15607             dres.continueAt = (Addr32)dst;
   15608             comment = "(assumed taken)";
   15609          }
   15610          else
   15611          if (!link
   15612              && resteerCisOk
   15613              && vex_control.guest_chase_cond
   15614              && dst >= guest_R15_curr_instr_notENC
   15615              && resteerOkFn( callback_opaque,
   15616                              guest_R15_curr_instr_notENC+4) ) {
   15617             /* Speculation: assume this forward branch is not taken.
   15618                So we need to emit a side-exit to dst (the dest) and
   15619                continue disassembling at the insn immediately
   15620                following this one. */
   15621             stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
   15622                                Ijk_Boring,
   15623                                IRConst_U32(dst),
   15624                                OFFB_R15T ));
   15625             dres.whatNext   = Dis_ResteerC;
   15626             dres.continueAt = guest_R15_curr_instr_notENC+4;
   15627             comment = "(assumed not taken)";
   15628          }
   15629          else {
   15630             /* Conservative default translation - end the block at
   15631                this point. */
   15632             stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
   15633                                jk, IRConst_U32(dst), OFFB_R15T ));
   15634             llPutIReg(15, mkU32(guest_R15_curr_instr_notENC + 4));
   15635             dres.jk_StopHere = Ijk_Boring;
   15636             dres.whatNext    = Dis_StopHere;
   15637          }
   15638          DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND),
   15639              dst, comment);
   15640       }
   15641       goto decode_success;
   15642    }
   15643 
   15644    // BX, BLX (Branch-and-Exchange, or Branch-with-Link-and-Exchange, to a register)
   15645    // NB: interworking branch
   15646    if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
   15647        && INSN(19,12) == BITS8(1,1,1,1,1,1,1,1)
   15648        && (INSN(11,4) == BITS8(1,1,1,1,0,0,1,1)
   15649            || INSN(11,4) == BITS8(1,1,1,1,0,0,0,1))) {
   15650       IRTemp  dst = newTemp(Ity_I32);
   15651       UInt    link = (INSN(11,4) >> 1) & 1;
   15652       UInt    rM   = INSN(3,0);
   15653       // we don't decode the case (link && rM == 15), as that's
   15654       // Unpredictable.
   15655       if (!(link && rM == 15)) {
   15656          if (condT != IRTemp_INVALID) {
   15657             mk_skip_over_A32_if_cond_is_false( condT );
   15658          }
   15659          // rM contains an interworking address exactly as we require
   15660          // (with continuation CPSR.T in bit 0), so we can use it
   15661          // as-is, with no masking.
   15662          assign( dst, getIRegA(rM) );
   15663          if (link) {
   15664             putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
   15665                       IRTemp_INVALID/*because AL*/, Ijk_Boring );
   15666          }
   15667          llPutIReg(15, mkexpr(dst));
   15668          dres.jk_StopHere = link ? Ijk_Call
   15669                                  : (rM == 14 ? Ijk_Ret : Ijk_Boring);
   15670          dres.whatNext    = Dis_StopHere;
   15671          if (condT == IRTemp_INVALID) {
   15672             DIP("b%sx r%u\n", link ? "l" : "", rM);
   15673          } else {
   15674             DIP("b%sx%s r%u\n", link ? "l" : "", nCC(INSN_COND), rM);
   15675          }
   15676          goto decode_success;
   15677       }
   15678       /* else: (link && rM == 15): just fall through */
   15679    }
   15680 
   15681    /* --- NB: ARM interworking branches are in NV space, hence
   15682       are handled elsewhere by decode_NV_instruction.
   15683       ---
   15684    */
   15685 
   15686    /* --------------------- Clz --------------------- */
   15687    // CLZ
   15688    if (INSN(27,20) == BITS8(0,0,0,1,0,1,1,0)
   15689        && INSN(19,16) == BITS4(1,1,1,1)
   15690        && INSN(11,4) == BITS8(1,1,1,1,0,0,0,1)) {
   15691       UInt rD = INSN(15,12);
   15692       UInt rM = INSN(3,0);
   15693       IRTemp arg = newTemp(Ity_I32);
   15694       IRTemp res = newTemp(Ity_I32);
   15695       assign(arg, getIRegA(rM));
   15696       assign(res, IRExpr_ITE(
   15697                      binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
   15698                      mkU32(32),
   15699                      unop(Iop_Clz32, mkexpr(arg))
   15700             ));
   15701       putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   15702       DIP("clz%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
   15703       goto decode_success;
   15704    }
   15705 
   15706    /* --------------------- Mul etc --------------------- */
   15707    // MUL
   15708    if (BITS8(0,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
   15709        && INSN(15,12) == BITS4(0,0,0,0)
   15710        && INSN(7,4) == BITS4(1,0,0,1)) {
   15711       UInt bitS = (insn >> 20) & 1; /* 20:20 */
   15712       UInt rD = INSN(19,16);
   15713       UInt rS = INSN(11,8);
   15714       UInt rM = INSN(3,0);
   15715       if (rD == 15 || rM == 15 || rS == 15) {
   15716          /* Unpredictable; don't decode; fall through */
   15717       } else {
   15718          IRTemp argL = newTemp(Ity_I32);
   15719          IRTemp argR = newTemp(Ity_I32);
   15720          IRTemp res  = newTemp(Ity_I32);
   15721          IRTemp oldC = IRTemp_INVALID;
   15722          IRTemp oldV = IRTemp_INVALID;
   15723          assign( argL, getIRegA(rM));
   15724          assign( argR, getIRegA(rS));
   15725          assign( res, binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) );
   15726          if (bitS) {
   15727             oldC = newTemp(Ity_I32);
   15728             assign(oldC, mk_armg_calculate_flag_c());
   15729             oldV = newTemp(Ity_I32);
   15730             assign(oldV, mk_armg_calculate_flag_v());
   15731          }
   15732          // now update guest state
   15733          putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   15734          if (bitS) {
   15735             IRTemp pair = newTemp(Ity_I32);
   15736             assign( pair, binop(Iop_Or32,
   15737                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   15738                                 mkexpr(oldV)) );
   15739             setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
   15740          }
   15741          DIP("mul%c%s r%u, r%u, r%u\n",
   15742              bitS ? 's' : ' ', nCC(INSN_COND), rD, rM, rS);
   15743          goto decode_success;
   15744       }
   15745       /* fall through */
   15746    }
   15747 
   15748    /* --------------------- Integer Divides --------------------- */
   15749    // SDIV
   15750    if (BITS8(0,1,1,1,0,0,0,1) == INSN(27,20)
   15751        && INSN(15,12) == BITS4(1,1,1,1)
   15752        && INSN(7,4) == BITS4(0,0,0,1)) {
   15753       UInt rD = INSN(19,16);
   15754       UInt rM = INSN(11,8);
   15755       UInt rN = INSN(3,0);
   15756       if (rD == 15 || rM == 15 || rN == 15) {
   15757          /* Unpredictable; don't decode; fall through */
   15758       } else {
   15759          IRTemp res  = newTemp(Ity_I32);
   15760          IRTemp argL = newTemp(Ity_I32);
   15761          IRTemp argR = newTemp(Ity_I32);
   15762          assign(argL, getIRegA(rN));
   15763          assign(argR, getIRegA(rM));
   15764          assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
   15765          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   15766          DIP("sdiv r%u, r%u, r%u\n", rD, rN, rM);
   15767          goto decode_success;
   15768       }
   15769     }
   15770 
   15771    // UDIV: unsigned 32-bit divide, rD = rN / rM (emitted as Iop_DivU32)
   15772    if (BITS8(0,1,1,1,0,0,1,1) == INSN(27,20)
   15773        && INSN(15,12) == BITS4(1,1,1,1)
   15774        && INSN(7,4) == BITS4(0,0,0,1)) {
   15775       UInt rD = INSN(19,16);
   15776       UInt rM = INSN(11,8);
   15777       UInt rN = INSN(3,0);
   15778       if (rD == 15 || rM == 15 || rN == 15) {
   15779          /* Unpredictable; don't decode; fall through */
   15780       } else {
   15781          IRTemp res  = newTemp(Ity_I32);
   15782          IRTemp argL = newTemp(Ity_I32); /* dividend, read from rN */
   15783          IRTemp argR = newTemp(Ity_I32); /* divisor, read from rM */
   15784          assign(argL, getIRegA(rN));
   15785          assign(argR, getIRegA(rM));
   15786          assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
   15787          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   15788          DIP("udiv%s r%u, r%u, r%u\n", nCC(INSN_COND), rD, rN, rM); /* include cond suffix, like the other cases */
   15789          goto decode_success;
   15790       }
   15791    }
   15792 
   15793    // MLA, MLS: rD = rN + (rM * rS) for MLA, rD = rN - (rM * rS) for MLS
   15794    if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   15795        && INSN(7,4) == BITS4(1,0,0,1)) {
   15796       UInt bitS  = (insn >> 20) & 1; /* 20:20, nonzero => also set flags */
   15797       UInt isMLS = (insn >> 22) & 1; /* 22:22, nonzero => subtract the product */
   15798       UInt rD = INSN(19,16);
   15799       UInt rN = INSN(15,12);
   15800       UInt rS = INSN(11,8);
   15801       UInt rM = INSN(3,0);
   15802       if (bitS == 1 && isMLS == 1) {
   15803          /* This isn't allowed (MLS that sets flags).  don't decode;
   15804             fall through */
   15805       }
   15806       else
   15807       if (rD == 15 || rM == 15 || rS == 15 || rN == 15) {
   15808          /* Unpredictable; don't decode; fall through */
   15809       } else {
   15810          IRTemp argL = newTemp(Ity_I32); /* multiplicand, from rM */
   15811          IRTemp argR = newTemp(Ity_I32); /* multiplier, from rS */
   15812          IRTemp argP = newTemp(Ity_I32); /* accumulate operand, from rN */
   15813          IRTemp res  = newTemp(Ity_I32);
   15814          IRTemp oldC = IRTemp_INVALID;   /* only allocated when bitS is set */
   15815          IRTemp oldV = IRTemp_INVALID;   /* only allocated when bitS is set */
   15816          assign( argL, getIRegA(rM));
   15817          assign( argR, getIRegA(rS));
   15818          assign( argP, getIRegA(rN));
   15819          assign( res, binop(isMLS ? Iop_Sub32 : Iop_Add32,
   15820                             mkexpr(argP),
   15821                             binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) ));
   15822          if (bitS) {
   15823             vassert(!isMLS); // guaranteed above
   15824             oldC = newTemp(Ity_I32);
   15825             assign(oldC, mk_armg_calculate_flag_c()); /* C flag, read before rD is written */
   15826             oldV = newTemp(Ity_I32);
   15827             assign(oldV, mk_armg_calculate_flag_v()); /* V flag, read before rD is written */
   15828          }
   15829          // now update guest state
   15830          putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   15831          if (bitS) {
   15832             IRTemp pair = newTemp(Ity_I32); /* (oldC << 1) | oldV */
   15833             assign( pair, binop(Iop_Or32,
   15834                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   15835                                 mkexpr(oldV)) );
   15836             setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
   15837          }
   15838          DIP("ml%c%c%s r%u, r%u, r%u, r%u\n",
   15839              isMLS ? 's' : 'a', bitS ? 's' : ' ',
   15840              nCC(INSN_COND), rD, rM, rS, rN);
   15841          goto decode_success;
   15842       }
   15843       /* fall through */
   15844    }
   15845 
   15846    // SMULL, UMULL: rDhi:rDlo = rM * rS as a 64-bit signed/unsigned product
   15847    if (BITS8(0,0,0,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   15848        && INSN(7,4) == BITS4(1,0,0,1)) {
   15849       UInt bitS = (insn >> 20) & 1; /* 20:20, nonzero => also set flags */
   15850       UInt rDhi = INSN(19,16);
   15851       UInt rDlo = INSN(15,12);
   15852       UInt rS   = INSN(11,8);
   15853       UInt rM   = INSN(3,0);
   15854       UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22, nonzero => signed multiply */
   15855       if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
   15856          /* Unpredictable; don't decode; fall through */
   15857       } else {
   15858          IRTemp argL  = newTemp(Ity_I32);
   15859          IRTemp argR  = newTemp(Ity_I32);
   15860          IRTemp res   = newTemp(Ity_I64); /* full 64-bit product */
   15861          IRTemp resHi = newTemp(Ity_I32); /* upper 32 bits of res, goes to rDhi */
   15862          IRTemp resLo = newTemp(Ity_I32); /* lower 32 bits of res, goes to rDlo */
   15863          IRTemp oldC  = IRTemp_INVALID;   /* only allocated when bitS is set */
   15864          IRTemp oldV  = IRTemp_INVALID;   /* only allocated when bitS is set */
   15865          IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
   15866          assign( argL, getIRegA(rM));
   15867          assign( argR, getIRegA(rS));
   15868          assign( res, binop(mulOp, mkexpr(argL), mkexpr(argR)) );
   15869          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   15870          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   15871          if (bitS) {
   15872             oldC = newTemp(Ity_I32);
   15873             assign(oldC, mk_armg_calculate_flag_c()); /* C flag, read before the register writes */
   15874             oldV = newTemp(Ity_I32);
   15875             assign(oldV, mk_armg_calculate_flag_v()); /* V flag, read before the register writes */
   15876          }
   15877          // now update guest state
   15878          putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   15879          putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   15880          if (bitS) {
   15881             IRTemp pair = newTemp(Ity_I32); /* (oldC << 1) | oldV */
   15882             assign( pair, binop(Iop_Or32,
   15883                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   15884                                 mkexpr(oldV)) );
   15885             setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
   15886          }
   15887          DIP("%cmull%c%s r%u, r%u, r%u, r%u\n",
   15888              isS ? 's' : 'u', bitS ? 's' : ' ',
   15889              nCC(INSN_COND), rDlo, rDhi, rM, rS);
   15890          goto decode_success;
   15891       }
   15892       /* fall through */
   15893    }
   15894 
   15895    // SMLAL, UMLAL: rDhi:rDlo = rDhi:rDlo + (rM * rS), 64-bit accumulate
   15896    if (BITS8(0,0,0,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   15897        && INSN(7,4) == BITS4(1,0,0,1)) {
   15898       UInt bitS = (insn >> 20) & 1; /* 20:20, nonzero => also set flags */
   15899       UInt rDhi = INSN(19,16);
   15900       UInt rDlo = INSN(15,12);
   15901       UInt rS   = INSN(11,8);
   15902       UInt rM   = INSN(3,0);
   15903       UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22, nonzero => signed multiply */
   15904       if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
   15905          /* Unpredictable; don't decode; fall through */
   15906       } else {
   15907          IRTemp argL  = newTemp(Ity_I32);
   15908          IRTemp argR  = newTemp(Ity_I32);
   15909          IRTemp old   = newTemp(Ity_I64); /* previous rDhi:rDlo as one 64-bit value */
   15910          IRTemp res   = newTemp(Ity_I64); /* old + product */
   15911          IRTemp resHi = newTemp(Ity_I32);
   15912          IRTemp resLo = newTemp(Ity_I32);
   15913          IRTemp oldC  = IRTemp_INVALID;   /* only allocated when bitS is set */
   15914          IRTemp oldV  = IRTemp_INVALID;   /* only allocated when bitS is set */
   15915          IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
   15916          assign( argL, getIRegA(rM));
   15917          assign( argR, getIRegA(rS));
   15918          assign( old, binop(Iop_32HLto64, getIRegA(rDhi), getIRegA(rDlo)) );
   15919          assign( res, binop(Iop_Add64,
   15920                             mkexpr(old),
   15921                             binop(mulOp, mkexpr(argL), mkexpr(argR))) );
   15922          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   15923          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   15924          if (bitS) {
   15925             oldC = newTemp(Ity_I32);
   15926             assign(oldC, mk_armg_calculate_flag_c()); /* C flag, read before the register writes */
   15927             oldV = newTemp(Ity_I32);
   15928             assign(oldV, mk_armg_calculate_flag_v()); /* V flag, read before the register writes */
   15929          }
   15930          // now update guest state
   15931          putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   15932          putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   15933          if (bitS) {
   15934             IRTemp pair = newTemp(Ity_I32); /* (oldC << 1) | oldV */
   15935             assign( pair, binop(Iop_Or32,
   15936                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   15937                                 mkexpr(oldV)) );
   15938             setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
   15939          }
   15940          DIP("%cmlal%c%s r%u, r%u, r%u, r%u\n",
   15941              isS ? 's' : 'u', bitS ? 's' : ' ', nCC(INSN_COND),
   15942              rDlo, rDhi, rM, rS);
   15943          goto decode_success;
   15944       }
   15945       /* fall through */
   15946    }
   15947 
   15948    // UMAAL: rDhi:rDlo = (rN * rM) + rDhi + rDlo, all operands unsigned
   15949    if (BITS8(0,0,0,0,0,1,0,0) == INSN(27,20) && INSN(7,4) == BITS4(1,0,0,1)) {
   15950       UInt rDhi = INSN(19,16);
   15951       UInt rDlo = INSN(15,12);
   15952       UInt rM   = INSN(11,8);
   15953       UInt rN   = INSN(3,0);
   15954       if (rDlo == 15 || rDhi == 15 || rN == 15 || rM == 15 || rDhi == rDlo)  {
   15955          /* Unpredictable; don't decode; fall through */
   15956       } else {
   15957          IRTemp argN   = newTemp(Ity_I32);
   15958          IRTemp argM   = newTemp(Ity_I32);
   15959          IRTemp argDhi = newTemp(Ity_I32); /* old rDhi, zero-extended into the sum */
   15960          IRTemp argDlo = newTemp(Ity_I32); /* old rDlo, zero-extended into the sum */
   15961          IRTemp res    = newTemp(Ity_I64);
   15962          IRTemp resHi  = newTemp(Ity_I32);
   15963          IRTemp resLo  = newTemp(Ity_I32);
   15964          assign( argN,   getIRegA(rN) );
   15965          assign( argM,   getIRegA(rM) );
   15966          assign( argDhi, getIRegA(rDhi) );
   15967          assign( argDlo, getIRegA(rDlo) );
   15968          assign( res,
   15969                  binop(Iop_Add64,
   15970                        binop(Iop_Add64,
   15971                              binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
   15972                              unop(Iop_32Uto64, mkexpr(argDhi))),
   15973                        unop(Iop_32Uto64, mkexpr(argDlo))) );
   15974          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   15975          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   15976          // now update guest state
   15977          putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   15978          putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   15979          DIP("umaal%s r%u, r%u, r%u, r%u\n", /* cond suffix directly after the mnemonic */
   15980              nCC(INSN_COND), rDlo, rDhi, rN, rM);
   15981          goto decode_success;
   15982       }
   15983       /* fall through */
   15984    }
   15985 
   15986    /* --------------------- Msr etc --------------------- */
   15987 
   15988    // MSR apsr, #imm: write the selected APSR fields from a rotated 8-bit immediate
   15989    if (INSN(27,20) == BITS8(0,0,1,1,0,0,1,0)
   15990        && INSN(17,12) == BITS6(0,0,1,1,1,1)) {
   15991       UInt write_ge    = INSN(18,18);
   15992       UInt write_nzcvq = INSN(19,19);
   15993       if (write_nzcvq || write_ge) {
   15994          UInt   imm = (INSN(11,0) >> 0) & 0xFF;      /* 7:0, unrotated immediate */
   15995          UInt   rot = 2 * ((INSN(11,0) >> 8) & 0xF); /* 11:8, doubled: even, 0 .. 30 */
   15996          IRTemp immT = newTemp(Ity_I32);
   15997          vassert(rot <= 30);
   15998          imm = ROR32(imm, rot);
   15999          assign(immT, mkU32(imm));
   16000          desynthesise_APSR( write_nzcvq, write_ge, immT, condT );
   16001          DIP("msr%s cpsr_%s%s, #0x%08x\n", nCC(INSN_COND), /* same field syntax as the register form */
   16002              write_nzcvq ? "f" : "", write_ge ? "g" : "", imm);
   16003          goto decode_success;
   16004       }
   16005       /* fall through */
   16006    }
   16007 
   16008    // MSR apsr, reg: write the selected APSR fields from register rN
   16009    if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
   16010        && INSN(17,12) == BITS6(0,0,1,1,1,1)
   16011        && INSN(11,4) == BITS8(0,0,0,0,0,0,0,0)) {
   16012       UInt rN          = INSN(3,0);
   16013       UInt write_ge    = INSN(18,18);
   16014       UInt write_nzcvq = INSN(19,19);
   16015       if (rN != 15 && (write_nzcvq || write_ge)) { /* at least one field must be selected */
   16016          IRTemp rNt = newTemp(Ity_I32);
   16017          assign(rNt, getIRegA(rN));
   16018          desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
   16019          DIP("msr%s cpsr_%s%s, r%u\n", nCC(INSN_COND),
   16020              write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
   16021          goto decode_success;
   16022       }
   16023       /* fall through */
   16024    }
   16025 
   16026    // MRS rD, cpsr: copy the value built by synthesise_APSR() into rD
   16027    if ((insn & 0x0FFF0FFF) == 0x010F0000) {
   16028       UInt rD   = INSN(15,12);
   16029       if (rD != 15) { /* rD == 15 is left undecoded */
   16030          IRTemp apsr = synthesise_APSR();
   16031          putIRegA( rD, mkexpr(apsr), condT, Ijk_Boring );
   16032          DIP("mrs%s r%u, cpsr\n", nCC(INSN_COND), rD);
   16033          goto decode_success;
   16034       }
   16035       /* fall through */
   16036    }
   16037 
   16038    /* --------------------- Svc --------------------- */
   16039    if (BITS8(1,1,1,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))) { /* bits 27:24 == 1111 */
   16040       UInt imm24 = (insn >> 0) & 0xFFFFFF;
   16041       if (imm24 == 0) { /* only svc #0 is decoded here; other immediates fall through */
   16042          /* A syscall.  We can't do this conditionally, hence: */
   16043          if (condT != IRTemp_INVALID) {
   16044             mk_skip_over_A32_if_cond_is_false( condT );
   16045          }
   16046          // AL after here
   16047          llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 4 )); /* R15 = address of this insn + 4 */
   16048          dres.jk_StopHere = Ijk_Sys_syscall;
   16049          dres.whatNext    = Dis_StopHere; /* translation stops at this instruction */
   16050          DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
   16051          goto decode_success;
   16052       }
   16053       /* fall through */
   16054    }
   16055 
   16056    /* ------------------------ swp ------------------------ */
   16057 
   16058    // SWP, SWPB: rD = old [rN], [rN] = rM, done as a load-linked/store-conditional pair
   16059    if (BITS8(0,0,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   16060        && BITS4(0,0,0,0) == INSN(11,8)
   16061        && BITS4(1,0,0,1) == INSN(7,4)) {
   16062       UInt   rN   = INSN(19,16);
   16063       UInt   rD   = INSN(15,12);
   16064       UInt   rM   = INSN(3,0);
   16065       IRTemp tRn  = newTemp(Ity_I32);  /* address, from rN */
   16066       IRTemp tNew = newTemp(Ity_I32);  /* value to store, from rM */
   16067       IRTemp tOld = IRTemp_INVALID;    /* loaded value; type chosen below (I8 or I32) */
   16068       IRTemp tSC1 = newTemp(Ity_I1);   /* result of the store-conditional */
   16069       UInt   isB  = (insn >> 22) & 1;  /* 22:22, nonzero => byte variant (swpb) */
   16070 
   16071       if (rD == 15 || rN == 15 || rM == 15 || rN == rM || rN == rD) {
   16072          /* undecodable; fall through */
   16073       } else {
   16074          /* make unconditional */
   16075          if (condT != IRTemp_INVALID) {
   16076             mk_skip_over_A32_if_cond_is_false( condT );
   16077             condT = IRTemp_INVALID;
   16078          }
   16079          /* Ok, now we're unconditional.  Generate a LL-SC loop. */
   16080          assign(tRn, getIRegA(rN));
   16081          assign(tNew, getIRegA(rM));
   16082          if (isB) {
   16083             /* swpb */
   16084             tOld = newTemp(Ity_I8);
   16085             stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
   16086                               NULL/*=>isLL*/) );
   16087             stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
   16088                               unop(Iop_32to8, mkexpr(tNew))) );
   16089          } else {
   16090             /* swp */
   16091             tOld = newTemp(Ity_I32);
   16092             stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
   16093                               NULL/*=>isLL*/) );
   16094             stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
   16095                               mkexpr(tNew)) );
   16096          }
   16097          stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),  /* taken when tSC1 is 0 */
   16098                            /*Ijk_NoRedir*/Ijk_Boring,
   16099                            IRConst_U32(guest_R15_curr_instr_notENC), /* target: this same instruction */
   16100                            OFFB_R15T ));
   16101          putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld), /* byte result is zero-extended */
   16102                       IRTemp_INVALID, Ijk_Boring);
   16103          DIP("swp%s%s r%u, r%u, [r%u]\n",
   16104              isB ? "b" : "", nCC(INSN_COND), rD, rM, rN);
   16105          goto decode_success;
   16106       }
   16107       /* fall through */
   16108    }
   16109 
   16110    /* ----------------------------------------------------------- */
   16111    /* -- ARMv6 instructions                                    -- */
   16112    /* ----------------------------------------------------------- */
   16113 
   16114    /* ------------------- {ldr,str}ex{,b,h,d} ------------------- */
   16115 
   16116    // LDREXD, LDREX, LDREXH, LDREXB: load-linked from [rN] into rT (rT, rT+1 for the D form)
   16117    if (0x01900F9F == (insn & 0x0F900FFF)) {
   16118       UInt   rT    = INSN(15,12);
   16119       UInt   rN    = INSN(19,16);
   16120       IRType ty    = Ity_INVALID;  /* width of the memory access */
   16121       IROp   widen = Iop_INVALID;  /* zero-extension to 32 bits; stays INVALID for 32/64-bit forms */
   16122       const HChar* nm = NULL;      /* mnemonic suffix for the trace output */
   16123       Bool   valid = True;
   16124       switch (INSN(22,21)) {
   16125          case 0: nm = "";  ty = Ity_I32; break;
   16126          case 1: nm = "d"; ty = Ity_I64; break;
   16127          case 2: nm = "b"; ty = Ity_I8;  widen = Iop_8Uto32; break;
   16128          case 3: nm = "h"; ty = Ity_I16; widen = Iop_16Uto32; break;
   16129          default: vassert(0);
   16130       }
   16131       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
   16132          if (rT == 15 || rN == 15)
   16133             valid = False;
   16134       } else {
   16135          vassert(ty == Ity_I64);
   16136          if ((rT & 1) == 1 || rT == 14 || rN == 15) /* D form: rT must be even and not 14 */
   16137             valid = False;
   16138       }
   16139       if (valid) {
   16140          IRTemp res;
   16141          /* make unconditional */
   16142          if (condT != IRTemp_INVALID) {
   16143            mk_skip_over_A32_if_cond_is_false( condT );
   16144            condT = IRTemp_INVALID;
   16145          }
   16146          /* Ok, now we're unconditional.  Do the load. */
   16147          res = newTemp(ty);
   16148          // FIXME: assumes little-endian guest
   16149          stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
   16150                            NULL/*this is a load*/) );
   16151          if (ty == Ity_I64) {
   16152             // FIXME: assumes little-endian guest
   16153             putIRegA(rT+0, unop(Iop_64to32, mkexpr(res)),   /* low half -> rT */
   16154                            IRTemp_INVALID, Ijk_Boring);
   16155             putIRegA(rT+1, unop(Iop_64HIto32, mkexpr(res)), /* high half -> rT+1 */
   16156                            IRTemp_INVALID, Ijk_Boring);
   16157             DIP("ldrex%s%s r%u, r%u, [r%u]\n",
   16158                 nm, nCC(INSN_COND), rT+0, rT+1, rN);
   16159          } else {
   16160             putIRegA(rT, widen == Iop_INVALID
   16161                             ? mkexpr(res) : unop(widen, mkexpr(res)),
   16162                      IRTemp_INVALID, Ijk_Boring);
   16163             DIP("ldrex%s%s r%u, [r%u]\n", nm, nCC(INSN_COND), rT, rN);
   16164          }
   16165          goto decode_success;
   16166       }
   16167       /* undecodable; fall through */
   16168    }
   16169 
   16170    // STREXD, STREX, STREXH, STREXB: store-conditional of rT (rT, rT+1 for D) to [rN]; status in rD
   16171    if (0x01800F90 == (insn & 0x0F900FF0)) {
   16172       UInt   rT     = INSN(3,0);
   16173       UInt   rN     = INSN(19,16);
   16174       UInt   rD     = INSN(15,12);
   16175       IRType ty     = Ity_INVALID;  /* width of the memory access */
   16176       IROp   narrow = Iop_INVALID;  /* truncation from 32 bits; stays INVALID for 32/64-bit forms */
   16177       const HChar* nm = NULL;       /* mnemonic suffix for the trace output */
   16178       Bool   valid  = True;
   16179       switch (INSN(22,21)) {
   16180          case 0: nm = "";  ty = Ity_I32; break;
   16181          case 1: nm = "d"; ty = Ity_I64; break;
   16182          case 2: nm = "b"; ty = Ity_I8;  narrow = Iop_32to8; break;
   16183          case 3: nm = "h"; ty = Ity_I16; narrow = Iop_32to16; break;
   16184          default: vassert(0);
   16185       }
   16186       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
   16187          if (rD == 15 || rN == 15 || rT == 15
   16188              || rD == rN || rD == rT)
   16189             valid = False;
   16190       } else {
   16191          vassert(ty == Ity_I64);
   16192          if (rD == 15 || (rT & 1) == 1 || rT == 14 || rN == 15
   16193              || rD == rN || rD == rT || rD == rT+1) /* status reg must not overlap the data pair */
   16194             valid = False;
   16195       }
   16196       if (valid) {
   16197          IRTemp resSC1, resSC32, data;
   16198          /* make unconditional */
   16199          if (condT != IRTemp_INVALID) {
   16200             mk_skip_over_A32_if_cond_is_false( condT );
   16201             condT = IRTemp_INVALID;
   16202          }
   16203          /* Ok, now we're unconditional.  Do the store. */
   16204          data = newTemp(ty);
   16205          assign(data,
   16206                 ty == Ity_I64
   16207                    // FIXME: assumes little-endian guest
   16208                    ? binop(Iop_32HLto64, getIRegA(rT+1), getIRegA(rT+0))
   16209                    : narrow == Iop_INVALID
   16210                       ? getIRegA(rT)
   16211                       : unop(narrow, getIRegA(rT)));
   16212          resSC1 = newTemp(Ity_I1);
   16213          // FIXME: assumes little-endian guest
   16214          stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
   16215 
   16216          /* Set rD to 1 on failure, 0 on success.  Currently we have
   16217             resSC1 == 0 on failure, 1 on success. */
   16218          resSC32 = newTemp(Ity_I32);
   16219          assign(resSC32,
   16220                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1)))); /* invert, then widen to 32 bits */
   16221 
   16222          putIRegA(rD, mkexpr(resSC32),
   16223                       IRTemp_INVALID, Ijk_Boring);
   16224          if (ty == Ity_I64) {
   16225             DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
   16226                 nm, nCC(INSN_COND), rD, rT, rT+1, rN);
   16227          } else {
   16228             DIP("strex%s%s r%u, r%u, [r%u]\n",
   16229                 nm, nCC(INSN_COND), rD, rT, rN);
   16230          }
   16231          goto decode_success;
   16232       }
   16233       /* fall through */
   16234    }
   16235 
   16236    /* --------------------- movw, movt --------------------- */
   16237    if (0x03000000 == (insn & 0x0FF00000)   /* movw: rD = imm16 */
   16238        || 0x03400000 == (insn & 0x0FF00000)) /* pray for CSE */ {   /* movt: rD[31:16] = imm16 */
   16239       UInt rD    = INSN(15,12);
   16240       UInt imm16 = (insn & 0xFFF) | ((insn >> 4) & 0x0000F000); /* 11:0 low bits, 19:16 top nibble */
   16241       UInt isT   = (insn >> 22) & 1; /* 22:22, nonzero => movt */
   16242       if (rD == 15) {
   16243          /* forget it */
   16244       } else {
   16245          if (isT) {
   16246             putIRegA(rD,
   16247                      binop(Iop_Or32,
   16248                            binop(Iop_And32, getIRegA(rD), mkU32(0xFFFF)), /* keep the low 16 bits of rD */
   16249                            mkU32(imm16 << 16)),
   16250                      condT, Ijk_Boring);
   16251             DIP("movt%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
   16252             goto decode_success;
   16253          } else {
   16254             putIRegA(rD, mkU32(imm16), condT, Ijk_Boring);
   16255             DIP("movw%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
   16256             goto decode_success;
   16257          }
   16258       }
   16259       /* fall through */
   16260    }
   16261 
   16262    /* ----------- uxtb, sxtb, uxth, sxth, uxtb16, sxtb16 ----------- */
   16263    /* FIXME: this is an exact duplicate of the Thumb version.  They
   16264       should be commoned up. */
   16265    if (BITS8(0,1,1,0,1, 0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,0))
   16266        && BITS4(1,1,1,1) == INSN(19,16)
   16267        && BITS4(0,1,1,1) == INSN(7,4)
   16268        && BITS4(0,0, 0,0) == (INSN(11,8) & BITS4(0,0,1,1))) {
   16269       UInt subopc = INSN(27,20) & BITS8(0,0,0,0,0, 1,1,1); /* 22:20 selects the variant */
   16270       if (subopc != BITS4(0,0,0,1) && subopc != BITS4(0,1,0,1)) { /* these two have no case below */
   16271          Int    rot  = (INSN(11,8) >> 2) & 3; /* 11:10, rotation in units of 8 bits */
   16272          UInt   rM   = INSN(3,0);
   16273          UInt   rD   = INSN(15,12);
   16274          IRTemp srcT = newTemp(Ity_I32);
   16275          IRTemp rotT = newTemp(Ity_I32); /* rM rotated right by 8 * rot */
   16276          IRTemp dstT = newTemp(Ity_I32);
   16277          const HChar* nm = "???";
   16278          assign(srcT, getIRegA(rM));
   16279          assign(rotT, genROR32(srcT, 8 * rot)); /* 0, 8, 16 or 24 only */
   16280          switch (subopc) {
   16281             case BITS4(0,1,1,0): // UXTB
   16282                assign(dstT, unop(Iop_8Uto32, unop(Iop_32to8, mkexpr(rotT))));
   16283                nm = "uxtb";
   16284                break;
   16285             case BITS4(0,0,1,0): // SXTB
   16286                assign(dstT, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rotT))));
   16287                nm = "sxtb";
   16288                break;
   16289             case BITS4(0,1,1,1): // UXTH
   16290                assign(dstT, unop(Iop_16Uto32, unop(Iop_32to16, mkexpr(rotT))));
   16291                nm = "uxth";
   16292                break;
   16293             case BITS4(0,0,1,1): // SXTH
   16294                assign(dstT, unop(Iop_16Sto32, unop(Iop_32to16, mkexpr(rotT))));
   16295                nm = "sxth";
   16296                break;
   16297             case BITS4(0,1,0,0): // UXTB16
   16298                assign(dstT, binop(Iop_And32, mkexpr(rotT), mkU32(0x00FF00FF)));
   16299                nm = "uxtb16";
   16300                break;
   16301             case BITS4(0,0,0,0): { // SXTB16
   16302                IRTemp lo32 = newTemp(Ity_I32); /* byte 0 of rotT */
   16303                IRTemp hi32 = newTemp(Ity_I32); /* rotT >> 16; its low byte is byte 2 of rotT */
   16304                assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
   16305                assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
   16306                assign(
   16307                   dstT,
   16308                   binop(Iop_Or32,
   16309                         binop(Iop_And32,
   16310                               unop(Iop_8Sto32,
   16311                                    unop(Iop_32to8, mkexpr(lo32))),
   16312                               mkU32(0xFFFF)),
   16313                         binop(Iop_Shl32,
   16314                               unop(Iop_8Sto32,
   16315                                    unop(Iop_32to8, mkexpr(hi32))),
   16316                               mkU8(16))
   16317                ));
   16318                nm = "sxtb16";
   16319                break;
   16320             }
   16321             default:
   16322                vassert(0); // guarded by "if" above
   16323          }
   16324          putIRegA(rD, mkexpr(dstT), condT, Ijk_Boring);
   16325          DIP("%s%s r%u, r%u, ROR #%d\n", nm, nCC(INSN_COND), rD, rM, 8 * rot); /* print the amount in bits, not the field index */
   16326          goto decode_success;
   16327       }
   16328       /* fall through */
   16329    }
   16330 
   16331    /* ------------------- bfi, bfc ------------------- */
   16332    if (BITS8(0,1,1,1,1,1,0, 0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
   16333        && BITS4(0, 0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
   16334       UInt rD  = INSN(15,12);
   16335       UInt rN  = INSN(3,0);            /* 15 here selects bfc (source treated as zero) */
   16336       UInt msb = (insn >> 16) & 0x1F; /* 20:16 */
   16337       UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
   16338       if (rD == 15 || msb < lsb) {
   16339          /* undecodable; fall through */
   16340       } else {
   16341          IRTemp src    = newTemp(Ity_I32);
   16342          IRTemp olddst = newTemp(Ity_I32);
   16343          IRTemp newdst = newTemp(Ity_I32);
   16344          UInt   mask = 1U << (msb - lsb); /* unsigned: msb - lsb can be 31, which would overflow a signed 1 */
   16345          mask = (mask - 1) + mask;        /* now (msb - lsb + 1) low one-bits */
   16346          vassert(mask != 0); // guaranteed by "msb < lsb" check above
   16347          mask <<= lsb;                    /* move the field mask into position */
   16348 
   16349          assign(src, rN == 15 ? mkU32(0) : getIRegA(rN));
   16350          assign(olddst, getIRegA(rD));
   16351          assign(newdst,
   16352                 binop(Iop_Or32,
   16353                    binop(Iop_And32,
   16354                          binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
   16355                          mkU32(mask)),
   16356                    binop(Iop_And32,
   16357                          mkexpr(olddst),
   16358                          mkU32(~mask)))
   16359                );
   16360 
   16361          putIRegA(rD, mkexpr(newdst), condT, Ijk_Boring);
   16362 
   16363          if (rN == 15) {
   16364             DIP("bfc%s r%u, #%u, #%u\n",
   16365                 nCC(INSN_COND), rD, lsb, msb-lsb+1);
   16366          } else {
   16367             DIP("bfi%s r%u, r%u, #%u, #%u\n",
   16368                 nCC(INSN_COND), rD, rN, lsb, msb-lsb+1);
   16369          }
   16370          goto decode_success;
   16371       }
   16372       /* fall through */
   16373    }
   16374 
   16375    /* ------------------- {u,s}bfx ------------------- */
   16376    if (BITS8(0,1,1,1,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   16377        && BITS4(0,1,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
   16378       UInt rD  = INSN(15,12);
   16379       UInt rN  = INSN(3,0);
   16380       UInt wm1 = (insn >> 16) & 0x1F; /* 20:16, field width minus one */
   16381       UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
   16382       UInt msb = lsb + wm1;
   16383       UInt isU = (insn >> 22) & 1;    /* 22:22 */
   16384       if (rD == 15 || rN == 15 || msb >= 32) {
   16385          /* undecodable; fall through */
   16386       } else {
   16387          IRTemp src  = newTemp(Ity_I32);
   16388          IRTemp tmp  = newTemp(Ity_I32); /* extracted field, right-aligned */
   16389          IRTemp res  = newTemp(Ity_I32); /* field zero- or sign-extended from bit wm1 */
   16390          UInt   mask = ((1U << wm1) - 1) + (1U << wm1); /* wm1+1 low one-bits; unsigned since wm1 can be 31 */
   16391          vassert(msb <= 31);  // msb is unsigned, so the lower bound holds trivially
   16392          vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
   16393 
   16394          assign(src, getIRegA(rN));
   16395          assign(tmp, binop(Iop_And32,
   16396                            binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
   16397                            mkU32(mask)));
   16398          assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
   16399                            binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
   16400                            mkU8(31-wm1)));
   16401 
   16402          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   16403 
   16404          DIP("%s%s r%u, r%u, #%u, #%u\n",
   16405              isU ? "ubfx" : "sbfx",
   16406              nCC(INSN_COND), rD, rN, lsb, wm1 + 1);
   16407          goto decode_success;
   16408       }
   16409       /* fall through */
   16410    }
   16411 
   16412    /* --------------------- Load/store doubleword ------------- */
   16413    // LDRD STRD
   16414    /*                 31   27   23   19 15 11   7    3     # highest bit
   16415                         28   24   20 16 12    8    4    0
   16416       A5-36   1 | 16  cond 0001 U100 Rn Rd im4h 11S1 im4l
   16417       A5-38   1 | 32  cond 0001 U000 Rn Rd 0000 11S1 Rm
   16418       A5-40   2 | 16  cond 0001 U110 Rn Rd im4h 11S1 im4l
   16419       A5-42   2 | 32  cond 0001 U010 Rn Rd 0000 11S1 Rm
   16420       A5-44   3 | 16  cond 0000 U100 Rn Rd im4h 11S1 im4l
   16421       A5-46   3 | 32  cond 0000 U000 Rn Rd 0000 11S1 Rm
   16422    */
   16423    /* case coding:
   16424              1   at-ea               (access at ea)
   16425              2   at-ea-then-upd      (access at ea, then Rn = ea)
   16426              3   at-Rn-then-upd      (access at Rn, then Rn = ea)
   16427       ea coding
   16428              16  Rn +/- imm8
   16429              32  Rn +/- Rm
   16430    */
   16431    /* Quickly skip over all of this for hopefully most instructions */
   16432    if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
   16433       goto after_load_store_doubleword;
   16434 
   16435    /* Check the "11S1" thing. */
   16436    if ((INSN(7,4) & BITS4(1,1,0,1)) != BITS4(1,1,0,1))
   16437       goto after_load_store_doubleword;
   16438 
   16439    summary = 0;
   16440 
   16441    /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,0,0)) {
   16442       summary = 1 | 16;
   16443    }
   16444    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,0,0)) {
   16445       summary = 1 | 32;
   16446    }
   16447    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,1,0)) {
   16448       summary = 2 | 16;
   16449    }
   16450    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,1,0)) {
   16451       summary = 2 | 32;
   16452    }
   16453    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(1,0,0)) {
   16454       summary = 3 | 16;
   16455    }
   16456    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(0,0,0)) {
   16457       summary = 3 | 32;
   16458    }
   16459    else goto after_load_store_doubleword;
   16460 
   16461    { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
   16462      UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
   16463      UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
   16464      UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
   16465      UInt bS   = (insn >> 5) & 1;    /* S=1 store, S=0 load */
   16466      UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
   16467 
   16468      /* Require rD to be an even numbered register */
   16469      if ((rD & 1) != 0)
   16470         goto after_load_store_doubleword;
   16471 
   16472      /* Require 11:8 == 0 for Rn +/- Rm cases */
   16473      if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
   16474         goto after_load_store_doubleword;
   16475 
   16476      /* Skip some invalid cases, which would lead to two competing
   16477         updates to the same register, or which are otherwise
   16478         disallowed by the spec. */
   16479      switch (summary) {
   16480         case 1 | 16:
   16481            break;
   16482         case 1 | 32:
   16483            if (rM == 15) goto after_load_store_doubleword;
   16484            break;
   16485         case 2 | 16: case 3 | 16:
   16486            if (rN == 15) goto after_load_store_doubleword;
   16487            if (bS == 0 && (rN == rD || rN == rD+1))
   16488               goto after_load_store_doubleword;
   16489            break;
   16490         case 2 | 32: case 3 | 32:
   16491            if (rM == 15) goto after_load_store_doubleword;
   16492            if (rN == 15) goto after_load_store_doubleword;
   16493            if (rN == rM) goto after_load_store_doubleword;
   16494            if (bS == 0 && (rN == rD || rN == rD+1))
   16495               goto after_load_store_doubleword;
   16496            break;
   16497         default:
   16498            vassert(0);
   16499      }
   16500 
   16501      /* If this is a branch (a load whose second register, rD+1, is
   16502         r15), make it unconditional at this point.  Doing conditional
   16503         branches in-line is too complex (for now). */
   16504      vassert((rD & 1) == 0); /* from tests above */
   16505      if (bS == 0 && rD+1 == 15 && condT != IRTemp_INVALID) {
   16506         // go uncond
   16507         mk_skip_over_A32_if_cond_is_false( condT );
   16508         condT = IRTemp_INVALID;
   16509         // now uncond
   16510      }
   16511 
   16512      /* compute the effective address.  Bind it to a tmp since we
   16513         may need to use it twice. */
   16514      IRExpr* eaE = NULL;
   16515      switch (summary & 0xF0) {
   16516         case 16:
   16517            eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
   16518            break;
   16519         case 32:
   16520            eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
   16521            break;
   16522      }
   16523      vassert(eaE);
   16524      IRTemp eaT = newTemp(Ity_I32);
   16525      assign(eaT, eaE);
   16526 
   16527      /* get the old Rn value */
   16528      IRTemp rnT = newTemp(Ity_I32);
   16529      assign(rnT, getIRegA(rN));
   16530 
   16531      /* decide on the transfer address */
   16532      IRTemp taT = IRTemp_INVALID;
   16533      switch (summary & 0x0F) {
   16534         case 1: case 2: taT = eaT; break;
   16535         case 3:         taT = rnT; break;
   16536      }
   16537      vassert(taT != IRTemp_INVALID);
   16538 
   16539      /* XXX deal with alignment constraints */
   16540      /* XXX: but the A8 doesn't seem to trap for misaligned loads, so,
   16541         ignore alignment issues for the time being. */
   16542 
   16543      /* For almost all cases, we do the writeback after the transfers.
   16544         However, that leaves the stack "uncovered" for a pre-indexed
   16545         store below SP with writeback, e.g.  strd rD, [sp, #-8]!
   16546         In which case, do the writeback to SP now, instead of later.
   16547         This is bad in that it makes the insn non-restartable if the
   16548         accesses fault, but at least keeps Memcheck happy. */
   16549      Bool writeback_already_done = False;
   16550      if (bS == 1 /*store*/ && summary == (2 | 16)
   16551          && rN == 13 && rN != rD && rN != rD+1
   16552          && bU == 0/*minus*/) {
   16553         putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   16554         writeback_already_done = True;
   16555      }
   16556 
   16557      /* doubleword store  S 1
   16558         doubleword load   S 0
   16559      */
   16560      const HChar* name = NULL;
   16561      /* generate the transfers */
   16562      if (bS == 1) { // doubleword store
   16563         storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(0)),
   16564                         getIRegA(rD+0), condT );
   16565         storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(4)),
   16566                         getIRegA(rD+1), condT );
   16567         name = "strd";
   16568      } else { // doubleword load
   16569         IRTemp oldRd0 = newTemp(Ity_I32);
   16570         IRTemp oldRd1 = newTemp(Ity_I32);
   16571         assign(oldRd0, llGetIReg(rD+0));
   16572         assign(oldRd1, llGetIReg(rD+1));
   16573         IRTemp newRd0 = newTemp(Ity_I32);
   16574         IRTemp newRd1 = newTemp(Ity_I32);
   16575         loadGuardedLE( newRd0, ILGop_Ident32,
   16576                        binop(Iop_Add32, mkexpr(taT), mkU32(0)),
   16577                        mkexpr(oldRd0), condT );
   16578         putIRegA( rD+0, mkexpr(newRd0), IRTemp_INVALID, Ijk_Boring );
   16579         loadGuardedLE( newRd1, ILGop_Ident32,
   16580                        binop(Iop_Add32, mkexpr(taT), mkU32(4)),
   16581                        mkexpr(oldRd1), condT );
   16582         putIRegA( rD+1, mkexpr(newRd1), IRTemp_INVALID, Ijk_Boring );
   16583         name = "ldrd";
   16584      }
   16585 
   16586      /* Update Rn if necessary. */
   16587      switch (summary & 0x0F) {
   16588         case 2: case 3:
   16589            // should be assured by logic above:
   16590            vassert(rN != 15); /* from checks above */
   16591            if (bS == 0) {
   16592               vassert(rD+0 != rN); /* since we just wrote rD+0 */
   16593               vassert(rD+1 != rN); /* since we just wrote rD+1 */
   16594            }
   16595            if (!writeback_already_done)
   16596               putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   16597            break;
   16598      }
   16599 
   16600      switch (summary & 0x0F) {
   16601         case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
   16602                  break;
   16603         case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
   16604                      name, nCC(INSN_COND), rD, dis_buf);
   16605                  break;
   16606         case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
   16607                      name, nCC(INSN_COND), rD, dis_buf);
   16608                  break;
   16609         default: vassert(0);
   16610      }
   16611 
   16612      goto decode_success;
   16613    }
   16614 
   16615   after_load_store_doubleword:
   16616 
   16617    /* ------------------- {s,u}xtab ------------- */
   16618    if (BITS8(0,1,1,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   16619        && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
   16620        && BITS4(0,1,1,1) == INSN(7,4)) {
   16621       UInt rN  = INSN(19,16);
   16622       UInt rD  = INSN(15,12);
   16623       UInt rM  = INSN(3,0);
   16624       UInt rot = (insn >> 10) & 3;
   16625       UInt isU = INSN(22,22);
   16626       if (rN == 15/*it's {S,U}XTB*/ || rD == 15 || rM == 15) {
   16627          /* undecodable; fall through */
   16628       } else {
   16629          IRTemp srcL = newTemp(Ity_I32);
   16630          IRTemp srcR = newTemp(Ity_I32);
   16631          IRTemp res  = newTemp(Ity_I32);
   16632          assign(srcR, getIRegA(rM));
   16633          assign(srcL, getIRegA(rN));
   16634          assign(res,  binop(Iop_Add32,
   16635                             mkexpr(srcL),
   16636                             unop(isU ? Iop_8Uto32 : Iop_8Sto32,
   16637                                  unop(Iop_32to8,
   16638                                       genROR32(srcR, 8 * rot)))));
   16639          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   16640          DIP("%cxtab%s r%u, r%u, r%u, ror #%u\n",
   16641              isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
   16642          goto decode_success;
   16643       }
   16644       /* fall through */
   16645    }
   16646 
   16647    /* ------------------- {s,u}xtah ------------- */
   16648    if (BITS8(0,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   16649        && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
   16650        && BITS4(0,1,1,1) == INSN(7,4)) {
   16651       UInt rN  = INSN(19,16);
   16652       UInt rD  = INSN(15,12);
   16653       UInt rM  = INSN(3,0);
   16654       UInt rot = (insn >> 10) & 3;
   16655       UInt isU = INSN(22,22);
   16656       if (rN == 15/*it's {S,U}XTH*/ || rD == 15 || rM == 15) {
   16657          /* undecodable; fall through */
   16658       } else {
   16659          IRTemp srcL = newTemp(Ity_I32);
   16660          IRTemp srcR = newTemp(Ity_I32);
   16661          IRTemp res  = newTemp(Ity_I32);
   16662          assign(srcR, getIRegA(rM));
   16663          assign(srcL, getIRegA(rN));
   16664          assign(res,  binop(Iop_Add32,
   16665                             mkexpr(srcL),
   16666                             unop(isU ? Iop_16Uto32 : Iop_16Sto32,
   16667                                  unop(Iop_32to16,
   16668                                       genROR32(srcR, 8 * rot)))));
   16669          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   16670 
   16671          DIP("%cxtah%s r%u, r%u, r%u, ror #%u\n",
   16672              isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
   16673          goto decode_success;
   16674       }
   16675       /* fall through */
   16676    }
   16677 
   16678    /* ------------------- rev16, rev ------------------ */
   16679    if (INSN(27,16) == 0x6BF
   16680        && (INSN(11,4) == 0xFB/*rev16*/ || INSN(11,4) == 0xF3/*rev*/)) {
   16681       Bool isREV = INSN(11,4) == 0xF3;
   16682       UInt rM    = INSN(3,0);
   16683       UInt rD    = INSN(15,12);
   16684       if (rM != 15 && rD != 15) {
   16685          IRTemp rMt = newTemp(Ity_I32);
   16686          assign(rMt, getIRegA(rM));
   16687          IRTemp res = isREV ? gen_REV(rMt) : gen_REV16(rMt);
   16688          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   16689          DIP("rev%s%s r%u, r%u\n", isREV ? "" : "16",
   16690              nCC(INSN_COND), rD, rM);
   16691          goto decode_success;
   16692       }
   16693    }
   16694 
   16695    /* ------------------- revsh ----------------------- */
   16696    if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xFB) {
   16697       UInt rM = INSN(3,0);
   16698       UInt rD = INSN(15,12);
   16699       if (rM != 15 && rD != 15) {
   16700          IRTemp irt_rM  = newTemp(Ity_I32);
   16701          IRTemp irt_hi  = newTemp(Ity_I32);
   16702          IRTemp irt_low = newTemp(Ity_I32);
   16703          IRTemp irt_res = newTemp(Ity_I32);
   16704          assign(irt_rM, getIRegA(rM));
   16705          assign(irt_hi,
   16706                 binop(Iop_Sar32,
   16707                       binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
   16708                       mkU8(16)
   16709                 )
   16710          );
   16711          assign(irt_low,
   16712                 binop(Iop_And32,
   16713                       binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
   16714                       mkU32(0xFF)
   16715                 )
   16716          );
   16717          assign(irt_res,
   16718                 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
   16719          );
   16720          putIRegA(rD, mkexpr(irt_res), condT, Ijk_Boring);
   16721          DIP("revsh%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
   16722          goto decode_success;
   16723       }
   16724    }
   16725 
   16726    /* ------------------- rbit ------------------ */
   16727    if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xF3) {
   16728       UInt rD = INSN(15,12);
   16729       UInt rM = INSN(3,0);
   16730       if (rD != 15 && rM != 15) {
   16731          IRTemp arg = newTemp(Ity_I32);
   16732          assign(arg, getIRegA(rM));
   16733          IRTemp res = gen_BITREV(arg);
   16734          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   16735          DIP("rbit r%u, r%u\n", rD, rM);
   16736          goto decode_success;
   16737       }
   16738    }
   16739 
   16740    /* ------------------- smmul ------------------ */
   16741    if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
   16742        && INSN(15,12) == BITS4(1,1,1,1)
   16743        && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
   16744       UInt bitR = INSN(5,5);
   16745       UInt rD = INSN(19,16);
   16746       UInt rM = INSN(11,8);
   16747       UInt rN = INSN(3,0);
   16748       if (rD != 15 && rM != 15 && rN != 15) {
   16749          IRExpr* res
   16750          = unop(Iop_64HIto32,
   16751                 binop(Iop_Add64,
   16752                       binop(Iop_MullS32, getIRegA(rN), getIRegA(rM)),
   16753                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
   16754          putIRegA(rD, res, condT, Ijk_Boring);
   16755          DIP("smmul%s%s r%u, r%u, r%u\n",
   16756              nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM);
   16757          goto decode_success;
   16758       }
   16759    }
   16760 
   16761    /* ------------------- smmla ------------------ */
   16762    if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
   16763        && INSN(15,12) != BITS4(1,1,1,1)
   16764        && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
   16765       UInt bitR = INSN(5,5);
   16766       UInt rD = INSN(19,16);
   16767       UInt rA = INSN(15,12);
   16768       UInt rM = INSN(11,8);
   16769       UInt rN = INSN(3,0);
   16770       if (rD != 15 && rM != 15 && rN != 15) {
   16771          IRExpr* res
   16772          = unop(Iop_64HIto32,
   16773                 binop(Iop_Add64,
   16774                       binop(Iop_Add64,
   16775                             binop(Iop_32HLto64, getIRegA(rA), mkU32(0)),
   16776                             binop(Iop_MullS32, getIRegA(rN), getIRegA(rM))),
   16777                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
   16778          putIRegA(rD, res, condT, Ijk_Boring);
   16779          DIP("smmla%s%s r%u, r%u, r%u, r%u\n",
   16780              nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM, rA);
   16781          goto decode_success;
   16782       }
   16783    }
   16784 
   16785    /* -------------- (A1) LDRT reg+/-#imm12 -------------- */
   16786    /* Load Register Unprivileged:
   16787       ldrt<c> Rt, [Rn] {, #+/-imm12}
   16788    */
   16789    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,1) ) {
   16790       UInt rT     = INSN(15,12);
   16791       UInt rN     = INSN(19,16);
   16792       UInt imm12  = INSN(11,0);
   16793       UInt bU     = INSN(23,23);
   16794       Bool valid  = True;
   16795       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   16796       if (valid) {
   16797          IRTemp newRt = newTemp(Ity_I32);
   16798          loadGuardedLE( newRt,
   16799                         ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
   16800          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16801          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16802                              getIRegA(rN), mkU32(imm12));
   16803          putIRegA(rN, erN, condT, Ijk_Boring);
   16804          DIP("ldrt%s r%u, [r%u], #%c%u\n",
   16805              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   16806          goto decode_success;
   16807       }
   16808    }
   16809 
   16810    /* -------------- (A2) LDRT reg+/-reg with shift -------------- */
   16811    /* Load Register Unprivileged:
   16812       ldrt<c> Rt, [Rn], +/-Rm{, shift}
   16813    */
   16814    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,1)
   16815         && INSN(4,4) == 0 ) {
   16816       UInt rT     = INSN(15,12);
   16817       UInt rN     = INSN(19,16);
   16818       UInt rM     = INSN(3,0);
   16819       UInt imm5   = INSN(11,7);
   16820       UInt bU     = INSN(23,23);
   16821       UInt type   = INSN(6,5);
   16822       Bool valid  = True;
   16823       if (rT == 15 || rN == 15 || rN == rT || rM == 15
   16824           /* || (ArchVersion() < 6 && rM == rN) */)
   16825          valid = False;
   16826       if (valid) {
   16827          IRTemp newRt = newTemp(Ity_I32);
   16828          loadGuardedLE( newRt,
   16829                         ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
   16830          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16831          // dis_buf generated is slightly bogus, in fact.
   16832          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   16833                                                        type, imm5, dis_buf);
   16834          putIRegA(rN, erN, condT, Ijk_Boring);
   16835          DIP("ldrt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   16836          goto decode_success;
   16837       }
   16838    }
   16839 
   16840    /* -------------- (A1) LDRBT reg+/-#imm12 -------------- */
   16841    /* Load Register Byte Unprivileged:
   16842       ldrbt<c> Rt, [Rn], #+/-imm12
   16843    */
   16844    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,1) ) {
   16845       UInt rT     = INSN(15,12);
   16846       UInt rN     = INSN(19,16);
   16847       UInt imm12  = INSN(11,0);
   16848       UInt bU     = INSN(23,23);
   16849       Bool valid  = True;
   16850       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   16851       if (valid) {
   16852          IRTemp newRt = newTemp(Ity_I32);
   16853          loadGuardedLE( newRt,
   16854                         ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
   16855          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16856          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16857                              getIRegA(rN), mkU32(imm12));
   16858          putIRegA(rN, erN, condT, Ijk_Boring);
   16859          DIP("ldrbt%s r%u, [r%u], #%c%u\n",
   16860              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   16861          goto decode_success;
   16862       }
   16863    }
   16864 
   16865    /* -------------- (A2) LDRBT reg+/-reg with shift -------------- */
   16866    /* Load Register Byte Unprivileged:
   16867       ldrbt<c> Rt, [Rn], +/-Rm{, shift}
   16868    */
   16869    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,1)
   16870         && INSN(4,4) == 0 ) {
   16871       UInt rT     = INSN(15,12);
   16872       UInt rN     = INSN(19,16);
   16873       UInt rM     = INSN(3,0);
   16874       UInt imm5   = INSN(11,7);
   16875       UInt bU     = INSN(23,23);
   16876       UInt type   = INSN(6,5);
   16877       Bool valid  = True;
   16878       if (rT == 15 || rN == 15 || rN == rT || rM == 15
   16879           /* || (ArchVersion() < 6 && rM == rN) */)
   16880          valid = False;
   16881       if (valid) {
   16882          IRTemp newRt = newTemp(Ity_I32);
   16883          loadGuardedLE( newRt,
   16884                         ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
   16885          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16886          // dis_buf generated is slightly bogus, in fact.
   16887          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   16888                                                        type, imm5, dis_buf);
   16889          putIRegA(rN, erN, condT, Ijk_Boring);
   16890          DIP("ldrbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   16891          goto decode_success;
   16892       }
   16893    }
   16894 
   16895    /* -------------- (A1) LDRHT reg+/-#imm8 -------------- */
   16896    /* Load Register Halfword Unprivileged:
   16897       ldrht<c> Rt, [Rn] {, #+/-imm8}
   16898    */
   16899    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
   16900        && INSN(7,4) == BITS4(1,0,1,1) ) {
   16901       UInt rT    = INSN(15,12);
   16902       UInt rN    = INSN(19,16);
   16903       UInt bU    = INSN(23,23);
   16904       UInt imm4H = INSN(11,8);
   16905       UInt imm4L = INSN(3,0);
   16906       UInt imm8  = (imm4H << 4) | imm4L;
   16907       Bool valid = True;
   16908       if (rT == 15 || rN == 15 || rN == rT)
   16909          valid = False;
   16910       if (valid) {
   16911          IRTemp newRt = newTemp(Ity_I32);
   16912          loadGuardedLE( newRt,
   16913                         ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
   16914          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16915          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16916                              getIRegA(rN), mkU32(imm8));
   16917          putIRegA(rN, erN, condT, Ijk_Boring);
   16918          DIP("ldrht%s r%u, [r%u], #%c%u\n",
   16919              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   16920          goto decode_success;
   16921       }
   16922    }
   16923 
   16924    /* -------------- (A2) LDRHT reg+/-reg -------------- */
   16925    /* Load Register Halfword Unprivileged:
   16926       ldrht<c> Rt, [Rn], +/-Rm
   16927    */
   16928    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
   16929        && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
   16930       UInt rT    = INSN(15,12);
   16931       UInt rN    = INSN(19,16);
   16932       UInt rM    = INSN(3,0);
   16933       UInt bU    = INSN(23,23);
   16934       Bool valid = True;
   16935       if (rT == 15 || rN == 15 || rN == rT || rM == 15)
   16936          valid = False;
   16937       if (valid) {
   16938          IRTemp newRt = newTemp(Ity_I32);
   16939          loadGuardedLE( newRt,
   16940                         ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
   16941          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16942          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16943                              getIRegA(rN), getIRegA(rM));
   16944          putIRegA(rN, erN, condT, Ijk_Boring);
   16945          DIP("ldrht%s r%u, [r%u], %cr%u\n",
   16946              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   16947          goto decode_success;
   16948       }
   16949    }
   16950 
   16951    /* -------------- (A1) LDRSHT reg+/-#imm8 -------------- */
   16952    /* Load Register Signed Halfword Unprivileged:
   16953       ldrsht<c> Rt, [Rn] {, #+/-imm8}
   16954    */
   16955    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
   16956        && INSN(7,4) == BITS4(1,1,1,1)) {
   16957       UInt rT    = INSN(15,12);
   16958       UInt rN    = INSN(19,16);
   16959       UInt bU    = INSN(23,23);
   16960       UInt imm4H = INSN(11,8);
   16961       UInt imm4L = INSN(3,0);
   16962       UInt imm8  = (imm4H << 4) | imm4L;
   16963       Bool valid = True;
   16964       if (rN == 15 || rT == 15 || rN == rT)
   16965          valid = False;
   16966       if (valid) {
   16967          IRTemp newRt = newTemp(Ity_I32);
   16968          loadGuardedLE( newRt,
   16969                         ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
   16970          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16971          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16972                              getIRegA(rN), mkU32(imm8));
   16973          putIRegA(rN, erN, condT, Ijk_Boring);
   16974          DIP("ldrsht%s r%u, [r%u], #%c%u\n",
   16975              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   16976          goto decode_success;
   16977       }
   16978    }
   16979 
   16980    /* -------------- (A2) LDRSHT reg+/-reg -------------- */
   16981    /* Load Register Signed Halfword Unprivileged:
   16982       ldrsht<c> Rt, [Rn], +/-Rm
   16983    */
   16984    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
   16985        && INSN(11,4) == BITS8(0,0,0,0,1,1,1,1)) {
   16986       UInt rT    = INSN(15,12);
   16987       UInt rN    = INSN(19,16);
   16988       UInt rM    = INSN(3,0);
   16989       UInt bU    = INSN(23,23);
   16990       Bool valid = True;
   16991       if (rN == 15 || rT == 15 || rN == rT || rM == 15)
   16992          valid = False;
   16993       if (valid) {
   16994          IRTemp newRt = newTemp(Ity_I32);
   16995          loadGuardedLE( newRt,
   16996                         ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
   16997          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16998          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16999                              getIRegA(rN), getIRegA(rM));
   17000          putIRegA(rN, erN, condT, Ijk_Boring);
   17001          DIP("ldrsht%s r%u, [r%u], %cr%u\n",
   17002              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   17003          goto decode_success;
   17004       }
   17005    }
   17006 
   17007    /* -------------- (A1) LDRSBT reg+/-#imm8 -------------- */
   17008    /* Load Register Signed Byte Unprivileged:
   17009       ldrsbt<c> Rt, [Rn] {, #+/-imm8}
   17010    */
   17011    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
   17012        && INSN(7,4) == BITS4(1,1,0,1)) {
   17013       UInt rT    = INSN(15,12);
   17014       UInt rN    = INSN(19,16);
   17015       UInt bU    = INSN(23,23);
   17016       UInt imm4H = INSN(11,8);
   17017       UInt imm4L = INSN(3,0);
   17018       UInt imm8  = (imm4H << 4) | imm4L;
   17019       Bool valid = True;
   17020       if (rT == 15 || rN == 15 || rN == rT)
   17021          valid = False;
   17022       if (valid) {
   17023          IRTemp newRt = newTemp(Ity_I32);
   17024          loadGuardedLE( newRt,
   17025                         ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
   17026          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   17027          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   17028                              getIRegA(rN), mkU32(imm8));
   17029          putIRegA(rN, erN, condT, Ijk_Boring);
   17030          DIP("ldrsbt%s r%u, [r%u], #%c%u\n",
   17031              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   17032          goto decode_success;
   17033       }
   17034    }
   17035 
   17036    /* -------------- (A2) LDRSBT reg+/-reg -------------- */
   17037    /* Load Register Signed Byte Unprivileged:
   17038       ldrsbt<c> Rt, [Rn], +/-Rm
   17039    */
   17040    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
   17041        && INSN(11,4) == BITS8(0,0,0,0,1,1,0,1)) {
   17042       UInt rT    = INSN(15,12);
   17043       UInt rN    = INSN(19,16);
   17044       UInt bU    = INSN(23,23);
   17045       UInt rM    = INSN(3,0);
   17046       Bool valid = True;
   17047       if (rT == 15 || rN == 15 || rN == rT || rM == 15)
   17048          valid = False;
   17049       if (valid) {
   17050          IRTemp newRt = newTemp(Ity_I32);
   17051          loadGuardedLE( newRt,
   17052                         ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
   17053          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   17054          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   17055                              getIRegA(rN), getIRegA(rM));
   17056          putIRegA(rN, erN, condT, Ijk_Boring);
   17057          DIP("ldrsbt%s r%u, [r%u], %cr%u\n",
   17058              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   17059          goto decode_success;
   17060       }
   17061    }
   17062 
   17063    /* -------------- (A1) STRBT reg+/-#imm12 -------------- */
   17064    /* Store Register Byte Unprivileged:
   17065       strbt<c> Rt, [Rn], #+/-imm12
   17066    */
   17067    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,0) ) {
   17068       UInt rT     = INSN(15,12);
   17069       UInt rN     = INSN(19,16);
   17070       UInt imm12  = INSN(11,0);
   17071       UInt bU     = INSN(23,23);
   17072       Bool valid = True;
   17073       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   17074       if (valid) {
   17075          IRExpr* address = getIRegA(rN);
   17076          IRExpr* data = unop(Iop_32to8, getIRegA(rT));
   17077          storeGuardedLE( address, data, condT);
   17078          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   17079                                getIRegA(rN), mkU32(imm12));
   17080          putIRegA(rN, newRn, condT, Ijk_Boring);
   17081          DIP("strbt%s r%u, [r%u], #%c%u\n",
   17082              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   17083          goto decode_success;
   17084       }
   17085    }
   17086 
   17087    /* -------------- (A2) STRBT reg+/-reg -------------- */
   17088    /* Store Register Byte Unprivileged:
   17089       strbt<c> Rt, [Rn], +/-Rm{, shift}
   17090    */
   17091    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,0)
   17092        && INSN(4,4) == 0) {
   17093       UInt rT     = INSN(15,12);
   17094       UInt rN     = INSN(19,16);
   17095       UInt imm5   = INSN(11,7);
   17096       UInt type   = INSN(6,5);
   17097       UInt rM     = INSN(3,0);
   17098       UInt bU     = INSN(23,23);
   17099       Bool valid  = True;
   17100       if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
   17101       if (valid) {
   17102          IRExpr* address = getIRegA(rN);
   17103          IRExpr* data = unop(Iop_32to8, getIRegA(rT));
   17104          storeGuardedLE( address, data, condT);
   17105          // dis_buf generated is slightly bogus, in fact.
   17106          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   17107                                                        type, imm5, dis_buf);
   17108          putIRegA(rN, erN, condT, Ijk_Boring);
   17109          DIP("strbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   17110          goto decode_success;
   17111       }
   17112    }
   17113 
   17114    /* -------------- (A1) STRHT reg+/-#imm8 -------------- */
   17115    /* Store Register Halfword Unprivileged:
   17116       strht<c> Rt, [Rn], #+/-imm8
   17117    */
   17118    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,0)
   17119        && INSN(7,4) == BITS4(1,0,1,1) ) {
   17120       UInt rT    = INSN(15,12);
   17121       UInt rN    = INSN(19,16);
   17122       UInt imm4H = INSN(11,8);
   17123       UInt imm4L = INSN(3,0);
   17124       UInt imm8  = (imm4H << 4) | imm4L;
   17125       UInt bU    = INSN(23,23);
   17126       Bool valid = True;
   17127       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   17128       if (valid) {
   17129          IRExpr* address = getIRegA(rN);
   17130          IRExpr* data = unop(Iop_32to16, getIRegA(rT));
   17131          storeGuardedLE( address, data, condT);
   17132          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   17133                                getIRegA(rN), mkU32(imm8));
   17134          putIRegA(rN, newRn, condT, Ijk_Boring);
   17135          DIP("strht%s r%u, [r%u], #%c%u\n",
   17136              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   17137          goto decode_success;
   17138       }
   17139    }
   17140 
   17141    /* -------------- (A2) STRHT reg+reg -------------- */
   17142    /* Store Register Halfword Unprivileged:
   17143       strht<c> Rt, [Rn], +/-Rm
   17144    */
   17145    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,0)
   17146        && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
   17147       UInt rT    = INSN(15,12);
   17148       UInt rN    = INSN(19,16);
   17149       UInt rM    = INSN(3,0);
   17150       UInt bU    = INSN(23,23);
   17151       Bool valid = True;
   17152       if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
   17153       if (valid) {
   17154          IRExpr* address = getIRegA(rN);
   17155          IRExpr* data = unop(Iop_32to16, getIRegA(rT));
   17156          storeGuardedLE( address, data, condT);
   17157          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   17158                                getIRegA(rN), getIRegA(rM));
   17159          putIRegA(rN, newRn, condT, Ijk_Boring);
   17160          DIP("strht%s r%u, [r%u], %cr%u\n",
   17161              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   17162          goto decode_success;
   17163       }
   17164    }
   17165 
   17166    /* -------------- (A1) STRT reg+imm12 -------------- */
   17167    /* Store Register Unprivileged:
   17168       strt<c> Rt, [Rn], #+/-imm12
   17169    */
   17170    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,0) ) {
   17171       UInt rT    = INSN(15,12);
   17172       UInt rN    = INSN(19,16);
   17173       UInt imm12 = INSN(11,0);
   17174       UInt bU    = INSN(23,23);
   17175       Bool valid = True;
   17176       if (rN == 15 || rN == rT) valid = False;
   17177       if (valid) {
   17178          IRExpr* address = getIRegA(rN);
   17179          storeGuardedLE( address, getIRegA(rT), condT);
   17180          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   17181                                getIRegA(rN), mkU32(imm12));
   17182          putIRegA(rN, newRn, condT, Ijk_Boring);
   17183          DIP("strt%s r%u, [r%u], %c%u\n",
   17184              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   17185          goto decode_success;
   17186       }
   17187    }
   17188 
   17189    /* -------------- (A2) STRT reg+reg -------------- */
   17190    /* Store Register Unprivileged:
   17191       strt<c> Rt, [Rn], +/-Rm{, shift}
   17192    */
   17193    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,0)
   17194        && INSN(4,4) == 0 ) {
   17195       UInt rT    = INSN(15,12);
   17196       UInt rN    = INSN(19,16);
   17197       UInt rM    = INSN(3,0);
   17198       UInt type  = INSN(6,5);
   17199       UInt imm5  = INSN(11,7);
   17200       UInt bU    = INSN(23,23);
   17201       Bool valid = True;
   17202       if (rN == 15 || rN == rT || rM == 15) valid = False;
   17203       /* FIXME We didn't do:
   17204          if ArchVersion() < 6 && rM == rN then UNPREDICTABLE */
   17205       if (valid) {
   17206          storeGuardedLE( getIRegA(rN), getIRegA(rT), condT);
   17207          // dis_buf generated is slightly bogus, in fact.
   17208          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   17209                                                        type, imm5, dis_buf);
   17210          putIRegA(rN, erN, condT, Ijk_Boring);
   17211          DIP("strt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   17212          goto decode_success;
   17213       }
   17214    }
   17215 
   17216    /* ----------------------------------------------------------- */
   17217    /* -- ARMv7 instructions                                    -- */
   17218    /* ----------------------------------------------------------- */
   17219 
   17220    /* -------------- read CP15 TPIDRURO register ------------- */
   17221    /* mrc     p15, 0, r0,  c13, c0, 3  up to
   17222       mrc     p15, 0, r14, c13, c0, 3
   17223    */
   17224    /* I don't know whether this is really v7-only.  But anyway, we
   17225       have to support it since arm-linux uses TPIDRURO as a thread
   17226       state register. */
   17227    if (0x0E1D0F70 == (insn & 0x0FFF0FFF)) {
   17228       UInt rD = INSN(15,12);
   17229       if (rD <= 14) {
   17230          /* skip r15, that's too stupid to handle */
   17231          putIRegA(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32),
   17232                       condT, Ijk_Boring);
   17233          DIP("mrc%s p15,0, r%u, c13, c0, 3\n", nCC(INSN_COND), rD);
   17234          goto decode_success;
   17235       }
   17236       /* fall through */
   17237    }
   17238 
   17239    /* -------------- read CP15 PMUSRENR register ------------- */
   17240    /* mrc     p15, 0, r0,  c9, c14, 0  up to
   17241       mrc     p15, 0, r14, c9, c14, 0
   17242    */
   17243    /* A program reading this register is really asking "which
   17244       performance monitoring registers are available in user space?"
   17245       The simple answer here is to return zero, meaning "none".  See
   17246       #345984. */
   17247    if (0x0E190F1E == (insn & 0x0FFF0FFF)) {
   17248       UInt rD = INSN(15,12);
   17249       if (rD <= 14) {
   17250          /* skip r15, that's too stupid to handle */
   17251          putIRegA(rD, mkU32(0), condT, Ijk_Boring);
   17252          DIP("mrc%s p15,0, r%u, c9, c14, 0\n", nCC(INSN_COND), rD);
   17253          goto decode_success;
   17254       }
   17255       /* fall through */
   17256    }
   17257 
   17258    /* Handle various kinds of barriers.  This is rather indiscriminate
   17259       in the sense that they are all turned into an IR Fence, which
   17260       means we don't know which they are, so the back end has to
   17261       re-emit them all when it comes across an IR Fence.
   17262    */
   17263    /* v6 */ /* mcr 15, 0, rT, c7, c10, 5 */
   17264    if (0xEE070FBA == (insn & 0xFFFF0FFF)) {
   17265       UInt rT = INSN(15,12);
   17266       if (rT <= 14) {
   17267          /* mcr 15, 0, rT, c7, c10, 5 (v6) equiv to DMB (v7).  Data
   17268             Memory Barrier -- ensures ordering of memory accesses. */
   17269          stmt( IRStmt_MBE(Imbe_Fence) );
   17270          DIP("mcr 15, 0, r%u, c7, c10, 5 (data memory barrier)\n", rT);
   17271          goto decode_success;
   17272       }
   17273       /* fall through */
   17274    }
   17275    /* other flavours of barrier */
   17276    switch (insn) {
   17277       case 0xEE070F9A: /* v6 */
   17278          /* mcr 15, 0, r0, c7, c10, 4 (v6) equiv to DSB (v7).  Data
   17279             Synch Barrier -- ensures completion of memory accesses. */
   17280          stmt( IRStmt_MBE(Imbe_Fence) );
   17281          DIP("mcr 15, 0, r0, c7, c10, 4 (data synch barrier)\n");
   17282          goto decode_success;
   17283       case 0xEE070F95: /* v6 */
   17284          /* mcr 15, 0, r0, c7, c5, 4 (v6) equiv to ISB (v7).
   17285             Instruction Synchronisation Barrier (or Flush Prefetch
   17286             Buffer) -- a pipe flush, I think.  I suspect we could
   17287             ignore those, but to be on the safe side emit a fence
   17288             anyway. */
   17289          stmt( IRStmt_MBE(Imbe_Fence) );
   17290          DIP("mcr 15, 0, r0, c7, c5, 4 (insn synch barrier)\n");
   17291          goto decode_success;
   17292       default:
   17293          break;
   17294    }
   17295 
   17296    /* ----------------------------------------------------------- */
   17297    /* -- Hints                                                 -- */
   17298    /* ----------------------------------------------------------- */
   17299 
   17300    switch (insn & 0x0FFFFFFF) {
   17301       /* ------------------- NOP ------------------ */
   17302       case 0x0320F000:
   17303          DIP("nop%s\n", nCC(INSN_COND));
   17304          goto decode_success;
   17305       /* ------------------- YIELD ------------------ */
   17306       case 0x0320F001:
   17307          /* Continue after conditionally yielding. */
   17308          DIP("yield%s\n", nCC(INSN_COND));
   17309          stmt( IRStmt_Exit( unop(Iop_32to1,
   17310                                  condT == IRTemp_INVALID
   17311                                     ? mkU32(1) : mkexpr(condT)),
   17312                             Ijk_Yield,
   17313                             IRConst_U32(guest_R15_curr_instr_notENC + 4),
   17314                             OFFB_R15T ));
   17315          goto decode_success;
   17316       default:
   17317          break;
   17318    }
   17319 
   17320    /* ----------------------------------------------------------- */
   17321    /* -- VFP (CP 10, CP 11) instructions (in ARM mode)         -- */
   17322    /* ----------------------------------------------------------- */
   17323 
   17324    if (INSN_COND != ARMCondNV) {
   17325       Bool ok_vfp = decode_CP10_CP11_instruction (
   17326                        &dres, INSN(27,0), condT, INSN_COND,
   17327                        False/*!isT*/
   17328                     );
   17329       if (ok_vfp)
   17330          goto decode_success;
   17331    }
   17332 
   17333    /* ----------------------------------------------------------- */
   17334    /* -- NEON instructions (in ARM mode)                       -- */
   17335    /* ----------------------------------------------------------- */
   17336 
   17337    /* These are all in NV space, and so are taken care of (far) above,
   17338       by a call from this function to decode_NV_instruction(). */
   17339 
   17340    /* ----------------------------------------------------------- */
   17341    /* -- v6 media instructions (in ARM mode)                   -- */
   17342    /* ----------------------------------------------------------- */
   17343 
   17344    { Bool ok_v6m = decode_V6MEDIA_instruction(
   17345                        &dres, INSN(27,0), condT, INSN_COND,
   17346                        False/*!isT*/
   17347                    );
   17348      if (ok_v6m)
   17349         goto decode_success;
   17350    }
   17351 
   17352    /* ----------------------------------------------------------- */
   17353    /* -- Undecodable                                           -- */
   17354    /* ----------------------------------------------------------- */
   17355 
   17356    goto decode_failure;
   17357    /*NOTREACHED*/
   17358 
   17359   decode_failure:
   17360    /* All decode failures end up here. */
   17361    if (sigill_diag) {
   17362       vex_printf("disInstr(arm): unhandled instruction: "
   17363                  "0x%x\n", insn);
   17364       vex_printf("                 cond=%d(0x%x) 27:20=%d(0x%02x) "
   17365                                    "4:4=%d "
   17366                                    "3:0=%d(0x%x)\n",
   17367                  (Int)INSN_COND, (UInt)INSN_COND,
   17368                  (Int)INSN(27,20), (UInt)INSN(27,20),
   17369                  (Int)INSN(4,4),
   17370                  (Int)INSN(3,0), (UInt)INSN(3,0) );
   17371    }
   17372 
   17373    /* Tell the dispatcher that this insn cannot be decoded, and so has
   17374       not been executed, and (is currently) the next to be executed.
   17375       R15 should be up-to-date since it is made so at the start of each
   17376       insn, but nevertheless be paranoid and update it again right
   17377       now. */
   17378    vassert(0 == (guest_R15_curr_instr_notENC & 3));
   17379    llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
   17380    dres.len         = 0;
   17381    dres.whatNext    = Dis_StopHere;
   17382    dres.jk_StopHere = Ijk_NoDecode;
   17383    dres.continueAt  = 0;
   17384    return dres;
   17385 
   17386   decode_success:
   17387    /* All decode successes end up here. */
   17388    DIP("\n");
   17389 
   17390    vassert(dres.len == 4 || dres.len == 20);
   17391 
   17392    /* Now then.  Do we have an implicit jump to r15 to deal with? */
   17393    if (r15written) {
   17394       /* If we get a jump to deal with, we assume that there's been no
   17395          other competing branch stuff previously generated for this
   17396          insn.  That's reasonable, in the sense that the ARM insn set
   17397          appears to declare as "Unpredictable" any instruction which
   17398          generates more than one possible new value for r15.  Hence
   17399          just assert.  The decoders themselves should check against
   17400          all such instructions which are thusly Unpredictable, and
   17401          decline to decode them.  Hence we should never get here if we
   17402          have competing new values for r15, and hence it is safe to
   17403          assert here. */
   17404       vassert(dres.whatNext == Dis_Continue);
   17405       vassert(irsb->next == NULL);
   17406       vassert(irsb->jumpkind == Ijk_Boring);
   17407       /* If r15 is unconditionally written, terminate the block by
   17408          jumping to it.  If it's conditionally written, still
   17409          terminate the block (a shame, but we can't do side exits to
   17410          arbitrary destinations), but first jump to the next
   17411          instruction if the condition doesn't hold. */
   17412       /* We can't use getIReg(15) to get the destination, since that
   17413          will produce r15+8, which isn't what we want.  Must use
   17414          llGetIReg(15) instead. */
   17415       if (r15guard == IRTemp_INVALID) {
   17416          /* unconditional */
   17417       } else {
   17418          /* conditional */
   17419          stmt( IRStmt_Exit(
   17420                   unop(Iop_32to1,
   17421                        binop(Iop_Xor32,
   17422                              mkexpr(r15guard), mkU32(1))),
   17423                   r15kind,
   17424                   IRConst_U32(guest_R15_curr_instr_notENC + 4),
   17425                   OFFB_R15T
   17426          ));
   17427       }
   17428       /* This seems crazy, but we're required to finish the insn with
   17429          a write to the guest PC.  As usual we rely on ir_opt to tidy
   17430          up later. */
   17431       llPutIReg(15, llGetIReg(15));
   17432       dres.whatNext    = Dis_StopHere;
   17433       dres.jk_StopHere = r15kind;
   17434    } else {
   17435       /* Set up the end-state in the normal way. */
   17436       switch (dres.whatNext) {
   17437          case Dis_Continue:
   17438             llPutIReg(15, mkU32(dres.len + guest_R15_curr_instr_notENC));
   17439             break;
   17440          case Dis_ResteerU:
   17441          case Dis_ResteerC:
   17442             llPutIReg(15, mkU32(dres.continueAt));
   17443             break;
   17444          case Dis_StopHere:
   17445             break;
   17446          default:
   17447             vassert(0);
   17448       }
   17449    }
   17450 
   17451    return dres;
   17452 
   17453 #  undef INSN_COND
   17454 #  undef INSN
   17455 }
   17456 
   17457 
   17458 /*------------------------------------------------------------*/
   17459 /*--- Disassemble a single Thumb2 instruction              ---*/
   17460 /*------------------------------------------------------------*/
   17461 
   17462 static const UChar it_length_table[256]; /* fwds; indexed by the low byte of an IT insn halfword, gives # guarded insns */
   17463 
   17464 /* NB: in Thumb mode we do fetches of regs with getIRegT, which
   17465    automagically adds 4 to fetches of r15.  However, writes to regs
   17466    are done with putIRegT, which disallows writes to r15.  Hence any
   17467    r15 writes and associated jumps have to be done "by hand". */
   17468 
   17469 /* Disassemble a single Thumb instruction into IR.  The instruction is
   17470    located in host memory at guest_instr, and has (decoded) guest IP
   17471    of guest_R15_curr_instr_notENC, which will have been set before the
   17472    call here. */
   17473 
   17474 static
   17475 DisResult disInstr_THUMB_WRK (
   17476              Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
   17477              Bool         resteerCisOk,
   17478              void*        callback_opaque,
   17479              const UChar* guest_instr,
   17480              const VexArchInfo* archinfo,
   17481              const VexAbiInfo*  abiinfo,
   17482              Bool         sigill_diag
   17483           )
   17484 {
   17485    /* A macro to fish bits out of insn0.  There's also INSN1, to fish
   17486       bits out of insn1, but that's defined only after the end of the
   17487       16-bit insn decoder, so as to stop it mistakenly being used
   17488       therein. */
   17489 #  define INSN0(_bMax,_bMin)  SLICE_UInt(((UInt)insn0), (_bMax), (_bMin))
   17490 
   17491    DisResult dres;
   17492    UShort    insn0; /*  first 16 bits of the insn */
   17493    UShort    insn1; /* second 16 bits of the insn */
   17494    //Bool      allow_VFP = False;
   17495    //UInt      hwcaps = archinfo->hwcaps;
   17496    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
   17497 
   17498    /* Summary result of the ITxxx backwards analysis: False == safe
   17499       but suboptimal. */
   17500    Bool guaranteedUnconditional = False;
   17501 
   17502    /* What insn variants are we supporting today? */
   17503    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
   17504    // etc etc
   17505 
   17506    /* Set result defaults. */
   17507    dres.whatNext    = Dis_Continue;
   17508    dres.len         = 2;
   17509    dres.continueAt  = 0;
   17510    dres.jk_StopHere = Ijk_INVALID;
   17511 
   17512    /* Set default actions for post-insn handling of writes to r15, if
   17513       required. */
   17514    r15written = False;
   17515    r15guard   = IRTemp_INVALID; /* unconditional */
   17516    r15kind    = Ijk_Boring;
   17517 
   17518    /* Insns could be 2 or 4 bytes long.  Just get the first 16 bits at
   17519       this point.  If we need the second 16, get them later.  We can't
   17520       get them both out immediately because it risks a fault (very
   17521       unlikely, but ..) if the second 16 bits aren't actually
   17522       necessary. */
   17523    insn0 = getUShortLittleEndianly( guest_instr );
   17524    insn1 = 0; /* We'll get it later, once we know we need it. */
   17525 
   17526    /* Similarly, will set this later. */
   17527    IRTemp old_itstate = IRTemp_INVALID;
   17528 
   17529    if (0) vex_printf("insn: 0x%x\n", insn0);
   17530 
   17531    DIP("\t(thumb) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
   17532 
   17533    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   17534 
   17535    /* ----------------------------------------------------------- */
   17536    /* Spot "Special" instructions (see comment at top of file). */
   17537    {
   17538       const UChar* code = guest_instr;
   17539       /* Spot the 16-byte preamble:
   17540 
   17541          ea4f 0cfc  mov.w   ip, ip, ror #3
   17542          ea4f 3c7c  mov.w   ip, ip, ror #13
   17543          ea4f 7c7c  mov.w   ip, ip, ror #29
   17544          ea4f 4cfc  mov.w   ip, ip, ror #19
   17545       */
   17546       UInt word1 = 0x0CFCEA4F;
   17547       UInt word2 = 0x3C7CEA4F;
   17548       UInt word3 = 0x7C7CEA4F;
   17549       UInt word4 = 0x4CFCEA4F;
   17550       if (getUIntLittleEndianly(code+ 0) == word1 &&
   17551           getUIntLittleEndianly(code+ 4) == word2 &&
   17552           getUIntLittleEndianly(code+ 8) == word3 &&
   17553           getUIntLittleEndianly(code+12) == word4) {
   17554          /* Got a "Special" instruction preamble.  Which one is it? */
   17555          // 0x 0A 0A EA 4A
   17556          if (getUIntLittleEndianly(code+16) == 0x0A0AEA4A
   17557                                                /* orr.w r10,r10,r10 */) {
   17558             /* R3 = client_request ( R4 ) */
   17559             DIP("r3 = client_request ( %%r4 )\n");
   17560             llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
   17561             dres.jk_StopHere = Ijk_ClientReq;
   17562             dres.whatNext    = Dis_StopHere;
   17563             goto decode_success;
   17564          }
   17565          else
   17566          // 0x 0B 0B EA 4B
   17567          if (getUIntLittleEndianly(code+16) == 0x0B0BEA4B
   17568                                                /* orr r11,r11,r11 */) {
   17569             /* R3 = guest_NRADDR */
   17570             DIP("r3 = guest_NRADDR\n");
   17571             dres.len = 20;
   17572             llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
   17573             goto decode_success;
   17574          }
   17575          else
   17576          // 0x 0C 0C EA 4C
   17577          if (getUIntLittleEndianly(code+16) == 0x0C0CEA4C
   17578                                                /* orr r12,r12,r12 */) {
   17579             /*  branch-and-link-to-noredir R4 */
   17580             DIP("branch-and-link-to-noredir r4\n");
   17581             llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
   17582             llPutIReg(15, getIRegT(4));
   17583             dres.jk_StopHere = Ijk_NoRedir;
   17584             dres.whatNext    = Dis_StopHere;
   17585             goto decode_success;
   17586          }
   17587          else
   17588          // 0x 09 09 EA 49
   17589          if (getUIntLittleEndianly(code+16) == 0x0909EA49
   17590                                                /* orr r9,r9,r9 */) {
   17591             /* IR injection */
   17592             DIP("IR injection\n");
   17593             vex_inject_ir(irsb, Iend_LE);
   17594             // Invalidate the current insn. The reason is that the IRop we're
   17595             // injecting here can change. In which case the translation has to
   17596             // be redone. For ease of handling, we simply invalidate all the
   17597             // time.
   17598             stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
   17599             stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
   17600             llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
   17601             dres.whatNext    = Dis_StopHere;
   17602             dres.jk_StopHere = Ijk_InvalICache;
   17603             goto decode_success;
   17604          }
   17605          /* We don't know what it is.  Set insn0 so decode_failure
   17606             can print the insn following the Special-insn preamble. */
   17607          insn0 = getUShortLittleEndianly(code+16);
   17608          goto decode_failure;
   17609          /*NOTREACHED*/
   17610       }
   17611 
   17612    }
   17613 
   17614    /* ----------------------------------------------------------- */
   17615 
   17616    /* Main Thumb instruction decoder starts here.  It's a series of
   17617       switches which examine ever longer bit sequences at the MSB of
   17618       the instruction word, first for 16-bit insns, then for 32-bit
   17619       insns. */
   17620 
   17621    /* --- BEGIN ITxxx optimisation analysis --- */
   17622    /* This is a crucial optimisation for the ITState boilerplate that
   17623       follows.  Examine the 9 halfwords preceding this instruction,
   17624       and if we are absolutely sure that none of them constitute an
   17625       'it' instruction, then we can be sure that this instruction is
   17626       not under the control of any 'it' instruction, and so
   17627       guest_ITSTATE must be zero.  So write zero into ITSTATE right
   17628       now, so that iropt can fold out almost all of the resulting
   17629       junk.
   17630 
   17631       If we aren't sure, we can always safely skip this step.  So be a
   17632       bit conservative about it: only poke around in the same page as
   17633       this instruction, lest we get a fault from the previous page
   17634       that would not otherwise have happened.  The saving grace is
   17635       that such skipping is pretty rare -- it only happens,
   17636       statistically, 18/4096ths of the time, so is judged unlikely to
   17637       be a performance problem.
   17638 
   17639       FIXME: do better.  Take into account the number of insns covered
   17640       by any IT insns we find, to rule out cases where an IT clearly
   17641       cannot cover this instruction.  This would improve behaviour for
   17642       branch targets immediately following an IT-guarded group that is
   17643       not of full length.  Eg, (and completely ignoring issues of 16-
   17644       vs 32-bit insn length):
   17645 
   17646              ite cond
   17647              insn1
   17648              insn2
   17649       label: insn3
   17650              insn4
   17651 
   17652       The 'it' only conditionalises insn1 and insn2.  However, the
   17653       current analysis is conservative and considers insn3 and insn4
   17654       also possibly guarded.  Hence if 'label:' is the start of a hot
   17655       loop we will get a big performance hit.
   17656    */
   17657    {
   17658       /* Summary result of this analysis: False == safe but
   17659          suboptimal. */
   17660       vassert(guaranteedUnconditional == False);
   17661 
   17662       UInt pc = guest_R15_curr_instr_notENC;
   17663       vassert(0 == (pc & 1));
   17664 
   17665       UInt pageoff = pc & 0xFFF;
   17666       if (pageoff >= 18) {
   17667          /* It's safe to poke about in the 9 halfwords preceding this
   17668             insn.  So, have a look at them. */
   17669          guaranteedUnconditional = True; /* assume no 'it' insn found,
   17670                                             till we do */
   17671          UShort* hwp = (UShort*)(HWord)pc;
   17672          Int i;
   17673          for (i = -1; i >= -9; i--) {
   17674             /* We're in the same page.  (True, but commented out due
   17675                to expense.) */
   17676             /*
   17677             vassert( ( ((UInt)(&hwp[i])) & 0xFFFFF000 )
   17678                       == ( pc & 0xFFFFF000 ) );
   17679             */
   17680             /* All valid IT instructions must have the form 0xBFxy,
   17681                where x can be anything, but y must be nonzero.  Find
   17682                the number of insns covered by it (1 .. 4) and check to
   17683                see if it can possibly reach up to the instruction in
   17684                question.  Some (x,y) combinations mean UNPREDICTABLE,
   17685                and the table is constructed to be conservative by
   17686                returning 4 for those cases, so the analysis is safe
   17687                even if the code uses unpredictable IT instructions (in
   17688                which case its authors are nuts, but hey.)  */
   17689             UShort hwp_i = hwp[i];
   17690             if (UNLIKELY((hwp_i & 0xFF00) == 0xBF00 && (hwp_i & 0xF) != 0)) {
   17691                /* might be an 'it' insn. */
   17692                /* # guarded insns */
   17693                Int n_guarded = (Int)it_length_table[hwp_i & 0xFF];
   17694                vassert(n_guarded >= 1 && n_guarded <= 4);
   17695                if (n_guarded * 2 /* # guarded HWs, worst case */
   17696                    > (-(i+1)))   /* -(i+1): # remaining HWs after the IT */
   17697                    /* -(i+0) also seems to work, even though I think
   17698                       it's wrong.  I don't understand that. */
   17699                   guaranteedUnconditional = False;
   17700                break;
   17701             }
   17702          }
   17703       }
   17704    }
   17705    /* --- END ITxxx optimisation analysis --- */
   17706 
   17707    /* Generate the guarding condition for this insn, by examining
   17708       ITSTATE.  Assign it to condT.  Also, generate new
   17709       values for ITSTATE ready for stuffing back into the
   17710       guest state, but don't actually do the Put yet, since it will
   17711       need to be stuffed back in only after the instruction gets to a
   17712       point where it is sure to complete.  Mostly we let the code at
   17713       decode_success handle this, but in cases where the insn contains
   17714       a side exit, we have to update them before the exit. */
   17715 
   17716    /* If the ITxxx optimisation analysis above could not prove that
   17717       this instruction is guaranteed unconditional, we insert a
   17718       lengthy IR preamble to compute the guarding condition at
   17719       runtime.  If it can prove it (which obviously we hope is the
   17720       normal case) then we insert a minimal preamble, which is
   17721       equivalent to setting guest_ITSTATE to zero and then folding
   17722       that through the full preamble (which completely disappears). */
   17723 
   17724    IRTemp condT              = IRTemp_INVALID;
   17725    IRTemp cond_AND_notInIT_T = IRTemp_INVALID;
   17726 
   17727    IRTemp new_itstate        = IRTemp_INVALID;
   17728    vassert(old_itstate == IRTemp_INVALID);
   17729 
   17730    if (guaranteedUnconditional) {
   17731       /* BEGIN "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
   17732 
   17733       // ITSTATE = 0 :: I32
   17734       IRTemp z32 = newTemp(Ity_I32);
   17735       assign(z32, mkU32(0));
   17736       put_ITSTATE(z32);
   17737 
   17738       // old_itstate = 0 :: I32
   17739       //
   17740       // old_itstate = get_ITSTATE();
   17741       old_itstate = z32; /* 0 :: I32 */
   17742 
   17743       // new_itstate = old_itstate >> 8
   17744       //             = 0 >> 8
   17745       //             = 0 :: I32
   17746       //
   17747       // new_itstate = newTemp(Ity_I32);
   17748       // assign(new_itstate,
   17749       //        binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   17750       new_itstate = z32;
   17751 
   17752       // ITSTATE = 0 :: I32(again)
   17753       //
   17754       // put_ITSTATE(new_itstate);
   17755 
   17756       // condT1 = calc_cond_dyn( xor(and(old_itstate,0xF0), 0xE0) )
   17757       //        = calc_cond_dyn( xor(0,0xE0) )
   17758       //        = calc_cond_dyn ( 0xE0 )
   17759       //        = 1 :: I32
   17760       // Not that this matters, since the computed value is not used:
   17761       // see condT folding below
   17762       //
   17763       // IRTemp condT1 = newTemp(Ity_I32);
   17764       // assign(condT1,
   17765       //        mk_armg_calculate_condition_dyn(
   17766       //           binop(Iop_Xor32,
   17767       //                 binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
   17768       //                 mkU32(0xE0))
   17769       //       )
   17770       // );
   17771 
   17772       // condT = 32to8(and32(old_itstate,0xF0)) == 0  ? 1  : condT1
   17773       //       = 32to8(and32(0,0xF0)) == 0  ? 1  : condT1
   17774       //       = 32to8(0) == 0  ? 1  : condT1
   17775       //       = 0 == 0  ? 1  : condT1
   17776       //       = 1
   17777       //
   17778       // condT = newTemp(Ity_I32);
   17779       // assign(condT, IRExpr_ITE(
   17780       //                  unop(Iop_32to8, binop(Iop_And32,
   17781       //                                        mkexpr(old_itstate),
   17782       //                                        mkU32(0xF0))),
   17783       //                  mkexpr(condT1),
   17784       //                  mkU32(1))
   17785       //       ));
   17786       condT = newTemp(Ity_I32);
   17787       assign(condT, mkU32(1));
   17788 
   17789       // notInITt = xor32(and32(old_itstate, 1), 1)
   17790       //          = xor32(and32(0, 1), 1)
   17791       //          = xor32(0, 1)
   17792       //          = 1 :: I32
   17793       //
   17794       // IRTemp notInITt = newTemp(Ity_I32);
   17795       // assign(notInITt,
   17796       //        binop(Iop_Xor32,
   17797       //              binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
   17798       //              mkU32(1)));
   17799 
   17800       // cond_AND_notInIT_T = and32(notInITt, condT)
   17801       //                    = and32(1, 1)
   17802       //                    = 1
   17803       //
   17804       // cond_AND_notInIT_T = newTemp(Ity_I32);
   17805       // assign(cond_AND_notInIT_T,
   17806       //        binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
   17807       cond_AND_notInIT_T = condT; /* 1 :: I32 */
   17808 
   17809       /* END "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
   17810    } else {
   17811       /* BEGIN { STANDARD PREAMBLE; } */
   17812 
   17813       old_itstate = get_ITSTATE();
   17814 
   17815       new_itstate = newTemp(Ity_I32);
   17816       assign(new_itstate,
   17817              binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   17818 
   17819       put_ITSTATE(new_itstate);
   17820 
   17821       /* Same strategy as for ARM insns: generate a condition
   17822          temporary at this point (or IRTemp_INVALID, meaning
   17823          unconditional).  We leave it to lower-level instruction
   17824          decoders to decide whether they can generate straight-line
   17825          code, or whether they must generate a side exit before the
   17826          instruction.  condT :: Ity_I32 and is always either zero or
   17827          one. */
   17828       IRTemp condT1 = newTemp(Ity_I32);
   17829       assign(condT1,
   17830              mk_armg_calculate_condition_dyn(
   17831                 binop(Iop_Xor32,
   17832                       binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
   17833                       mkU32(0xE0))
   17834             )
   17835       );
   17836 
   17837       /* This is a bit complex, but needed to make Memcheck understand
   17838          that, if the condition in old_itstate[7:4] denotes AL (that
   17839          is, if this instruction is to be executed unconditionally),
   17840          then condT does not depend on the results of calling the
   17841          helper.
   17842 
   17843          We test explicitly for old_itstate[7:4] == AL ^ 0xE, and in
   17844          that case set condT directly to 1.  Else we use the results
   17845          of the helper.  Since old_itstate is always defined and
   17846          because Memcheck does lazy V-bit propagation through ITE,
   17847          this will cause condT to always be a defined 1 if the
   17848          condition is 'AL'.  From an execution semantics point of view
   17849          this is irrelevant since we're merely duplicating part of the
   17850          behaviour of the helper.  But it makes it clear to Memcheck,
   17851          in this case, that condT does not in fact depend on the
   17852          contents of the condition code thunk.  Without it, we get
   17853          quite a lot of false errors.
   17854 
   17855          So, just to clarify: from a straight semantics point of view,
   17856          we can simply do "assign(condT, mkexpr(condT1))", and the
   17857          simulator still runs fine.  It's just that we get loads of
   17858          false errors from Memcheck. */
   17859       condT = newTemp(Ity_I32);
   17860       assign(condT, IRExpr_ITE(
   17861                        binop(Iop_CmpNE32, binop(Iop_And32,
   17862                                                 mkexpr(old_itstate),
   17863                                                 mkU32(0xF0)),
   17864                                           mkU32(0)),
   17865                        mkexpr(condT1),
   17866                        mkU32(1)
   17867             ));
   17868 
   17869       /* Something we don't have in ARM: generate a 0 or 1 value
   17870          indicating whether or not we are in an IT block (NB: 0 = in
   17871          IT block, 1 = not in IT block).  This is used to gate
   17872          condition code updates in 16-bit Thumb instructions. */
   17873       IRTemp notInITt = newTemp(Ity_I32);
   17874       assign(notInITt,
   17875              binop(Iop_Xor32,
   17876                    binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
   17877                    mkU32(1)));
   17878 
   17879       /* Compute 'condT && notInITt' -- that is, the instruction is
   17880          going to execute, and we're not in an IT block.  This is the
   17881          gating condition for updating condition codes in 16-bit Thumb
   17882          instructions, except for CMP, CMN and TST. */
   17883       cond_AND_notInIT_T = newTemp(Ity_I32);
   17884       assign(cond_AND_notInIT_T,
   17885              binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
   17886       /* END { STANDARD PREAMBLE; } */
   17887    }
   17888 
   17889 
   17890    /* At this point:
   17891       * ITSTATE has been updated
   17892       * condT holds the guarding condition for this instruction (0 or 1),
   17893       * notInITt is 1 if we're in "normal" code, 0 if in an IT block
   17894       * cond_AND_notInIT_T is the AND of the above two.
   17895 
   17896       If the instruction proper can't trap, then there's nothing else
   17897       to do w.r.t. ITSTATE -- just go and generate IR for the
   17898       insn, taking into account the guarding condition.
   17899 
   17900       If, however, the instruction might trap, then we must back up
   17901       ITSTATE to the old value, and re-update it after the potentially
   17902       trapping IR section.  A trap can happen either via a memory
   17903       reference or because we need to throw SIGILL.
   17904 
   17905       If an instruction has a side exit, we need to be sure that any
   17906       ITSTATE backup is re-updated before the side exit.
   17907    */
   17908 
   17909    /* ----------------------------------------------------------- */
   17910    /* --                                                       -- */
   17911    /* -- Thumb 16-bit integer instructions                     -- */
   17912    /* --                                                       -- */
   17913    /* -- IMPORTANT: references to insn1 or INSN1 are           -- */
   17914    /* --            not allowed in this section                -- */
   17915    /* --                                                       -- */
   17916    /* ----------------------------------------------------------- */
   17917 
   17918    /* 16-bit instructions inside an IT block, apart from CMP, CMN and
   17919       TST, do not set the condition codes.  Hence we must dynamically
   17920       test for this case for every condition code update. */
   17921 
   17922    IROp   anOp   = Iop_INVALID;
   17923    const HChar* anOpNm = NULL;
   17924 
   17925    /* ================ 16-bit 15:6 cases ================ */
   17926 
   17927    switch (INSN0(15,6)) {
   17928 
   17929    case 0x10a:   // CMP
   17930    case 0x10b: { // CMN
   17931       /* ---------------- CMP Rn, Rm ---------------- */
   17932       Bool   isCMN = INSN0(15,6) == 0x10b;
   17933       UInt   rN    = INSN0(2,0);
   17934       UInt   rM    = INSN0(5,3);
   17935       IRTemp argL  = newTemp(Ity_I32);
   17936       IRTemp argR  = newTemp(Ity_I32);
   17937       assign( argL, getIRegT(rN) );
   17938       assign( argR, getIRegT(rM) );
   17939       /* Update flags regardless of whether in an IT block or not. */
   17940       setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   17941                       argL, argR, condT );
   17942       DIP("%s r%u, r%u\n", isCMN ? "cmn" : "cmp", rN, rM);
   17943       goto decode_success;
   17944    }
   17945 
   17946    case 0x108: {
   17947       /* ---------------- TST Rn, Rm ---------------- */
   17948       UInt   rN   = INSN0(2,0);
   17949       UInt   rM   = INSN0(5,3);
   17950       IRTemp oldC = newTemp(Ity_I32);
   17951       IRTemp oldV = newTemp(Ity_I32);
   17952       IRTemp res  = newTemp(Ity_I32);
   17953       assign( oldC, mk_armg_calculate_flag_c() );
   17954       assign( oldV, mk_armg_calculate_flag_v() );
   17955       assign( res,  binop(Iop_And32, getIRegT(rN), getIRegT(rM)) );
   17956       /* Update flags regardless of whether in an IT block or not. */
   17957       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
   17958       DIP("tst r%u, r%u\n", rN, rM);
   17959       goto decode_success;
   17960    }
   17961 
   17962    case 0x109: {
   17963       /* ---------------- NEGS Rd, Rm ---------------- */
   17964       /* Rd = -Rm */
   17965       UInt   rM   = INSN0(5,3);
   17966       UInt   rD   = INSN0(2,0);
   17967       IRTemp arg  = newTemp(Ity_I32);
   17968       IRTemp zero = newTemp(Ity_I32);
   17969       assign(arg, getIRegT(rM));
   17970       assign(zero, mkU32(0));
   17971       // rD can never be r15
   17972       putIRegT(rD, binop(Iop_Sub32, mkexpr(zero), mkexpr(arg)), condT);
   17973       setFlags_D1_D2( ARMG_CC_OP_SUB, zero, arg, cond_AND_notInIT_T);
   17974       DIP("negs r%u, r%u\n", rD, rM);
   17975       goto decode_success;
   17976    }
   17977 
   17978    case 0x10F: {
   17979       /* ---------------- MVNS Rd, Rm ---------------- */
   17980       /* Rd = ~Rm */
   17981       UInt   rM   = INSN0(5,3);
   17982       UInt   rD   = INSN0(2,0);
   17983       IRTemp oldV = newTemp(Ity_I32);
   17984       IRTemp oldC = newTemp(Ity_I32);
   17985       IRTemp res  = newTemp(Ity_I32);
   17986       assign( oldV, mk_armg_calculate_flag_v() );
   17987       assign( oldC, mk_armg_calculate_flag_c() );
   17988       assign(res, unop(Iop_Not32, getIRegT(rM)));
   17989       // rD can never be r15
   17990       putIRegT(rD, mkexpr(res), condT);
   17991       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   17992                          cond_AND_notInIT_T );
   17993       DIP("mvns r%u, r%u\n", rD, rM);
   17994       goto decode_success;
   17995    }
   17996 
   17997    case 0x10C:
   17998       /* ---------------- ORRS Rd, Rm ---------------- */
   17999       anOp = Iop_Or32; anOpNm = "orr"; goto and_orr_eor_mul;
   18000    case 0x100:
   18001       /* ---------------- ANDS Rd, Rm ---------------- */
   18002       anOp = Iop_And32; anOpNm = "and"; goto and_orr_eor_mul;
   18003    case 0x101:
   18004       /* ---------------- EORS Rd, Rm ---------------- */
   18005       anOp = Iop_Xor32; anOpNm = "eor"; goto and_orr_eor_mul;
   18006    case 0x10d:
   18007       /* ---------------- MULS Rd, Rm ---------------- */
   18008       anOp = Iop_Mul32; anOpNm = "mul"; goto and_orr_eor_mul;
   18009    and_orr_eor_mul: {
   18010       /* Rd = Rd `op` Rm */
   18011       UInt   rM   = INSN0(5,3);
   18012       UInt   rD   = INSN0(2,0);
   18013       IRTemp res  = newTemp(Ity_I32);
   18014       IRTemp oldV = newTemp(Ity_I32);
   18015       IRTemp oldC = newTemp(Ity_I32);
   18016       assign( oldV, mk_armg_calculate_flag_v() );
   18017       assign( oldC, mk_armg_calculate_flag_c() );
   18018       assign( res, binop(anOp, getIRegT(rD), getIRegT(rM) ));
   18019       // not safe to read guest state after here
   18020       // rD can never be r15
   18021       putIRegT(rD, mkexpr(res), condT);
   18022       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   18023                          cond_AND_notInIT_T );
   18024       DIP("%s r%u, r%u\n", anOpNm, rD, rM);
   18025       goto decode_success;
   18026    }
   18027 
   18028    case 0x10E: {
   18029       /* ---------------- BICS Rd, Rm ---------------- */
   18030       /* Rd = Rd & ~Rm */
   18031       UInt   rM   = INSN0(5,3);
   18032       UInt   rD   = INSN0(2,0);
   18033       IRTemp res  = newTemp(Ity_I32);
   18034       IRTemp oldV = newTemp(Ity_I32);
   18035       IRTemp oldC = newTemp(Ity_I32);
   18036       assign( oldV, mk_armg_calculate_flag_v() );
   18037       assign( oldC, mk_armg_calculate_flag_c() );
   18038       assign( res, binop(Iop_And32, getIRegT(rD),
   18039                                     unop(Iop_Not32, getIRegT(rM) )));
   18040       // not safe to read guest state after here
   18041       // rD can never be r15
   18042       putIRegT(rD, mkexpr(res), condT);
   18043       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   18044                          cond_AND_notInIT_T );
   18045       DIP("bics r%u, r%u\n", rD, rM);
   18046       goto decode_success;
   18047    }
   18048 
   18049    case 0x105: {
   18050       /* ---------------- ADCS Rd, Rm ---------------- */
   18051       /* Rd = Rd + Rm + oldC */
   18052       UInt   rM   = INSN0(5,3);
   18053       UInt   rD   = INSN0(2,0);
   18054       IRTemp argL = newTemp(Ity_I32);
   18055       IRTemp argR = newTemp(Ity_I32);
   18056       IRTemp oldC = newTemp(Ity_I32);
   18057       IRTemp res  = newTemp(Ity_I32);
   18058       assign(argL, getIRegT(rD));
   18059       assign(argR, getIRegT(rM));
   18060       assign(oldC, mk_armg_calculate_flag_c());
   18061       assign(res, binop(Iop_Add32,
   18062                         binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
   18063                         mkexpr(oldC)));
   18064       // rD can never be r15
   18065       putIRegT(rD, mkexpr(res), condT);
   18066       setFlags_D1_D2_ND( ARMG_CC_OP_ADC, argL, argR, oldC,
   18067                          cond_AND_notInIT_T );
   18068       DIP("adcs r%u, r%u\n", rD, rM);
   18069       goto decode_success;
   18070    }
   18071 
   18072    case 0x106: {
   18073       /* ---------------- SBCS Rd, Rm ---------------- */
   18074       /* Rd = Rd - Rm - (oldC ^ 1) */
   18075       UInt   rM   = INSN0(5,3);
   18076       UInt   rD   = INSN0(2,0);
   18077       IRTemp argL = newTemp(Ity_I32);
   18078       IRTemp argR = newTemp(Ity_I32);
   18079       IRTemp oldC = newTemp(Ity_I32);
   18080       IRTemp res  = newTemp(Ity_I32);
   18081       assign(argL, getIRegT(rD));
   18082       assign(argR, getIRegT(rM));
   18083       assign(oldC, mk_armg_calculate_flag_c());
   18084       assign(res, binop(Iop_Sub32,
   18085                         binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
   18086                         binop(Iop_Xor32, mkexpr(oldC), mkU32(1))));
   18087       // rD can never be r15
   18088       putIRegT(rD, mkexpr(res), condT);
   18089       setFlags_D1_D2_ND( ARMG_CC_OP_SBB, argL, argR, oldC,
   18090                          cond_AND_notInIT_T );
   18091       DIP("sbcs r%u, r%u\n", rD, rM);
   18092       goto decode_success;
   18093    }
   18094 
   18095    case 0x2CB: {
   18096       /* ---------------- UXTB Rd, Rm ---------------- */
   18097       /* Rd = 8Uto32(Rm) */
   18098       UInt rM = INSN0(5,3);
   18099       UInt rD = INSN0(2,0);
   18100       putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFF)),
   18101                    condT);
   18102       DIP("uxtb r%u, r%u\n", rD, rM);
   18103       goto decode_success;
   18104    }
   18105 
   18106    case 0x2C9: {
   18107       /* ---------------- SXTB Rd, Rm ---------------- */
   18108       /* Rd = 8Sto32(Rm) */
   18109       UInt rM = INSN0(5,3);
   18110       UInt rD = INSN0(2,0);
   18111       putIRegT(rD, binop(Iop_Sar32,
   18112                          binop(Iop_Shl32, getIRegT(rM), mkU8(24)),
   18113                          mkU8(24)),
   18114                    condT);
   18115       DIP("sxtb r%u, r%u\n", rD, rM);
   18116       goto decode_success;
   18117    }
   18118 
   18119    case 0x2CA: {
   18120       /* ---------------- UXTH Rd, Rm ---------------- */
   18121       /* Rd = 16Uto32(Rm) */
   18122       UInt rM = INSN0(5,3);
   18123       UInt rD = INSN0(2,0);
   18124       putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFFFF)),
   18125                    condT);
   18126       DIP("uxth r%u, r%u\n", rD, rM);
   18127       goto decode_success;
   18128    }
   18129 
   18130    case 0x2C8: {
   18131       /* ---------------- SXTH Rd, Rm ---------------- */
   18132       /* Rd = 16Sto32(Rm) */
   18133       UInt rM = INSN0(5,3);
   18134       UInt rD = INSN0(2,0);
   18135       putIRegT(rD, binop(Iop_Sar32,
   18136                          binop(Iop_Shl32, getIRegT(rM), mkU8(16)),
   18137                          mkU8(16)),
   18138                    condT);
   18139       DIP("sxth r%u, r%u\n", rD, rM);
   18140       goto decode_success;
   18141    }
   18142 
   18143    case 0x102:   // LSLS
   18144    case 0x103:   // LSRS
   18145    case 0x104:   // ASRS
   18146    case 0x107: { // RORS
   18147       /* ---------------- LSLS Rs, Rd ---------------- */
   18148       /* ---------------- LSRS Rs, Rd ---------------- */
   18149       /* ---------------- ASRS Rs, Rd ---------------- */
   18150       /* ---------------- RORS Rs, Rd ---------------- */
   18151       /* Rd = Rd `op` Rs, and set flags */
   18152       UInt   rS   = INSN0(5,3);
   18153       UInt   rD   = INSN0(2,0);
   18154       IRTemp oldV = newTemp(Ity_I32);
   18155       IRTemp rDt  = newTemp(Ity_I32);
   18156       IRTemp rSt  = newTemp(Ity_I32);
   18157       IRTemp res  = newTemp(Ity_I32);
   18158       IRTemp resC = newTemp(Ity_I32);
   18159       const HChar* wot  = "???";
   18160       assign(rSt, getIRegT(rS));
   18161       assign(rDt, getIRegT(rD));
   18162       assign(oldV, mk_armg_calculate_flag_v());
   18163       /* Does not appear to be the standard 'how' encoding. */
   18164       switch (INSN0(15,6)) {
   18165          case 0x102:
   18166             compute_result_and_C_after_LSL_by_reg(
   18167                dis_buf, &res, &resC, rDt, rSt, rD, rS
   18168             );
   18169             wot = "lsl";
   18170             break;
   18171          case 0x103:
   18172             compute_result_and_C_after_LSR_by_reg(
   18173                dis_buf, &res, &resC, rDt, rSt, rD, rS
   18174             );
   18175             wot = "lsr";
   18176             break;
   18177          case 0x104:
   18178             compute_result_and_C_after_ASR_by_reg(
   18179                dis_buf, &res, &resC, rDt, rSt, rD, rS
   18180             );
   18181             wot = "asr";
   18182             break;
   18183          case 0x107:
   18184             compute_result_and_C_after_ROR_by_reg(
   18185                dis_buf, &res, &resC, rDt, rSt, rD, rS
   18186             );
   18187             wot = "ror";
   18188             break;
   18189          default:
   18190             /*NOTREACHED*/vassert(0);
   18191       }
   18192       // not safe to read guest state after this point
   18193       putIRegT(rD, mkexpr(res), condT);
   18194       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
   18195                          cond_AND_notInIT_T );
   18196       DIP("%ss r%u, r%u\n", wot, rS, rD);
   18197       goto decode_success;
   18198    }
   18199 
   18200    case 0x2E8:   // REV
   18201    case 0x2E9: { // REV16
   18202       /* ---------------- REV   Rd, Rm ---------------- */
   18203       /* ---------------- REV16 Rd, Rm ---------------- */
   18204       UInt rM = INSN0(5,3);
   18205       UInt rD = INSN0(2,0);
   18206       Bool isREV = INSN0(15,6) == 0x2E8;
   18207       IRTemp arg = newTemp(Ity_I32);
   18208       assign(arg, getIRegT(rM));
   18209       IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
   18210       putIRegT(rD, mkexpr(res), condT);
   18211       DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM);
   18212       goto decode_success;
   18213    }
   18214 
   18215    case 0x2EB: { // REVSH
   18216       /* ---------------- REVSH Rd, Rm ---------------- */
   18217       UInt rM = INSN0(5,3);
   18218       UInt rD = INSN0(2,0);
   18219       IRTemp irt_rM  = newTemp(Ity_I32);
   18220       IRTemp irt_hi  = newTemp(Ity_I32);
   18221       IRTemp irt_low = newTemp(Ity_I32);
   18222       IRTemp irt_res = newTemp(Ity_I32);
   18223       assign(irt_rM, getIRegT(rM));
   18224       assign(irt_hi,
   18225              binop(Iop_Sar32,
   18226                    binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
   18227                    mkU8(16)
   18228              )
   18229       );
   18230       assign(irt_low,
   18231              binop(Iop_And32,
   18232                    binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
   18233                    mkU32(0xFF)
   18234              )
   18235       );
   18236       assign(irt_res,
   18237              binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
   18238       );
   18239       putIRegT(rD, mkexpr(irt_res), condT);
   18240       DIP("revsh r%u, r%u\n", rD, rM);
   18241       goto decode_success;
   18242    }
   18243 
   18244    default:
   18245       break; /* examine the next shortest prefix */
   18246 
   18247    }
   18248 
   18249 
   18250    /* ================ 16-bit 15:7 cases ================ */
   18251 
   18252    switch (INSN0(15,7)) {
   18253 
   18254    case BITS9(1,0,1,1,0,0,0,0,0): {
   18255       /* ------------ ADD SP, #imm7 * 4 ------------ */
   18256       UInt uimm7 = INSN0(6,0);
   18257       putIRegT(13, binop(Iop_Add32, getIRegT(13), mkU32(uimm7 * 4)),
   18258                    condT);
   18259       DIP("add sp, #%u\n", uimm7 * 4);
   18260       goto decode_success;
   18261    }
   18262 
   18263    case BITS9(1,0,1,1,0,0,0,0,1): {
   18264       /* ------------ SUB SP, #imm7 * 4 ------------ */
   18265       UInt uimm7 = INSN0(6,0);
   18266       putIRegT(13, binop(Iop_Sub32, getIRegT(13), mkU32(uimm7 * 4)),
   18267                    condT);
   18268       DIP("sub sp, #%u\n", uimm7 * 4);
   18269       goto decode_success;
   18270    }
   18271 
   18272    case BITS9(0,1,0,0,0,1,1,1,0): {
   18273       /* ---------------- BX rM ---------------- */
   18274       /* Branch to reg, and optionally switch modes.  The reg
   18275          therefore contains a suitably encoded address (with CPSR.T
   18276          at the bottom).  Have to special-case r15, as usual. */
   18277       UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
   18278       if (BITS3(0,0,0) == INSN0(2,0)) {
   18279          IRTemp dst = newTemp(Ity_I32);
   18280          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   18281          mk_skip_over_T16_if_cond_is_false(condT);
   18282          condT = IRTemp_INVALID;
   18283          // now uncond
   18284          if (rM <= 14) {
   18285             assign( dst, getIRegT(rM) );
   18286          } else {
   18287             vassert(rM == 15);
   18288             assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
   18289          }
   18290          llPutIReg(15, mkexpr(dst));
   18291          dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
   18292          dres.whatNext    = Dis_StopHere;
   18293          DIP("bx r%u (possibly switch to ARM mode)\n", rM);
   18294          goto decode_success;
   18295       }
   18296       break;
   18297    }
   18298 
   18299    /* ---------------- BLX rM ---------------- */
   18300    /* Branch and link to interworking address in rM. */
   18301    case BITS9(0,1,0,0,0,1,1,1,1): {
   18302       if (BITS3(0,0,0) == INSN0(2,0)) {
   18303          UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
   18304          IRTemp dst = newTemp(Ity_I32);
   18305          if (rM <= 14) {
   18306             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   18307             mk_skip_over_T16_if_cond_is_false(condT);
   18308             condT = IRTemp_INVALID;
   18309             // now uncond
   18310             /* We're returning to Thumb code, hence "| 1" */
   18311             assign( dst, getIRegT(rM) );
   18312             putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
   18313                           IRTemp_INVALID );
   18314             llPutIReg(15, mkexpr(dst));
   18315             dres.jk_StopHere = Ijk_Call;
   18316             dres.whatNext    = Dis_StopHere;
   18317             DIP("blx r%u (possibly switch to ARM mode)\n", rM);
   18318             goto decode_success;
   18319          }
   18320          /* else unpredictable, fall through */
   18321       }
   18322       break;
   18323    }
   18324 
   18325    default:
   18326       break; /* examine the next shortest prefix */
   18327 
   18328    }
   18329 
   18330 
   18331    /* ================ 16-bit 15:8 cases ================ */
   18332 
   18333    switch (INSN0(15,8)) {
   18334 
   18335    case BITS8(1,1,0,1,1,1,1,1): {
   18336       /* ---------------- SVC ---------------- */
   18337       UInt imm8 = INSN0(7,0);
   18338       if (imm8 == 0) {
   18339          /* A syscall.  We can't do this conditionally, hence: */
   18340          mk_skip_over_T16_if_cond_is_false( condT );
   18341          // FIXME: what if we have to back up and restart this insn?
   18342          // then ITSTATE will be wrong (we'll have it as "used")
   18343          // when it isn't.  Correct is to save ITSTATE in a
   18344          // stash pseudo-reg, and back up from that if we have to
   18345          // restart.
   18346          // uncond after here
   18347          llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ));
   18348          dres.jk_StopHere = Ijk_Sys_syscall;
   18349          dres.whatNext    = Dis_StopHere;
   18350          DIP("svc #0x%08x\n", imm8);
   18351          goto decode_success;
   18352       }
   18353       /* else fall through */
   18354       break;
   18355    }
   18356 
   18357    case BITS8(0,1,0,0,0,1,0,0): {
   18358       /* ---------------- ADD(HI) Rd, Rm ---------------- */
   18359       UInt h1 = INSN0(7,7);
   18360       UInt h2 = INSN0(6,6);
   18361       UInt rM = (h2 << 3) | INSN0(5,3);
   18362       UInt rD = (h1 << 3) | INSN0(2,0);
   18363       //if (h1 == 0 && h2 == 0) { // Original T1 was more restrictive
   18364       if (rD == 15 && rM == 15) {
   18365          // then it's invalid
   18366       } else {
   18367          IRTemp res = newTemp(Ity_I32);
   18368          assign( res, binop(Iop_Add32, getIRegT(rD), getIRegT(rM) ));
   18369          if (rD != 15) {
   18370             putIRegT( rD, mkexpr(res), condT );
   18371          } else {
   18372             /* Only allowed outside or last-in IT block; SIGILL if not so. */
   18373             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   18374             /* jump over insn if not selected */
   18375             mk_skip_over_T16_if_cond_is_false(condT);
   18376             condT = IRTemp_INVALID;
   18377             // now uncond
   18378             /* non-interworking branch */
   18379             llPutIReg(15, binop(Iop_Or32, mkexpr(res), mkU32(1)));
   18380             dres.jk_StopHere = Ijk_Boring;
   18381             dres.whatNext    = Dis_StopHere;
   18382          }
   18383          DIP("add(hi) r%u, r%u\n", rD, rM);
   18384          goto decode_success;
   18385       }
   18386       break;
   18387    }
   18388 
   18389    case BITS8(0,1,0,0,0,1,0,1): {
   18390       /* ---------------- CMP(HI) Rd, Rm ---------------- */
   18391       UInt h1 = INSN0(7,7);
   18392       UInt h2 = INSN0(6,6);
   18393       UInt rM = (h2 << 3) | INSN0(5,3);
   18394       UInt rN = (h1 << 3) | INSN0(2,0);
   18395       if (h1 != 0 || h2 != 0) {
   18396          IRTemp argL  = newTemp(Ity_I32);
   18397          IRTemp argR  = newTemp(Ity_I32);
   18398          assign( argL, getIRegT(rN) );
   18399          assign( argR, getIRegT(rM) );
   18400          /* Update flags regardless of whether in an IT block or not. */
   18401          setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   18402          DIP("cmphi r%u, r%u\n", rN, rM);
   18403          goto decode_success;
   18404       }
   18405       break;
   18406    }
   18407 
   18408    case BITS8(0,1,0,0,0,1,1,0): {
   18409       /* ---------------- MOV(HI) Rd, Rm ---------------- */
   18410       UInt h1 = INSN0(7,7);
   18411       UInt h2 = INSN0(6,6);
   18412       UInt rM = (h2 << 3) | INSN0(5,3);
   18413       UInt rD = (h1 << 3) | INSN0(2,0);
   18414       /* The old ARM ARM seems to disallow the case where both Rd and
   18415          Rm are "low" registers, but newer versions allow it. */
   18416       if (1 /*h1 != 0 || h2 != 0*/) {
   18417          IRTemp val = newTemp(Ity_I32);
   18418          assign( val, getIRegT(rM) );
   18419          if (rD != 15) {
   18420             putIRegT( rD, mkexpr(val), condT );
   18421          } else {
   18422             /* Only allowed outside or last-in IT block; SIGILL if not so. */
   18423             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   18424             /* jump over insn if not selected */
   18425             mk_skip_over_T16_if_cond_is_false(condT);
   18426             condT = IRTemp_INVALID;
   18427             // now uncond
   18428             /* non-interworking branch */
   18429             llPutIReg(15, binop(Iop_Or32, mkexpr(val), mkU32(1)));
   18430             dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
   18431             dres.whatNext    = Dis_StopHere;
   18432          }
   18433          DIP("mov r%u, r%u\n", rD, rM);
   18434          goto decode_success;
   18435       }
   18436       break;
   18437    }
   18438 
   18439    case BITS8(1,0,1,1,1,1,1,1): {
   18440       /* ---------------- IT (if-then) ---------------- */
   18441       UInt firstcond = INSN0(7,4);
   18442       UInt mask = INSN0(3,0);
   18443       UInt newITSTATE = 0;
   18444       /* This is the ITSTATE represented as described in
   18445          libvex_guest_arm.h.  It is not the ARM ARM representation. */
   18446       HChar c1 = '.';
   18447       HChar c2 = '.';
   18448       HChar c3 = '.';
   18449       Bool valid = compute_ITSTATE( &newITSTATE, &c1, &c2, &c3,
   18450                                     firstcond, mask );
   18451       if (valid && firstcond != 0xF/*NV*/) {
   18452          /* Not allowed in an IT block; SIGILL if so. */
   18453          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   18454 
   18455          IRTemp t = newTemp(Ity_I32);
   18456          assign(t, mkU32(newITSTATE));
   18457          put_ITSTATE(t);
   18458 
   18459          DIP("it%c%c%c %s\n", c1, c2, c3, nCC(firstcond));
   18460          goto decode_success;
   18461       }
   18462       break;
   18463    }
   18464 
   18465    case BITS8(1,0,1,1,0,0,0,1):
   18466    case BITS8(1,0,1,1,0,0,1,1):
   18467    case BITS8(1,0,1,1,1,0,0,1):
   18468    case BITS8(1,0,1,1,1,0,1,1): {
   18469       /* ---------------- CB{N}Z ---------------- */
   18470       UInt rN    = INSN0(2,0);
   18471       UInt bOP   = INSN0(11,11);
   18472       UInt imm32 = (INSN0(9,9) << 6) | (INSN0(7,3) << 1);
   18473       gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   18474       /* It's a conditional branch forward. */
   18475       IRTemp kond = newTemp(Ity_I1);
   18476       assign( kond, binop(bOP ? Iop_CmpNE32 : Iop_CmpEQ32,
   18477                           getIRegT(rN), mkU32(0)) );
   18478 
   18479       vassert(0 == (guest_R15_curr_instr_notENC & 1));
   18480       /* Looks like the nearest insn we can branch to is the one after
   18481          next.  That makes sense, as there's no point in being able to
   18482          encode a conditional branch to the next instruction. */
   18483       UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
   18484       stmt(IRStmt_Exit( mkexpr(kond),
   18485                         Ijk_Boring,
   18486                         IRConst_U32(toUInt(dst)),
   18487                         OFFB_R15T ));
   18488       DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1);
   18489       goto decode_success;
   18490    }
   18491 
   18492    default:
   18493       break; /* examine the next shortest prefix */
   18494 
   18495    }
   18496 
   18497 
   18498    /* ================ 16-bit 15:9 cases ================ */
   18499 
   18500    switch (INSN0(15,9)) {
   18501 
   18502    case BITS7(1,0,1,1,0,1,0): {
   18503       /* ---------------- PUSH ---------------- */
   18504       /* This is a bit like STMxx, but way simpler. Complications we
   18505          don't have to deal with:
   18506          * SP being one of the transferred registers
   18507          * direction (increment vs decrement)
   18508          * before-vs-after-ness
   18509       */
   18510       Int  i, nRegs;
   18511       UInt bitR    = INSN0(8,8);
   18512       UInt regList = INSN0(7,0);
   18513       if (bitR) regList |= (1 << 14);
   18514 
   18515       /* At least one register must be transferred, else result is
   18516          UNPREDICTABLE. */
   18517       if (regList != 0) {
   18518          /* Since we can't generate a guaranteed non-trapping IR
   18519             sequence, (1) jump over the insn if it is gated false, and
   18520             (2) back out the ITSTATE update. */
   18521          mk_skip_over_T16_if_cond_is_false(condT);
   18522          condT = IRTemp_INVALID;
   18523          put_ITSTATE(old_itstate);
   18524          // now uncond
   18525 
   18526          nRegs = 0;
   18527          for (i = 0; i < 16; i++) {
   18528             if ((regList & (1 << i)) != 0)
   18529                nRegs++;
   18530          }
   18531          vassert(nRegs >= 1 && nRegs <= 9);
   18532 
   18533          /* Move SP down first of all, so we're "covered".  And don't
   18534             mess with its alignment. */
   18535          IRTemp newSP = newTemp(Ity_I32);
   18536          assign(newSP, binop(Iop_Sub32, getIRegT(13), mkU32(4 * nRegs)));
   18537          putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
   18538 
   18539          /* Generate a transfer base address as a forced-aligned
   18540             version of the final SP value. */
   18541          IRTemp base = newTemp(Ity_I32);
   18542          assign(base, binop(Iop_And32, mkexpr(newSP), mkU32(~3)));
   18543 
   18544          /* Now the transfers */
   18545          nRegs = 0;
   18546          for (i = 0; i < 16; i++) {
   18547             if ((regList & (1 << i)) != 0) {
   18548                storeLE( binop(Iop_Add32, mkexpr(base), mkU32(4 * nRegs)),
   18549                         getIRegT(i) );
   18550                nRegs++;
   18551             }
   18552          }
   18553 
   18554          /* Reinstate the ITSTATE update. */
   18555          put_ITSTATE(new_itstate);
   18556 
   18557          DIP("push {%s0x%04x}\n", bitR ? "lr," : "", regList & 0xFF);
   18558          goto decode_success;
   18559       }
   18560       break;
   18561    }
   18562 
   18563    case BITS7(1,0,1,1,1,1,0): {
   18564       /* ---------------- POP ---------------- */
   18565       Int  i, nRegs;
   18566       UInt bitR    = INSN0(8,8);
   18567       UInt regList = INSN0(7,0);
   18568 
   18569       /* At least one register must be transferred, else result is
   18570          UNPREDICTABLE. */
   18571       if (regList != 0 || bitR) {
   18572          /* Since we can't generate a guaranteed non-trapping IR
   18573             sequence, (1) jump over the insn if it is gated false, and
   18574             (2) back out the ITSTATE update. */
   18575          mk_skip_over_T16_if_cond_is_false(condT);
   18576          condT = IRTemp_INVALID;
   18577          put_ITSTATE(old_itstate);
   18578          // now uncond
   18579 
   18580          nRegs = 0;
   18581          for (i = 0; i < 8; i++) {
   18582             if ((regList & (1 << i)) != 0)
   18583                nRegs++;
   18584          }
   18585          vassert(nRegs >= 0 && nRegs <= 8);
   18586          vassert(bitR == 0 || bitR == 1);
   18587 
   18588          IRTemp oldSP = newTemp(Ity_I32);
   18589          assign(oldSP, getIRegT(13));
   18590 
   18591          /* Generate a transfer base address as a forced-aligned
   18592             version of the original SP value. */
   18593          IRTemp base = newTemp(Ity_I32);
   18594          assign(base, binop(Iop_And32, mkexpr(oldSP), mkU32(~3)));
   18595 
   18596          /* Compute a new value for SP, but don't install it yet, so
   18597             that we're "covered" until all the transfers are done.
   18598             And don't mess with its alignment. */
   18599          IRTemp newSP = newTemp(Ity_I32);
   18600          assign(newSP, binop(Iop_Add32, mkexpr(oldSP),
   18601                                         mkU32(4 * (nRegs + bitR))));
   18602 
   18603          /* Now the transfers, not including PC */
   18604          nRegs = 0;
   18605          for (i = 0; i < 8; i++) {
   18606             if ((regList & (1 << i)) != 0) {
   18607                putIRegT(i, loadLE( Ity_I32,
   18608                                    binop(Iop_Add32, mkexpr(base),
   18609                                                     mkU32(4 * nRegs))),
   18610                            IRTemp_INVALID );
   18611                nRegs++;
   18612             }
   18613          }
   18614 
   18615          IRTemp newPC = IRTemp_INVALID;
   18616          if (bitR) {
   18617             newPC = newTemp(Ity_I32);
   18618             assign( newPC, loadLE( Ity_I32,
   18619                                    binop(Iop_Add32, mkexpr(base),
   18620                                                     mkU32(4 * nRegs))));
   18621          }
   18622 
   18623          /* Now we can safely install the new SP value */
   18624          putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
   18625 
   18626          /* Reinstate the ITSTATE update. */
   18627          put_ITSTATE(new_itstate);
   18628 
   18629          /* now, do we also have to do a branch?  If so, it turns out
   18630             that the new PC value is encoded exactly as we need it to
   18631             be -- with CPSR.T in the bottom bit.  So we can simply use
   18632             it as is, no need to mess with it.  Note, therefore, this
   18633             is an interworking return. */
   18634          if (bitR) {
   18635             llPutIReg(15, mkexpr(newPC));
   18636             dres.jk_StopHere = Ijk_Ret;
   18637             dres.whatNext    = Dis_StopHere;
   18638          }
   18639 
   18640          DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
   18641          goto decode_success;
   18642       }
   18643       break;
   18644    }
   18645 
   18646    case BITS7(0,0,0,1,1,1,0):   /* ADDS */
   18647    case BITS7(0,0,0,1,1,1,1): { /* SUBS */
   18648       /* ---------------- ADDS Rd, Rn, #uimm3 ---------------- */
   18649       /* ---------------- SUBS Rd, Rn, #uimm3 ---------------- */
   18650       UInt   uimm3 = INSN0(8,6);
   18651       UInt   rN    = INSN0(5,3);
   18652       UInt   rD    = INSN0(2,0);
   18653       UInt   isSub = INSN0(9,9);
   18654       IRTemp argL  = newTemp(Ity_I32);
   18655       IRTemp argR  = newTemp(Ity_I32);
   18656       assign( argL, getIRegT(rN) );
   18657       assign( argR, mkU32(uimm3) );
   18658       putIRegT(rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
   18659                          mkexpr(argL), mkexpr(argR)),
   18660                    condT);
   18661       setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
   18662                       argL, argR, cond_AND_notInIT_T );
   18663       DIP("%s r%u, r%u, #%u\n", isSub ? "subs" : "adds", rD, rN, uimm3);
   18664       goto decode_success;
   18665    }
   18666 
   18667    case BITS7(0,0,0,1,1,0,0):   /* ADDS */
   18668    case BITS7(0,0,0,1,1,0,1): { /* SUBS */
   18669       /* ---------------- ADDS Rd, Rn, Rm ---------------- */
   18670       /* ---------------- SUBS Rd, Rn, Rm ---------------- */
   18671       UInt   rM    = INSN0(8,6);
   18672       UInt   rN    = INSN0(5,3);
   18673       UInt   rD    = INSN0(2,0);
   18674       UInt   isSub = INSN0(9,9);
   18675       IRTemp argL  = newTemp(Ity_I32);
   18676       IRTemp argR  = newTemp(Ity_I32);
   18677       assign( argL, getIRegT(rN) );
   18678       assign( argR, getIRegT(rM) );
   18679       putIRegT( rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
   18680                           mkexpr(argL), mkexpr(argR)),
   18681                     condT );
   18682       setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
   18683                       argL, argR, cond_AND_notInIT_T );
   18684       DIP("%s r%u, r%u, r%u\n", isSub ? "subs" : "adds", rD, rN, rM);
   18685       goto decode_success;
   18686    }
   18687 
   18688    case BITS7(0,1,0,1,0,0,0):   /* STR */
   18689    case BITS7(0,1,0,1,1,0,0): { /* LDR */
   18690       /* ------------- LDR Rd, [Rn, Rm] ------------- */
   18691       /* ------------- STR Rd, [Rn, Rm] ------------- */
   18692       /* LDR/STR Rd, [Rn + Rm] */
   18693       UInt    rD   = INSN0(2,0);
   18694       UInt    rN   = INSN0(5,3);
   18695       UInt    rM   = INSN0(8,6);
   18696       UInt    isLD = INSN0(11,11);
   18697 
   18698       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   18699       put_ITSTATE(old_itstate); // backout
   18700       if (isLD) {
   18701          IRTemp tD = newTemp(Ity_I32);
   18702          loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
   18703          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18704       } else {
   18705          storeGuardedLE(ea, getIRegT(rD), condT);
   18706       }
   18707       put_ITSTATE(new_itstate); // restore
   18708 
   18709       DIP("%s r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
   18710       goto decode_success;
   18711    }
   18712 
   18713    case BITS7(0,1,0,1,0,0,1):
   18714    case BITS7(0,1,0,1,1,0,1): {
   18715       /* ------------- LDRH Rd, [Rn, Rm] ------------- */
   18716       /* ------------- STRH Rd, [Rn, Rm] ------------- */
   18717       /* LDRH/STRH Rd, [Rn + Rm] */
   18718       UInt    rD   = INSN0(2,0);
   18719       UInt    rN   = INSN0(5,3);
   18720       UInt    rM   = INSN0(8,6);
   18721       UInt    isLD = INSN0(11,11);
   18722 
   18723       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   18724       put_ITSTATE(old_itstate); // backout
   18725       if (isLD) {
   18726          IRTemp tD = newTemp(Ity_I32);
   18727          loadGuardedLE(tD, ILGop_16Uto32, ea, llGetIReg(rD), condT);
   18728          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18729       } else {
   18730          storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
   18731       }
   18732       put_ITSTATE(new_itstate); // restore
   18733 
   18734       DIP("%sh r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
   18735       goto decode_success;
   18736    }
   18737 
   18738    case BITS7(0,1,0,1,1,1,1): {
   18739       /* ------------- LDRSH Rd, [Rn, Rm] ------------- */
   18740       /* LDRSH Rd, [Rn + Rm] */
   18741       UInt    rD = INSN0(2,0);
   18742       UInt    rN = INSN0(5,3);
   18743       UInt    rM = INSN0(8,6);
   18744 
   18745       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   18746       put_ITSTATE(old_itstate); // backout
   18747       IRTemp tD = newTemp(Ity_I32);
   18748       loadGuardedLE(tD, ILGop_16Sto32, ea, llGetIReg(rD), condT);
   18749       putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18750       put_ITSTATE(new_itstate); // restore
   18751 
   18752       DIP("ldrsh r%u, [r%u, r%u]\n", rD, rN, rM);
   18753       goto decode_success;
   18754    }
   18755 
   18756    case BITS7(0,1,0,1,0,1,1): {
   18757       /* ------------- LDRSB Rd, [Rn, Rm] ------------- */
   18758       /* LDRSB Rd, [Rn + Rm] */
   18759       UInt    rD = INSN0(2,0);
   18760       UInt    rN = INSN0(5,3);
   18761       UInt    rM = INSN0(8,6);
   18762 
   18763       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   18764       put_ITSTATE(old_itstate); // backout
   18765       IRTemp tD = newTemp(Ity_I32);
   18766       loadGuardedLE(tD, ILGop_8Sto32, ea, llGetIReg(rD), condT);
   18767       putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18768       put_ITSTATE(new_itstate); // restore
   18769 
   18770       DIP("ldrsb r%u, [r%u, r%u]\n", rD, rN, rM);
   18771       goto decode_success;
   18772    }
   18773 
   18774    case BITS7(0,1,0,1,0,1,0):
   18775    case BITS7(0,1,0,1,1,1,0): {
   18776       /* ------------- LDRB Rd, [Rn, Rm] ------------- */
   18777       /* ------------- STRB Rd, [Rn, Rm] ------------- */
   18778       /* LDRB/STRB Rd, [Rn + Rm] */
   18779       UInt    rD   = INSN0(2,0);
   18780       UInt    rN   = INSN0(5,3);
   18781       UInt    rM   = INSN0(8,6);
   18782       UInt    isLD = INSN0(11,11);
   18783 
   18784       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   18785       put_ITSTATE(old_itstate); // backout
   18786       if (isLD) {
   18787          IRTemp tD = newTemp(Ity_I32);
   18788          loadGuardedLE(tD, ILGop_8Uto32, ea, llGetIReg(rD), condT);
   18789          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18790       } else {
   18791          storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
   18792       }
   18793       put_ITSTATE(new_itstate); // restore
   18794 
   18795       DIP("%sb r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
   18796       goto decode_success;
   18797    }
   18798 
   18799    default:
   18800       break; /* examine the next shortest prefix */
   18801 
   18802    }
   18803 
   18804 
   18805    /* ================ 16-bit 15:11 cases ================ */
   18806 
   18807    switch (INSN0(15,11)) {
   18808 
   18809    case BITS5(0,0,1,1,0):
   18810    case BITS5(0,0,1,1,1): {
   18811       /* ---------------- ADDS Rn, #uimm8 ---------------- */
   18812       /* ---------------- SUBS Rn, #uimm8 ---------------- */
   18813       UInt   isSub = INSN0(11,11);
   18814       UInt   rN    = INSN0(10,8);
   18815       UInt   uimm8 = INSN0(7,0);
   18816       IRTemp argL  = newTemp(Ity_I32);
   18817       IRTemp argR  = newTemp(Ity_I32);
   18818       assign( argL, getIRegT(rN) );
   18819       assign( argR, mkU32(uimm8) );
   18820       putIRegT( rN, binop(isSub ? Iop_Sub32 : Iop_Add32,
   18821                           mkexpr(argL), mkexpr(argR)), condT );
   18822       setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
   18823                       argL, argR, cond_AND_notInIT_T );
   18824       DIP("%s r%u, #%u\n", isSub ? "subs" : "adds", rN, uimm8);
   18825       goto decode_success;
   18826    }
   18827 
   18828    case BITS5(1,0,1,0,0): {
   18829       /* ---------------- ADD rD, PC, #imm8 * 4 ---------------- */
   18830       /* a.k.a. ADR */
   18831       /* rD = align4(PC) + imm8 * 4 */
   18832       UInt rD   = INSN0(10,8);
   18833       UInt imm8 = INSN0(7,0);
   18834       putIRegT(rD, binop(Iop_Add32,
   18835                          binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   18836                          mkU32(imm8 * 4)),
   18837                    condT);
   18838       DIP("add r%u, pc, #%u\n", rD, imm8 * 4);
   18839       goto decode_success;
   18840    }
   18841 
   18842    case BITS5(1,0,1,0,1): {
   18843       /* ---------------- ADD rD, SP, #imm8 * 4 ---------------- */
   18844       UInt rD   = INSN0(10,8);
   18845       UInt imm8 = INSN0(7,0);
   18846       putIRegT(rD, binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4)),
   18847                    condT);
   18848       DIP("add r%u, r13, #%u\n", rD, imm8 * 4);
   18849       goto decode_success;
   18850    }
   18851 
   18852    case BITS5(0,0,1,0,1): {
   18853       /* ---------------- CMP Rn, #uimm8 ---------------- */
   18854       UInt   rN    = INSN0(10,8);
   18855       UInt   uimm8 = INSN0(7,0);
   18856       IRTemp argL  = newTemp(Ity_I32);
   18857       IRTemp argR  = newTemp(Ity_I32);
   18858       assign( argL, getIRegT(rN) );
   18859       assign( argR, mkU32(uimm8) );
   18860       /* Update flags regardless of whether in an IT block or not. */
   18861       setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   18862       DIP("cmp r%u, #%u\n", rN, uimm8);
   18863       goto decode_success;
   18864    }
   18865 
   18866    case BITS5(0,0,1,0,0): {
   18867       /* -------------- (T1) MOVS Rn, #uimm8 -------------- */
   18868       UInt   rD    = INSN0(10,8);
   18869       UInt   uimm8 = INSN0(7,0);
   18870       IRTemp oldV  = newTemp(Ity_I32);
   18871       IRTemp oldC  = newTemp(Ity_I32);
   18872       IRTemp res   = newTemp(Ity_I32);
   18873       assign( oldV, mk_armg_calculate_flag_v() );
   18874       assign( oldC, mk_armg_calculate_flag_c() );
   18875       assign( res, mkU32(uimm8) );
   18876       putIRegT(rD, mkexpr(res), condT);
   18877       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   18878                          cond_AND_notInIT_T );
   18879       DIP("movs r%u, #%u\n", rD, uimm8);
   18880       goto decode_success;
   18881    }
   18882 
   18883    case BITS5(0,1,0,0,1): {
   18884       /* ------------- LDR Rd, [PC, #imm8 * 4] ------------- */
   18885       /* LDR Rd, [align4(PC) + imm8 * 4] */
   18886       UInt   rD   = INSN0(10,8);
   18887       UInt   imm8 = INSN0(7,0);
   18888       IRTemp ea   = newTemp(Ity_I32);
   18889 
   18890       assign(ea, binop(Iop_Add32,
   18891                        binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   18892                        mkU32(imm8 * 4)));
   18893       put_ITSTATE(old_itstate); // backout
   18894       IRTemp tD = newTemp(Ity_I32);
   18895       loadGuardedLE( tD, ILGop_Ident32, mkexpr(ea), llGetIReg(rD), condT );
   18896       putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18897       put_ITSTATE(new_itstate); // restore
   18898 
   18899       DIP("ldr r%u, [pc, #%u]\n", rD, imm8 * 4);
   18900       goto decode_success;
   18901    }
   18902 
   18903    case BITS5(0,1,1,0,0):   /* STR */
   18904    case BITS5(0,1,1,0,1): { /* LDR */
   18905       /* ------------- LDR Rd, [Rn, #imm5 * 4] ------------- */
   18906       /* ------------- STR Rd, [Rn, #imm5 * 4] ------------- */
   18907       /* LDR/STR Rd, [Rn + imm5 * 4] */
   18908       UInt    rD   = INSN0(2,0);
   18909       UInt    rN   = INSN0(5,3);
   18910       UInt    imm5 = INSN0(10,6);
   18911       UInt    isLD = INSN0(11,11);
   18912 
   18913       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 4));
   18914       put_ITSTATE(old_itstate); // backout
   18915       if (isLD) {
   18916          IRTemp tD = newTemp(Ity_I32);
   18917          loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
   18918          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18919       } else {
   18920          storeGuardedLE( ea, getIRegT(rD), condT );
   18921       }
   18922       put_ITSTATE(new_itstate); // restore
   18923 
   18924       DIP("%s r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 4);
   18925       goto decode_success;
   18926    }
   18927 
   18928    case BITS5(1,0,0,0,0):   /* STRH */
   18929    case BITS5(1,0,0,0,1): { /* LDRH */
   18930       /* ------------- LDRH Rd, [Rn, #imm5 * 2] ------------- */
   18931       /* ------------- STRH Rd, [Rn, #imm5 * 2] ------------- */
   18932       /* LDRH/STRH Rd, [Rn + imm5 * 2] */
   18933       UInt    rD   = INSN0(2,0);
   18934       UInt    rN   = INSN0(5,3);
   18935       UInt    imm5 = INSN0(10,6);
   18936       UInt    isLD = INSN0(11,11);
   18937 
   18938       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 2));
   18939       put_ITSTATE(old_itstate); // backout
   18940       if (isLD) {
   18941          IRTemp tD = newTemp(Ity_I32);
   18942          loadGuardedLE( tD, ILGop_16Uto32, ea, llGetIReg(rD), condT );
   18943          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18944       } else {
   18945          storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
   18946       }
   18947       put_ITSTATE(new_itstate); // restore
   18948 
   18949       DIP("%sh r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 2);
   18950       goto decode_success;
   18951    }
   18952 
   18953    case BITS5(0,1,1,1,0):   /* STRB */
   18954    case BITS5(0,1,1,1,1): { /* LDRB */
   18955       /* ------------- LDRB Rd, [Rn, #imm5] ------------- */
   18956       /* ------------- STRB Rd, [Rn, #imm5] ------------- */
   18957       /* LDRB/STRB Rd, [Rn + imm5] */
   18958       UInt    rD   = INSN0(2,0);
   18959       UInt    rN   = INSN0(5,3);
   18960       UInt    imm5 = INSN0(10,6);
   18961       UInt    isLD = INSN0(11,11);
   18962 
   18963       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5));
   18964       put_ITSTATE(old_itstate); // backout
   18965       if (isLD) {
   18966          IRTemp tD = newTemp(Ity_I32);
   18967          loadGuardedLE( tD, ILGop_8Uto32, ea, llGetIReg(rD), condT );
   18968          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18969       } else {
   18970          storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
   18971       }
   18972       put_ITSTATE(new_itstate); // restore
   18973 
   18974       DIP("%sb r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5);
   18975       goto decode_success;
   18976    }
   18977 
   18978    case BITS5(1,0,0,1,0):   /* STR */
   18979    case BITS5(1,0,0,1,1): { /* LDR */
   18980       /* ------------- LDR Rd, [SP, #imm8 * 4] ------------- */
   18981       /* ------------- STR Rd, [SP, #imm8 * 4] ------------- */
   18982       /* LDR/STR Rd, [SP + imm8 * 4] */
   18983       UInt rD    = INSN0(10,8);
   18984       UInt imm8  = INSN0(7,0);
   18985       UInt isLD  = INSN0(11,11);
   18986 
   18987       IRExpr* ea = binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4));
   18988       put_ITSTATE(old_itstate); // backout
   18989       if (isLD) {
   18990          IRTemp tD = newTemp(Ity_I32);
   18991          loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
   18992          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18993       } else {
   18994          storeGuardedLE(ea, getIRegT(rD), condT);
   18995       }
   18996       put_ITSTATE(new_itstate); // restore
   18997 
   18998       DIP("%s r%u, [sp, #%u]\n", isLD ? "ldr" : "str", rD, imm8 * 4);
   18999       goto decode_success;
   19000    }
   19001 
   19002    case BITS5(1,1,0,0,1): {
   19003       /* ------------- LDMIA Rn!, {reglist} ------------- */
   19004       Int i, nRegs = 0;
   19005       UInt rN   = INSN0(10,8);
   19006       UInt list = INSN0(7,0);
   19007       /* Empty lists aren't allowed. */
   19008       if (list != 0) {
   19009          mk_skip_over_T16_if_cond_is_false(condT);
   19010          condT = IRTemp_INVALID;
   19011          put_ITSTATE(old_itstate);
   19012          // now uncond
   19013 
   19014          IRTemp oldRn = newTemp(Ity_I32);
   19015          IRTemp base  = newTemp(Ity_I32);
   19016          assign(oldRn, getIRegT(rN));
   19017          assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
   19018          for (i = 0; i < 8; i++) {
   19019             if (0 == (list & (1 << i)))
   19020                continue;
   19021             nRegs++;
   19022             putIRegT(
   19023                i, loadLE(Ity_I32,
   19024                          binop(Iop_Add32, mkexpr(base),
   19025                                           mkU32(nRegs * 4 - 4))),
   19026                IRTemp_INVALID
   19027             );
   19028          }
   19029          /* Only do the writeback for rN if it isn't in the list of
   19030             registers to be transferred. */
   19031          if (0 == (list & (1 << rN))) {
   19032             putIRegT(rN,
   19033                      binop(Iop_Add32, mkexpr(oldRn),
   19034                                       mkU32(nRegs * 4)),
   19035                      IRTemp_INVALID
   19036             );
   19037          }
   19038 
   19039          /* Reinstate the ITSTATE update. */
   19040          put_ITSTATE(new_itstate);
   19041 
   19042          DIP("ldmia r%u!, {0x%04x}\n", rN, list);
   19043          goto decode_success;
   19044       }
   19045       break;
   19046    }
   19047 
   19048    case BITS5(1,1,0,0,0): {
   19049       /* ------------- STMIA Rn!, {reglist} ------------- */
   19050       Int i, nRegs = 0;
   19051       UInt rN   = INSN0(10,8);
   19052       UInt list = INSN0(7,0);
   19053       /* Empty lists aren't allowed.  Also, if rN is in the list then
   19054          it must be the lowest numbered register in the list. */
   19055       Bool valid = list != 0;
   19056       if (valid && 0 != (list & (1 << rN))) {
   19057          for (i = 0; i < rN; i++) {
   19058             if (0 != (list & (1 << i)))
   19059                valid = False;
   19060          }
   19061       }
   19062       if (valid) {
   19063          mk_skip_over_T16_if_cond_is_false(condT);
   19064          condT = IRTemp_INVALID;
   19065          put_ITSTATE(old_itstate);
   19066          // now uncond
   19067 
   19068          IRTemp oldRn = newTemp(Ity_I32);
   19069          IRTemp base = newTemp(Ity_I32);
   19070          assign(oldRn, getIRegT(rN));
   19071          assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
   19072          for (i = 0; i < 8; i++) {
   19073             if (0 == (list & (1 << i)))
   19074                continue;
   19075             nRegs++;
   19076             storeLE( binop(Iop_Add32, mkexpr(base), mkU32(nRegs * 4 - 4)),
   19077                      getIRegT(i) );
   19078          }
   19079          /* Always do the writeback. */
   19080          putIRegT(rN,
   19081                   binop(Iop_Add32, mkexpr(oldRn),
   19082                                    mkU32(nRegs * 4)),
   19083                   IRTemp_INVALID);
   19084 
   19085          /* Reinstate the ITSTATE update. */
   19086          put_ITSTATE(new_itstate);
   19087 
   19088          DIP("stmia r%u!, {0x%04x}\n", rN, list);
   19089          goto decode_success;
   19090       }
   19091       break;
   19092    }
   19093 
   19094    case BITS5(0,0,0,0,0):   /* LSLS */
   19095    case BITS5(0,0,0,0,1):   /* LSRS */
   19096    case BITS5(0,0,0,1,0): { /* ASRS */
   19097       /* ---------------- LSLS Rd, Rm, #imm5 ---------------- */
   19098       /* ---------------- LSRS Rd, Rm, #imm5 ---------------- */
   19099       /* ---------------- ASRS Rd, Rm, #imm5 ---------------- */
   19100       UInt   rD   = INSN0(2,0);
   19101       UInt   rM   = INSN0(5,3);
   19102       UInt   imm5 = INSN0(10,6);
   19103       IRTemp res  = newTemp(Ity_I32);
   19104       IRTemp resC = newTemp(Ity_I32);
   19105       IRTemp rMt  = newTemp(Ity_I32);
   19106       IRTemp oldV = newTemp(Ity_I32);
   19107       const HChar* wot  = "???";
   19108       assign(rMt, getIRegT(rM));
   19109       assign(oldV, mk_armg_calculate_flag_v());
   19110       /* Looks like INSN0(12,11) are the standard 'how' encoding.
   19111          Could compactify if the ROR case later appears. */
   19112       switch (INSN0(15,11)) {
   19113          case BITS5(0,0,0,0,0):
   19114             compute_result_and_C_after_LSL_by_imm5(
   19115                dis_buf, &res, &resC, rMt, imm5, rM
   19116             );
   19117             wot = "lsl";
   19118             break;
   19119          case BITS5(0,0,0,0,1):
   19120             compute_result_and_C_after_LSR_by_imm5(
   19121                dis_buf, &res, &resC, rMt, imm5, rM
   19122             );
   19123             wot = "lsr";
   19124             break;
   19125          case BITS5(0,0,0,1,0):
   19126             compute_result_and_C_after_ASR_by_imm5(
   19127                dis_buf, &res, &resC, rMt, imm5, rM
   19128             );
   19129             wot = "asr";
   19130             break;
   19131          default:
   19132             /*NOTREACHED*/vassert(0);
   19133       }
   19134       // not safe to read guest state after this point
   19135       putIRegT(rD, mkexpr(res), condT);
   19136       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
   19137                          cond_AND_notInIT_T );
   19138       /* ignore buf and roll our own output */
   19139       DIP("%ss r%u, r%u, #%u\n", wot, rD, rM, imm5);
   19140       goto decode_success;
   19141    }
   19142 
   19143    case BITS5(1,1,1,0,0): {
   19144       /* ---------------- B #simm11 ---------------- */
   19145       Int  simm11 = INSN0(10,0);
   19146            simm11 = (simm11 << 21) >> 20;
   19147       UInt dst    = simm11 + guest_R15_curr_instr_notENC + 4;
   19148       /* Only allowed outside or last-in IT block; SIGILL if not so. */
   19149       gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   19150       // and skip this insn if not selected; being cleverer is too
   19151       // difficult
   19152       mk_skip_over_T16_if_cond_is_false(condT);
   19153       condT = IRTemp_INVALID;
   19154       // now uncond
   19155       llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
   19156       dres.jk_StopHere = Ijk_Boring;
   19157       dres.whatNext    = Dis_StopHere;
   19158       DIP("b 0x%x\n", dst);
   19159       goto decode_success;
   19160    }
   19161 
   19162    default:
   19163       break; /* examine the next shortest prefix */
   19164 
   19165    }
   19166 
   19167 
   19168    /* ================ 16-bit 15:12 cases ================ */
   19169 
   19170    switch (INSN0(15,12)) {
   19171 
   19172    case BITS4(1,1,0,1): {
   19173       /* ---------------- Bcond #simm8 ---------------- */
   19174       UInt cond  = INSN0(11,8);
   19175       Int  simm8 = INSN0(7,0);
   19176            simm8 = (simm8 << 24) >> 23;
   19177       UInt dst   = simm8 + guest_R15_curr_instr_notENC + 4;
   19178       if (cond != ARMCondAL && cond != ARMCondNV) {
   19179          /* Not allowed in an IT block; SIGILL if so. */
   19180          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   19181 
   19182          IRTemp kondT = newTemp(Ity_I32);
   19183          assign( kondT, mk_armg_calculate_condition(cond) );
   19184          stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
   19185                             Ijk_Boring,
   19186                             IRConst_U32(dst | 1/*CPSR.T*/),
   19187                             OFFB_R15T ));
   19188          llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2)
   19189                               | 1 /*CPSR.T*/ ));
   19190          dres.jk_StopHere = Ijk_Boring;
   19191          dres.whatNext    = Dis_StopHere;
   19192          DIP("b%s 0x%x\n", nCC(cond), dst);
   19193          goto decode_success;
   19194       }
   19195       break;
   19196    }
   19197 
   19198    default:
   19199       break; /* hmm, nothing matched */
   19200 
   19201    }
   19202 
   19203    /* ================ 16-bit misc cases ================ */
   19204 
   19205    switch (INSN0(15,0)) {
   19206       case 0xBF00:
   19207          /* ------ NOP ------ */
   19208          DIP("nop\n");
   19209          goto decode_success;
   19210       case 0xBF10: // YIELD
   19211       case 0xBF20: // WFE
   19212          /* ------ WFE, YIELD ------ */
   19213          /* Both appear to get used as spin-loop hints.  Do the usual thing,
   19214             which is to continue after yielding. */
   19215          stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
   19216                             Ijk_Yield,
   19217                             IRConst_U32((guest_R15_curr_instr_notENC + 2)
   19218                                         | 1 /*CPSR.T*/),
   19219                             OFFB_R15T ));
   19220          Bool isWFE = INSN0(15,0) == 0xBF20;
   19221          DIP(isWFE ? "wfe\n" : "yield\n");
   19222          goto decode_success;
   19223       case 0xBF40:
   19224          /* ------ SEV ------ */
   19225          /* Treat this as a no-op.  Any matching WFEs won't really
   19226             cause the host CPU to snooze; they just cause V to try to
   19227             run some other thread for a while.  So there's no point in
   19228             really doing anything for SEV. */
   19229          DIP("sev\n");
   19230          goto decode_success;
   19231       default:
   19232          break; /* fall through */
   19233    }
   19234 
   19235    /* ----------------------------------------------------------- */
   19236    /* --                                                       -- */
   19237    /* -- Thumb 32-bit integer instructions                     -- */
   19238    /* --                                                       -- */
   19239    /* ----------------------------------------------------------- */
   19240 
   19241 #  define INSN1(_bMax,_bMin)  SLICE_UInt(((UInt)insn1), (_bMax), (_bMin))
   19242 
   19243    /* second 16 bits of the instruction, if any */
   19244    vassert(insn1 == 0);
   19245    insn1 = getUShortLittleEndianly( guest_instr+2 );
   19246 
   19247    anOp   = Iop_INVALID; /* paranoia */
   19248    anOpNm = NULL;        /* paranoia */
   19249 
   19250    /* Change result defaults to suit 32-bit insns. */
   19251    vassert(dres.whatNext   == Dis_Continue);
   19252    vassert(dres.len        == 2);
   19253    vassert(dres.continueAt == 0);
   19254    dres.len = 4;
   19255 
   19256    /* ---------------- BL/BLX simm26 ---------------- */
   19257    if (BITS5(1,1,1,1,0) == INSN0(15,11) && BITS2(1,1) == INSN1(15,14)) {
   19258       UInt isBL = INSN1(12,12);
   19259       UInt bS   = INSN0(10,10);
   19260       UInt bJ1  = INSN1(13,13);
   19261       UInt bJ2  = INSN1(11,11);
   19262       UInt bI1  = 1 ^ (bJ1 ^ bS);
   19263       UInt bI2  = 1 ^ (bJ2 ^ bS);
   19264       Int simm25
   19265          =   (bS          << (1 + 1 + 10 + 11 + 1))
   19266            | (bI1         << (1 + 10 + 11 + 1))
   19267            | (bI2         << (10 + 11 + 1))
   19268            | (INSN0(9,0)  << (11 + 1))
   19269            | (INSN1(10,0) << 1);
   19270       simm25 = (simm25 << 7) >> 7;
   19271 
   19272       vassert(0 == (guest_R15_curr_instr_notENC & 1));
   19273       UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
   19274 
   19275       /* One further validity case to check: in the case of BLX
   19276          (not-BL), that insn1[0] must be zero. */
   19277       Bool valid = True;
   19278       if (isBL == 0 && INSN1(0,0) == 1) valid = False;
   19279       if (valid) {
   19280          /* Only allowed outside or last-in IT block; SIGILL if not so. */
   19281          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   19282          // and skip this insn if not selected; being cleverer is too
   19283          // difficult
   19284          mk_skip_over_T32_if_cond_is_false(condT);
   19285          condT = IRTemp_INVALID;
   19286          // now uncond
   19287 
   19288          /* We're returning to Thumb code, hence "| 1" */
   19289          putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 4) | 1 ),
   19290                    IRTemp_INVALID);
   19291          if (isBL) {
   19292             /* BL: unconditional T -> T call */
   19293             /* we're calling Thumb code, hence "| 1" */
   19294             llPutIReg(15, mkU32( dst | 1 ));
   19295             DIP("bl 0x%x (stay in Thumb mode)\n", dst);
   19296          } else {
   19297             /* BLX: unconditional T -> A call */
   19298             /* we're calling ARM code, hence "& ~3" to align to a
   19299                valid ARM insn address */
   19300             llPutIReg(15, mkU32( dst & ~3 ));
   19301             DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
   19302          }
   19303          dres.whatNext    = Dis_StopHere;
   19304          dres.jk_StopHere = Ijk_Call;
   19305          goto decode_success;
   19306       }
   19307    }
   19308 
   19309    /* ---------------- {LD,ST}M{IA,DB} ---------------- */
   19310    if (0x3a2 == INSN0(15,6) // {LD,ST}MIA
   19311        || 0x3a4 == INSN0(15,6)) { // {LD,ST}MDB
   19312       UInt bW      = INSN0(5,5); /* writeback Rn ? */
   19313       UInt bL      = INSN0(4,4);
   19314       UInt rN      = INSN0(3,0);
   19315       UInt bP      = INSN1(15,15); /* reglist entry for r15 */
   19316       UInt bM      = INSN1(14,14); /* reglist entry for r14 */
   19317       UInt rLmost  = INSN1(12,0);  /* reglist entry for r0 .. 12 */
   19318       UInt rL13    = INSN1(13,13); /* must be zero */
   19319       UInt regList = 0;
   19320       Bool valid   = True;
   19321 
   19322       UInt bINC    = 1;
   19323       UInt bBEFORE = 0;
   19324       if (INSN0(15,6) == 0x3a4) {
   19325          bINC    = 0;
   19326          bBEFORE = 1;
   19327       }
   19328 
   19329       /* detect statically invalid cases, and construct the final
   19330          reglist */
   19331       if (rL13 == 1)
   19332          valid = False;
   19333 
   19334       if (bL == 1) {
   19335          regList = (bP << 15) | (bM << 14) | rLmost;
   19336          if (rN == 15)                       valid = False;
   19337          if (popcount32(regList) < 2)        valid = False;
   19338          if (bP == 1 && bM == 1)             valid = False;
   19339          if (bW == 1 && (regList & (1<<rN))) valid = False;
   19340       } else {
   19341          regList = (bM << 14) | rLmost;
   19342          if (bP == 1)                        valid = False;
   19343          if (rN == 15)                       valid = False;
   19344          if (popcount32(regList) < 2)        valid = False;
   19345          if (bW == 1 && (regList & (1<<rN))) valid = False;
   19346       }
   19347 
   19348       if (valid) {
   19349          if (bL == 1 && bP == 1) {
   19350             // We'll be writing the PC.  Hence:
   19351             /* Only allowed outside or last-in IT block; SIGILL if not so. */
   19352             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   19353          }
   19354 
   19355          /* Go uncond: */
   19356          mk_skip_over_T32_if_cond_is_false(condT);
   19357          condT = IRTemp_INVALID;
   19358          // now uncond
   19359 
   19360          /* Generate the IR.  This might generate a write to R15. */
   19361          mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
   19362 
   19363          if (bL == 1 && (regList & (1<<15))) {
   19364             // If we wrote to R15, we have an interworking return to
   19365             // deal with.
   19366             llPutIReg(15, llGetIReg(15));
   19367             dres.jk_StopHere = Ijk_Ret;
   19368             dres.whatNext    = Dis_StopHere;
   19369          }
   19370 
   19371          DIP("%sm%c%c r%u%s, {0x%04x}\n",
   19372               bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
   19373               rN, bW ? "!" : "", regList);
   19374 
   19375          goto decode_success;
   19376       }
   19377    }
   19378 
   19379    /* -------------- (T3) ADD{S}.W Rd, Rn, #constT -------------- */
   19380    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19381        && INSN0(9,5) == BITS5(0,1,0,0,0)
   19382        && INSN1(15,15) == 0) {
   19383       UInt bS = INSN0(4,4);
   19384       UInt rN = INSN0(3,0);
   19385       UInt rD = INSN1(11,8);
   19386       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
   19387       /* but allow "add.w reg, sp, #constT" for reg != PC */
   19388       if (!valid && rD <= 14 && rN == 13)
   19389          valid = True;
   19390       if (valid) {
   19391          IRTemp argL  = newTemp(Ity_I32);
   19392          IRTemp argR  = newTemp(Ity_I32);
   19393          IRTemp res   = newTemp(Ity_I32);
   19394          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   19395          assign(argL, getIRegT(rN));
   19396          assign(argR, mkU32(imm32));
   19397          assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
   19398          putIRegT(rD, mkexpr(res), condT);
   19399          if (bS == 1)
   19400             setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
   19401          DIP("add%s.w r%u, r%u, #%u\n",
   19402              bS == 1 ? "s" : "", rD, rN, imm32);
   19403          goto decode_success;
   19404       }
   19405    }
   19406 
   19407    /* ---------------- (T4) ADDW Rd, Rn, #uimm12 -------------- */
   19408    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19409        && INSN0(9,4) == BITS6(1,0,0,0,0,0)
   19410        && INSN1(15,15) == 0) {
   19411       UInt rN = INSN0(3,0);
   19412       UInt rD = INSN1(11,8);
   19413       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
   19414       /* but allow "addw reg, sp, #uimm12" for reg != PC */
   19415       if (!valid && rD <= 14 && rN == 13)
   19416          valid = True;
   19417       if (valid) {
   19418          IRTemp argL = newTemp(Ity_I32);
   19419          IRTemp argR = newTemp(Ity_I32);
   19420          IRTemp res  = newTemp(Ity_I32);
   19421          UInt imm12  = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
   19422          assign(argL, getIRegT(rN));
   19423          assign(argR, mkU32(imm12));
   19424          assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
   19425          putIRegT(rD, mkexpr(res), condT);
   19426          DIP("addw r%u, r%u, #%u\n", rD, rN, imm12);
   19427          goto decode_success;
   19428       }
   19429    }
   19430 
   19431    /* ---------------- (T2) CMP.W Rn, #constT ---------------- */
   19432    /* ---------------- (T2) CMN.W Rn, #constT ---------------- */
   19433    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19434        && (   INSN0(9,4) == BITS6(0,1,1,0,1,1)  // CMP
   19435            || INSN0(9,4) == BITS6(0,1,0,0,0,1)) // CMN
   19436        && INSN1(15,15) == 0
   19437        && INSN1(11,8) == BITS4(1,1,1,1)) {
   19438       UInt rN = INSN0(3,0);
   19439       if (rN != 15) {
   19440          IRTemp argL  = newTemp(Ity_I32);
   19441          IRTemp argR  = newTemp(Ity_I32);
   19442          Bool   isCMN = INSN0(9,4) == BITS6(0,1,0,0,0,1);
   19443          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   19444          assign(argL, getIRegT(rN));
   19445          assign(argR, mkU32(imm32));
   19446          setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   19447                          argL, argR, condT );
   19448          DIP("%s.w r%u, #%u\n", isCMN ? "cmn" : "cmp", rN, imm32);
   19449          goto decode_success;
   19450       }
   19451    }
   19452 
   19453    /* -------------- (T1) TST.W Rn, #constT -------------- */
   19454    /* -------------- (T1) TEQ.W Rn, #constT -------------- */
   19455    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19456        && (   INSN0(9,4) == BITS6(0,0,0,0,0,1)  // TST
   19457            || INSN0(9,4) == BITS6(0,0,1,0,0,1)) // TEQ
   19458        && INSN1(15,15) == 0
   19459        && INSN1(11,8) == BITS4(1,1,1,1)) {
   19460       UInt rN = INSN0(3,0);
   19461       if (!isBadRegT(rN)) { // yes, really, it's inconsistent with CMP.W
   19462          Bool  isTST  = INSN0(9,4) == BITS6(0,0,0,0,0,1);
   19463          IRTemp argL  = newTemp(Ity_I32);
   19464          IRTemp argR  = newTemp(Ity_I32);
   19465          IRTemp res   = newTemp(Ity_I32);
   19466          IRTemp oldV  = newTemp(Ity_I32);
   19467          IRTemp oldC  = newTemp(Ity_I32);
   19468          Bool   updC  = False;
   19469          UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
   19470          assign(argL, getIRegT(rN));
   19471          assign(argR, mkU32(imm32));
   19472          assign(res,  binop(isTST ? Iop_And32 : Iop_Xor32,
   19473                             mkexpr(argL), mkexpr(argR)));
   19474          assign( oldV, mk_armg_calculate_flag_v() );
   19475          assign( oldC, updC
   19476                        ? mkU32((imm32 >> 31) & 1)
   19477                        : mk_armg_calculate_flag_c() );
   19478          setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
   19479          DIP("%s.w r%u, #%u\n", isTST ? "tst" : "teq", rN, imm32);
   19480          goto decode_success;
   19481       }
   19482    }
   19483 
   19484    /* -------------- (T3) SUB{S}.W Rd, Rn, #constT -------------- */
   19485    /* -------------- (T3) RSB{S}.W Rd, Rn, #constT -------------- */
   19486    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19487        && (INSN0(9,5) == BITS5(0,1,1,0,1) // SUB
   19488            || INSN0(9,5) == BITS5(0,1,1,1,0)) // RSB
   19489        && INSN1(15,15) == 0) {
   19490       Bool isRSB = INSN0(9,5) == BITS5(0,1,1,1,0);
   19491       UInt bS    = INSN0(4,4);
   19492       UInt rN    = INSN0(3,0);
   19493       UInt rD    = INSN1(11,8);
   19494       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
   19495       /* but allow "sub{s}.w reg, sp, #constT" for reg != PC;
   19496          this is (T2) of "SUB (SP minus immediate)" */
   19497       if (!valid && !isRSB && rN == 13 && rD != 15)
   19498          valid = True;
   19499       if (valid) {
   19500          IRTemp argL  = newTemp(Ity_I32);
   19501          IRTemp argR  = newTemp(Ity_I32);
   19502          IRTemp res   = newTemp(Ity_I32);
   19503          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   19504          assign(argL, getIRegT(rN));
   19505          assign(argR, mkU32(imm32));
   19506          assign(res,  isRSB
   19507                       ? binop(Iop_Sub32, mkexpr(argR), mkexpr(argL))
   19508                       : binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
   19509          putIRegT(rD, mkexpr(res), condT);
   19510          if (bS == 1) {
   19511             if (isRSB)
   19512                setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
   19513             else
   19514                setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   19515          }
   19516          DIP("%s%s.w r%u, r%u, #%u\n",
   19517              isRSB ? "rsb" : "sub", bS == 1 ? "s" : "", rD, rN, imm32);
   19518          goto decode_success;
   19519       }
   19520    }
   19521 
   19522    /* -------------- (T4) SUBW Rd, Rn, #uimm12 ------------------- */
   19523    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19524        && INSN0(9,4) == BITS6(1,0,1,0,1,0)
   19525        && INSN1(15,15) == 0) {
   19526       UInt rN = INSN0(3,0);
   19527       UInt rD = INSN1(11,8);
   19528       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
   19529       /* but allow "subw sp, sp, #uimm12" */
   19530       if (!valid && rD == 13 && rN == 13)
   19531          valid = True;
   19532       if (valid) {
   19533          IRTemp argL  = newTemp(Ity_I32);
   19534          IRTemp argR  = newTemp(Ity_I32);
   19535          IRTemp res   = newTemp(Ity_I32);
   19536          UInt imm12   = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
   19537          assign(argL, getIRegT(rN));
   19538          assign(argR, mkU32(imm12));
   19539          assign(res,  binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
   19540          putIRegT(rD, mkexpr(res), condT);
   19541          DIP("subw r%u, r%u, #%u\n", rD, rN, imm12);
   19542          goto decode_success;
   19543       }
   19544    }
   19545 
   19546    /* -------------- (T1) ADC{S}.W Rd, Rn, #constT -------------- */
   19547    /* -------------- (T1) SBC{S}.W Rd, Rn, #constT -------------- */
   19548    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19549        && (   INSN0(9,5) == BITS5(0,1,0,1,0)  // ADC
   19550            || INSN0(9,5) == BITS5(0,1,0,1,1)) // SBC
   19551        && INSN1(15,15) == 0) {
   19552       /* ADC:  Rd = Rn + constT + oldC */
   19553       /* SBC:  Rd = Rn - constT - (oldC ^ 1) */
   19554       UInt bS    = INSN0(4,4);
   19555       UInt rN    = INSN0(3,0);
   19556       UInt rD    = INSN1(11,8);
   19557       if (!isBadRegT(rN) && !isBadRegT(rD)) {
   19558          IRTemp argL  = newTemp(Ity_I32);
   19559          IRTemp argR  = newTemp(Ity_I32);
   19560          IRTemp res   = newTemp(Ity_I32);
   19561          IRTemp oldC  = newTemp(Ity_I32);
   19562          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   19563          assign(argL, getIRegT(rN));
   19564          assign(argR, mkU32(imm32));
   19565          assign(oldC, mk_armg_calculate_flag_c() );
   19566          const HChar* nm  = "???";
   19567          switch (INSN0(9,5)) {
   19568             case BITS5(0,1,0,1,0): // ADC
   19569                nm = "adc";
   19570                assign(res,
   19571                       binop(Iop_Add32,
   19572                             binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
   19573                             mkexpr(oldC) ));
   19574                putIRegT(rD, mkexpr(res), condT);
   19575                if (bS)
   19576                   setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
   19577                                      argL, argR, oldC, condT );
   19578                break;
   19579             case BITS5(0,1,0,1,1): // SBC
   19580                nm = "sbc";
   19581                assign(res,
   19582                       binop(Iop_Sub32,
   19583                             binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
   19584                             binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   19585                putIRegT(rD, mkexpr(res), condT);
   19586                if (bS)
   19587                   setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   19588                                      argL, argR, oldC, condT );
   19589                break;
   19590             default:
   19591               vassert(0);
   19592          }
   19593          DIP("%s%s.w r%u, r%u, #%u\n",
   19594              nm, bS == 1 ? "s" : "", rD, rN, imm32);
   19595          goto decode_success;
   19596       }
   19597    }
   19598 
   19599    /* -------------- (T1) ORR{S}.W Rd, Rn, #constT -------------- */
   19600    /* -------------- (T1) AND{S}.W Rd, Rn, #constT -------------- */
   19601    /* -------------- (T1) BIC{S}.W Rd, Rn, #constT -------------- */
   19602    /* -------------- (T1) EOR{S}.W Rd, Rn, #constT -------------- */
   19603    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19604        && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // ORR
   19605            || INSN0(9,5) == BITS5(0,0,0,0,0)  // AND
   19606            || INSN0(9,5) == BITS5(0,0,0,0,1)  // BIC
   19607            || INSN0(9,5) == BITS5(0,0,1,0,0)  // EOR
   19608            || INSN0(9,5) == BITS5(0,0,0,1,1)) // ORN
   19609        && INSN1(15,15) == 0) {
   19610       UInt bS = INSN0(4,4);
   19611       UInt rN = INSN0(3,0);
   19612       UInt rD = INSN1(11,8);
   19613       if (!isBadRegT(rN) && !isBadRegT(rD)) {
   19614          Bool   notArgR = False;
   19615          IROp   op      = Iop_INVALID;
   19616          const HChar* nm = "???";
   19617          switch (INSN0(9,5)) {
   19618             case BITS5(0,0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
   19619             case BITS5(0,0,0,0,0): op = Iop_And32; nm = "and"; break;
   19620             case BITS5(0,0,0,0,1): op = Iop_And32; nm = "bic";
   19621                                    notArgR = True; break;
   19622             case BITS5(0,0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
   19623             case BITS5(0,0,0,1,1): op = Iop_Or32;  nm = "orn";
   19624                                    notArgR = True; break;
   19625             default: vassert(0);
   19626          }
   19627          IRTemp argL  = newTemp(Ity_I32);
   19628          IRTemp argR  = newTemp(Ity_I32);
   19629          IRTemp res   = newTemp(Ity_I32);
   19630          Bool   updC  = False;
   19631          UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
   19632          assign(argL, getIRegT(rN));
   19633          assign(argR, mkU32(notArgR ? ~imm32 : imm32));
   19634          assign(res,  binop(op, mkexpr(argL), mkexpr(argR)));
   19635          putIRegT(rD, mkexpr(res), condT);
   19636          if (bS) {
   19637             IRTemp oldV = newTemp(Ity_I32);
   19638             IRTemp oldC = newTemp(Ity_I32);
   19639             assign( oldV, mk_armg_calculate_flag_v() );
   19640             assign( oldC, updC
   19641                           ? mkU32((imm32 >> 31) & 1)
   19642                           : mk_armg_calculate_flag_c() );
   19643             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19644                                condT );
   19645          }
   19646          DIP("%s%s.w r%u, r%u, #%u\n",
   19647              nm, bS == 1 ? "s" : "", rD, rN, imm32);
   19648          goto decode_success;
   19649       }
   19650    }
   19651 
   19652    /* ---------- (T3) ADD{S}.W Rd, Rn, Rm, {shift} ---------- */
   19653    /* ---------- (T3) SUB{S}.W Rd, Rn, Rm, {shift} ---------- */
   19654    /* ---------- (T3) RSB{S}.W Rd, Rn, Rm, {shift} ---------- */
   19655    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   19656        && (   INSN0(8,5) == BITS4(1,0,0,0)  // add subopc
   19657            || INSN0(8,5) == BITS4(1,1,0,1)  // sub subopc
   19658            || INSN0(8,5) == BITS4(1,1,1,0)) // rsb subopc
   19659        && INSN1(15,15) == 0) {
   19660       UInt rN   = INSN0(3,0);
   19661       UInt rD   = INSN1(11,8);
   19662       UInt rM   = INSN1(3,0);
   19663       UInt bS   = INSN0(4,4);
   19664       UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   19665       UInt how  = INSN1(5,4);
   19666 
   19667       Bool valid = !isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM);
   19668       /* but allow "add.w reg, sp, reg, lsl #N" for N=0,1,2 or 3
   19669          (T3) "ADD (SP plus register)" */
   19670       if (!valid && INSN0(8,5) == BITS4(1,0,0,0) // add
   19671           && rD != 15 && rN == 13 && imm5 <= 3 && how == 0) {
   19672          valid = True;
   19673       }
   19674       /* also allow "sub.w reg, sp, reg, lsl #N" for N=0,1,2 or 3
   19675          (T1) "SUB (SP minus register)" */
   19676       if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // sub
   19677           && rD != 15 && rN == 13 && imm5 <= 3 && how == 0) {
   19678          valid = True;
   19679       }
   19680       if (valid) {
   19681          Bool   swap = False;
   19682          IROp   op   = Iop_INVALID;
   19683          const HChar* nm = "???";
   19684          switch (INSN0(8,5)) {
   19685             case BITS4(1,0,0,0): op = Iop_Add32; nm = "add"; break;
   19686             case BITS4(1,1,0,1): op = Iop_Sub32; nm = "sub"; break;
   19687             case BITS4(1,1,1,0): op = Iop_Sub32; nm = "rsb";
   19688                                  swap = True; break;
   19689             default: vassert(0);
   19690          }
   19691 
   19692          IRTemp argL = newTemp(Ity_I32);
   19693          assign(argL, getIRegT(rN));
   19694 
   19695          IRTemp rMt = newTemp(Ity_I32);
   19696          assign(rMt, getIRegT(rM));
   19697 
   19698          IRTemp argR = newTemp(Ity_I32);
   19699          compute_result_and_C_after_shift_by_imm5(
   19700             dis_buf, &argR, NULL, rMt, how, imm5, rM
   19701          );
   19702 
   19703          IRTemp res = newTemp(Ity_I32);
   19704          assign(res, swap
   19705                      ? binop(op, mkexpr(argR), mkexpr(argL))
   19706                      : binop(op, mkexpr(argL), mkexpr(argR)));
   19707 
   19708          putIRegT(rD, mkexpr(res), condT);
   19709          if (bS) {
   19710             switch (op) {
   19711                case Iop_Add32:
   19712                   setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
   19713                   break;
   19714                case Iop_Sub32:
   19715                   if (swap)
   19716                      setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
   19717                   else
   19718                      setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   19719                   break;
   19720                default:
   19721                   vassert(0);
   19722             }
   19723          }
   19724 
   19725          DIP("%s%s.w r%u, r%u, %s\n",
   19726              nm, bS ? "s" : "", rD, rN, dis_buf);
   19727          goto decode_success;
   19728       }
   19729    }
   19730 
   19731    /* ---------- (T3) ADC{S}.W Rd, Rn, Rm, {shift} ---------- */
   19732    /* ---------- (T2) SBC{S}.W Rd, Rn, Rm, {shift} ---------- */
   19733    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   19734        && (   INSN0(8,5) == BITS4(1,0,1,0)   // adc subopc
   19735            || INSN0(8,5) == BITS4(1,0,1,1))  // sbc subopc
   19736        && INSN1(15,15) == 0) {
   19737       /* ADC:  Rd = Rn + shifter_operand + oldC */
   19738       /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
   19739       UInt rN = INSN0(3,0);
   19740       UInt rD = INSN1(11,8);
   19741       UInt rM = INSN1(3,0);
   19742       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   19743          UInt bS   = INSN0(4,4);
   19744          UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   19745          UInt how  = INSN1(5,4);
   19746 
   19747          IRTemp argL = newTemp(Ity_I32);
   19748          assign(argL, getIRegT(rN));
   19749 
   19750          IRTemp rMt = newTemp(Ity_I32);
   19751          assign(rMt, getIRegT(rM));
   19752 
   19753          IRTemp oldC = newTemp(Ity_I32);
   19754          assign(oldC, mk_armg_calculate_flag_c());
   19755 
   19756          IRTemp argR = newTemp(Ity_I32);
   19757          compute_result_and_C_after_shift_by_imm5(
   19758             dis_buf, &argR, NULL, rMt, how, imm5, rM
   19759          );
   19760 
   19761          const HChar* nm  = "???";
   19762          IRTemp res = newTemp(Ity_I32);
   19763          switch (INSN0(8,5)) {
   19764             case BITS4(1,0,1,0): // ADC
   19765                nm = "adc";
   19766                assign(res,
   19767                       binop(Iop_Add32,
   19768                             binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
   19769                             mkexpr(oldC) ));
   19770                putIRegT(rD, mkexpr(res), condT);
   19771                if (bS)
   19772                   setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
   19773                                      argL, argR, oldC, condT );
   19774                break;
   19775             case BITS4(1,0,1,1): // SBC
   19776                nm = "sbc";
   19777                assign(res,
   19778                       binop(Iop_Sub32,
   19779                             binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
   19780                             binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   19781                putIRegT(rD, mkexpr(res), condT);
   19782                if (bS)
   19783                   setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   19784                                      argL, argR, oldC, condT );
   19785                break;
   19786             default:
   19787                vassert(0);
   19788          }
   19789 
   19790          DIP("%s%s.w r%u, r%u, %s\n",
   19791              nm, bS ? "s" : "", rD, rN, dis_buf);
   19792          goto decode_success;
   19793       }
   19794    }
   19795 
   19796    /* ---------- (T3) AND{S}.W Rd, Rn, Rm, {shift} ---------- */
   19797    /* ---------- (T3) ORR{S}.W Rd, Rn, Rm, {shift} ---------- */
   19798    /* ---------- (T3) EOR{S}.W Rd, Rn, Rm, {shift} ---------- */
   19799    /* ---------- (T3) BIC{S}.W Rd, Rn, Rm, {shift} ---------- */
   19800    /* ---------- (T1) ORN{S}.W Rd, Rn, Rm, {shift} ---------- */
   19801    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   19802        && (   INSN0(8,5) == BITS4(0,0,0,0)  // and subopc
   19803            || INSN0(8,5) == BITS4(0,0,1,0)  // orr subopc
   19804            || INSN0(8,5) == BITS4(0,1,0,0)  // eor subopc
   19805            || INSN0(8,5) == BITS4(0,0,0,1)  // bic subopc
   19806            || INSN0(8,5) == BITS4(0,0,1,1)) // orn subopc
   19807        && INSN1(15,15) == 0) {
   19808       UInt rN = INSN0(3,0);
   19809       UInt rD = INSN1(11,8);
   19810       UInt rM = INSN1(3,0);
   19811       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   19812          Bool notArgR = False;
   19813          IROp op      = Iop_INVALID;
   19814          const HChar* nm  = "???";
   19815          switch (INSN0(8,5)) {
   19816             case BITS4(0,0,0,0): op = Iop_And32; nm = "and"; break;
   19817             case BITS4(0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
   19818             case BITS4(0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
   19819             case BITS4(0,0,0,1): op = Iop_And32; nm = "bic";
   19820                                  notArgR = True; break;
   19821             case BITS4(0,0,1,1): op = Iop_Or32; nm = "orn";
   19822                                  notArgR = True; break;
   19823             default: vassert(0);
   19824          }
   19825          UInt bS   = INSN0(4,4);
   19826          UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   19827          UInt how  = INSN1(5,4);
   19828 
   19829          IRTemp rNt = newTemp(Ity_I32);
   19830          assign(rNt, getIRegT(rN));
   19831 
   19832          IRTemp rMt = newTemp(Ity_I32);
   19833          assign(rMt, getIRegT(rM));
   19834 
   19835          IRTemp argR = newTemp(Ity_I32);
   19836          IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   19837 
   19838          compute_result_and_C_after_shift_by_imm5(
   19839             dis_buf, &argR, bS ? &oldC : NULL, rMt, how, imm5, rM
   19840          );
   19841 
   19842          IRTemp res = newTemp(Ity_I32);
   19843          if (notArgR) {
   19844             vassert(op == Iop_And32 || op == Iop_Or32);
   19845             assign(res, binop(op, mkexpr(rNt),
   19846                                   unop(Iop_Not32, mkexpr(argR))));
   19847          } else {
   19848             assign(res, binop(op, mkexpr(rNt), mkexpr(argR)));
   19849          }
   19850 
   19851          putIRegT(rD, mkexpr(res), condT);
   19852          if (bS) {
   19853             IRTemp oldV = newTemp(Ity_I32);
   19854             assign( oldV, mk_armg_calculate_flag_v() );
   19855             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19856                                condT );
   19857          }
   19858 
   19859          DIP("%s%s.w r%u, r%u, %s\n",
   19860              nm, bS ? "s" : "", rD, rN, dis_buf);
   19861          goto decode_success;
   19862       }
   19863    }
   19864 
   19865    /* -------------- (T?) LSL{S}.W Rd, Rn, Rm -------------- */
   19866    /* -------------- (T?) LSR{S}.W Rd, Rn, Rm -------------- */
   19867    /* -------------- (T?) ASR{S}.W Rd, Rn, Rm -------------- */
   19868    /* -------------- (T?) ROR{S}.W Rd, Rn, Rm -------------- */
            /* Register-controlled shifts: Rd = Rn <shift> Rm.  When the S
               bit is set, the flags thunk is additionally rewritten as
               ARMG_CC_OP_LOGIC from the result, the carry produced by the
               shift helper, and the V flag value read beforehand. */
   19869    if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,0,0)
   19870        && INSN1(15,12) == BITS4(1,1,1,1)
   19871        && INSN1(7,4) == BITS4(0,0,0,0)) {
   19872       UInt how = INSN0(6,5); // standard encoding; also indexes nms[] below
   19873       UInt rN  = INSN0(3,0);
   19874       UInt rD  = INSN1(11,8);
   19875       UInt rM  = INSN1(3,0);
   19876       UInt bS  = INSN0(4,4); // 1: also update the flags
   19877       Bool valid = !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rD);
   19878       if (valid) {
   19879          IRTemp rNt    = newTemp(Ity_I32);
   19880          IRTemp rMt    = newTemp(Ity_I32);
   19881          IRTemp res    = newTemp(Ity_I32);
                  /* The carry and V temporaries are only allocated for the
                     flag-setting form. */
   19882          IRTemp oldC   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   19883          IRTemp oldV   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   19884          const HChar* nms[4] = { "lsl", "lsr", "asr", "ror" };
   19885          const HChar* nm     = nms[how];
   19886          assign(rNt, getIRegT(rN));
   19887          assign(rMt, getIRegT(rM));
                  /* The helper fills in res (and oldC when a carry is
                     requested) and writes the operand text into dis_buf. */
   19888          compute_result_and_C_after_shift_by_reg(
   19889             dis_buf, &res, bS ? &oldC : NULL,
   19890             rNt, how, rMt, rN, rM
   19891          );
                  /* V is read before the register write and the new thunk
                     are emitted. */
   19892          if (bS)
   19893             assign(oldV, mk_armg_calculate_flag_v());
   19894          putIRegT(rD, mkexpr(res), condT);
   19895          if (bS) {
   19896             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19897                                condT );
   19898          }
   19899          DIP("%s%s.w r%u, r%u, r%u\n",
   19900              nm, bS ? "s" : "", rD, rN, rM);
   19901          goto decode_success;
   19902       }
   19903    }
   19904 
   19905    /* ------------ (T?) MOV{S}.W Rd, Rn, {shift} ------------ */
   19906    /* ------------ (T?) MVN{S}.W Rd, Rn, {shift} ------------ */
            /* Rd = shifted Rn (MOV) or its bitwise complement (MVN), with
               the shift kind and immediate amount taken from INSN1 fields.
               The S forms also rewrite the flags thunk.  The 0xFFCF mask
               drops bits 5 (MVN) and 4 (S) from the opcode match. */
   19907    if ((INSN0(15,0) & 0xFFCF) == 0xEA4F
   19908        && INSN1(15,15) == 0) {
   19909       UInt rD      = INSN1(11,8);
   19910       UInt rN      = INSN1(3,0);
   19911       UInt bS      = INSN0(4,4);
   19912       UInt isMVN   = INSN0(5,5);
               /* Flag-setting and MVN forms apply isBadRegT to both
                  registers.  Plain MOV is more permissive: it only rejects
                  r15 as either register, and r13 as both at once. */
   19913       Bool regsOK  = (bS || isMVN)
   19914                         ? (!isBadRegT(rD) && !isBadRegT(rN))
   19915                         : (rD != 15 && rN != 15 && (rD != 13 || rN != 13));
   19916       if (regsOK) {
   19917          UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
   19918          UInt how   = INSN1(5,4);
   19919 
   19920          IRTemp rNt = newTemp(Ity_I32);
   19921          assign(rNt, getIRegT(rN));
   19922 
                  /* Despite its name, oldRn is passed in the helper's
                     result slot, so it receives the shifted operand. */
   19923          IRTemp oldRn = newTemp(Ity_I32);
   19924          IRTemp oldC  = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   19925          compute_result_and_C_after_shift_by_imm5(
   19926             dis_buf, &oldRn, bS ? &oldC : NULL, rNt, how, imm5, rN
   19927          );
   19928 
   19929          IRTemp res = newTemp(Ity_I32);
   19930          assign(res, isMVN ? unop(Iop_Not32, mkexpr(oldRn))
   19931                            : mkexpr(oldRn));
   19932 
   19933          putIRegT(rD, mkexpr(res), condT);
   19934          if (bS) {
                     /* Logic-style flag update: result, carry from the
                        shift helper, and the current V flag. */
   19935             IRTemp oldV = newTemp(Ity_I32);
   19936             assign( oldV, mk_armg_calculate_flag_v() );
   19937             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT);
   19938          }
   19939          DIP("%s%s.w r%u, %s\n",
   19940              isMVN ? "mvn" : "mov", bS ? "s" : "", rD, dis_buf);
   19941          goto decode_success;
   19942       }
   19943    }
   19944 
   19945    /* -------------- (T?) TST.W Rn, Rm, {shift} -------------- */
   19946    /* -------------- (T?) TEQ.W Rn, Rm, {shift} -------------- */
            /* Flag-only comparisons: compute Rn AND (TST) or Rn XOR (TEQ)
               the shifted Rm and record the outcome in the flags thunk.
               No register is written.  The INSN1(11,8) field must be
               1111 for this case to match. */
   19947    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   19948        && (   INSN0(8,4) == BITS5(0,0,0,0,1)  // TST
   19949            || INSN0(8,4) == BITS5(0,1,0,0,1)) // TEQ
   19950        && INSN1(15,15) == 0
   19951        && INSN1(11,8) == BITS4(1,1,1,1)) {
   19952       UInt rN = INSN0(3,0);
   19953       UInt rM = INSN1(3,0);
   19954       if (!isBadRegT(rN) && !isBadRegT(rM)) {
   19955          Bool isTST = INSN0(8,4) == BITS5(0,0,0,0,1);
   19956 
   19957          UInt how  = INSN1(5,4);
   19958          UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   19959 
   19960          IRTemp argL = newTemp(Ity_I32);
   19961          assign(argL, getIRegT(rN));
   19962 
   19963          IRTemp rMt = newTemp(Ity_I32);
   19964          assign(rMt, getIRegT(rM));
   19965 
                  /* argR is the shifted Rm; oldC is the carry the shift
                     helper reports alongside it. */
   19966          IRTemp argR = newTemp(Ity_I32);
   19967          IRTemp oldC = newTemp(Ity_I32);
   19968          compute_result_and_C_after_shift_by_imm5(
   19969             dis_buf, &argR, &oldC, rMt, how, imm5, rM
   19970          );
   19971 
   19972          IRTemp oldV = newTemp(Ity_I32);
   19973          assign( oldV, mk_armg_calculate_flag_v() );
   19974 
   19975          IRTemp res = newTemp(Ity_I32);
   19976          assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
   19977                            mkexpr(argL), mkexpr(argR)));
   19978 
   19979          setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19980                             condT );
   19981          DIP("%s.w r%u, %s\n", isTST ? "tst" : "teq", rN, dis_buf);
   19982          goto decode_success;
   19983       }
   19984    }
   19985 
   19986    /* -------------- (T3) CMP.W Rn, Rm, {shift} -------------- */
   19987    /* -------------- (T2) CMN.W Rn, Rm, {shift} -------------- */
            /* Flag-only arithmetic comparisons of Rn against the shifted
               Rm: the thunk is set up as a subtraction for CMP and as an
               addition for CMN.  No register is written. */
   19988    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   19989        && (   INSN0(8,4) == BITS5(1,1,0,1,1)  // CMP
   19990            || INSN0(8,4) == BITS5(1,0,0,0,1)) // CMN
   19991        && INSN1(15,15) == 0
   19992        && INSN1(11,8) == BITS4(1,1,1,1)) {
   19993       UInt rN = INSN0(3,0);
   19994       UInt rM = INSN1(3,0);
   19995       if (!isBadRegT(rN) && !isBadRegT(rM)) {
   19996          Bool isCMN = INSN0(8,4) == BITS5(1,0,0,0,1);
   19997          UInt how   = INSN1(5,4);
   19998          UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
   19999 
   20000          IRTemp argL = newTemp(Ity_I32);
   20001          assign(argL, getIRegT(rN));
   20002 
   20003          IRTemp rMt = newTemp(Ity_I32);
   20004          assign(rMt, getIRegT(rM));
   20005 
                  /* Only the shifted value is wanted here; NULL is passed
                     in the carry slot. */
   20006          IRTemp argR = newTemp(Ity_I32);
   20007          compute_result_and_C_after_shift_by_imm5(
   20008             dis_buf, &argR, NULL, rMt, how, imm5, rM
   20009          );
   20010 
   20011          setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   20012                          argL, argR, condT );
   20013 
   20014          DIP("%s.w r%u, %s\n", isCMN ? "cmn" : "cmp", rN, dis_buf);
   20015          goto decode_success;
   20016       }
   20017    }
   20018 
   20019    /* -------------- (T2) MOV{S}.W Rd, #constT -------------- */
   20020    /* -------------- (T2) MVN{S}.W Rd, #constT -------------- */
            /* Rd = imm32 (MOV) or ~imm32 (MVN), where imm32 is the
               constant expanded by thumbExpandImm_from_I0_I1.  The S
               forms also rewrite the flags thunk. */
   20021    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   20022        && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // MOV
   20023            || INSN0(9,5) == BITS5(0,0,0,1,1)) // MVN
   20024        && INSN0(3,0) == BITS4(1,1,1,1)
   20025        && INSN1(15,15) == 0) {
   20026       UInt rD = INSN1(11,8);
   20027       if (!isBadRegT(rD)) {
                  /* updC is handed to the expander by pointer.  When it
                     comes back True the new carry is bit 31 of imm32;
                     otherwise the current C flag is carried over. */
   20028          Bool   updC  = False;
   20029          UInt   bS    = INSN0(4,4);
   20030          Bool   isMVN = INSN0(5,5) == 1;
   20031          UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
   20032          IRTemp res   = newTemp(Ity_I32);
   20033          assign(res, mkU32(isMVN ? ~imm32 : imm32));
   20034          putIRegT(rD, mkexpr(res), condT);
   20035          if (bS) {
   20036             IRTemp oldV = newTemp(Ity_I32);
   20037             IRTemp oldC = newTemp(Ity_I32);
   20038             assign( oldV, mk_armg_calculate_flag_v() );
   20039             assign( oldC, updC
   20040                           ? mkU32((imm32 >> 31) & 1)
   20041                           : mk_armg_calculate_flag_c() );
   20042             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   20043                                condT );
   20044          }
                  /* The trace shows the expanded constant before any MVN
                     inversion. */
   20045          DIP("%s%s.w r%u, #%u\n",
   20046              isMVN ? "mvn" : "mov", bS ? "s" : "", rD, imm32);
   20047          goto decode_success;
   20048       }
   20049    }
   20050 
   20051    /* -------------- (T3) MOVW Rd, #imm16 -------------- */
            /* Rd = imm16 as a 32-bit constant.  The flags thunk is not
               touched. */
   20052    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   20053        && INSN0(9,4) == BITS6(1,0,0,1,0,0)
   20054        && INSN1(15,15) == 0) {
   20055       UInt rD = INSN1(11,8);
   20056       if (!isBadRegT(rD)) {
                  /* imm16 is assembled, high to low, from INSN0(3,0),
                     INSN0(10,10), INSN1(14,12) and INSN1(7,0). */
   20057          UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
   20058                       | (INSN1(14,12) << 8) | INSN1(7,0);
   20059          putIRegT(rD, mkU32(imm16), condT);
   20060          DIP("movw r%u, #%u\n", rD, imm16);
   20061          goto decode_success;
   20062       }
   20063    }
   20064 
   20065    /* ---------------- MOVT Rd, #imm16 ---------------- */
            /* Replace the top 16 bits of Rd with imm16 and keep the low
               16 bits as they are.  The flags thunk is not touched. */
   20066    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   20067        && INSN0(9,4) == BITS6(1,0,1,1,0,0)
   20068        && INSN1(15,15) == 0) {
   20069       UInt rD = INSN1(11,8);
   20070       if (!isBadRegT(rD)) {
                  /* Same field layout as the MOVW immediate. */
   20071          UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
   20072                       | (INSN1(14,12) << 8) | INSN1(7,0);
   20073          IRTemp res = newTemp(Ity_I32);
                  /* res = (Rd & 0xFFFF) | (imm16 << 16) */
   20074          assign(res,
   20075                 binop(Iop_Or32,
   20076                       binop(Iop_And32, getIRegT(rD), mkU32(0xFFFF)),
   20077                       mkU32(imm16 << 16)));
   20078          putIRegT(rD, mkexpr(res), condT);
   20079          DIP("movt r%u, #%u\n", rD, imm16);
   20080          goto decode_success;
   20081       }
   20082    }
   20083 
   20084    /* ---------------- LD/ST reg+/-#imm8 ---------------- */
   20085    /* Loads and stores of the form:
   20086          op  Rt, [Rn, #-imm8]      or
   20087          op  Rt, [Rn], #+/-imm8    or
   20088          op  Rt, [Rn, #+/-imm8]!
   20089       where op is one of
   20090          ldrb ldrh ldr  ldrsb ldrsh
   20091          strb strh str
               The switch below derives access size, signedness and
               direction from INSN0(8,4); bP/bU/bW then select between
               the three addressing forms.  A 32-bit load into r15 is
               treated as a branch and stops disassembly here.
   20092    */
   20093    if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0) && INSN1(11,11) == 1) {
   20094       Bool   valid  = True;
   20095       Bool   syned  = False;
   20096       Bool   isST   = False;
   20097       IRType ty     = Ity_I8;
   20098       const HChar* nm = "???";
   20099 
   20100       switch (INSN0(8,4)) {
   20101          case BITS5(0,0,0,0,0):   // strb
   20102             nm = "strb"; isST = True; break;
   20103          case BITS5(0,0,0,0,1):   // ldrb
   20104             nm = "ldrb"; break;
   20105          case BITS5(1,0,0,0,1):   // ldrsb
   20106             nm = "ldrsb"; syned = True; break;
   20107          case BITS5(0,0,0,1,0):   // strh
   20108             nm = "strh"; ty = Ity_I16; isST = True; break;
   20109          case BITS5(0,0,0,1,1):   // ldrh
   20110             nm = "ldrh"; ty = Ity_I16; break;
   20111          case BITS5(1,0,0,1,1):   // ldrsh
   20112             nm = "ldrsh"; ty = Ity_I16; syned = True; break;
   20113          case BITS5(0,0,1,0,0):   // str
   20114             nm = "str"; ty = Ity_I32; isST = True; break;
   20115          case BITS5(0,0,1,0,1):
   20116             nm = "ldr"; ty = Ity_I32; break;  // ldr
   20117          default:
   20118             valid = False; break;
   20119       }
   20120 
   20121       UInt rN      = INSN0(3,0);
   20122       UInt rT      = INSN1(15,12);
   20123       UInt bP      = INSN1(10,10);  // 1: transfer at Rn+/-imm8  0: at Rn
   20124       UInt bU      = INSN1(9,9);    // 1: add imm8  0: subtract imm8
   20125       UInt bW      = INSN1(8,8);    // 1: write Rn+/-imm8 back to Rn
   20126       UInt imm8    = INSN1(7,0);
   20127       Bool loadsPC = False;
   20128 
   20129       if (valid) {
                  /* Positive offset without writeback is not accepted by
                     this case.  NOTE(review): presumably these are the
                     unprivileged (LDRT/STRT-style) encodings -- confirm
                     against the ARM ARM. */
   20130          if (bP == 1 && bU == 1 && bW == 0)
   20131             valid = False;
                  /* Neither indexed nor written back: not accepted. */
   20132          if (bP == 0 && bW == 0)
   20133             valid = False;
                  /* r15 is not accepted as the base register. */
   20134          if (rN == 15)
   20135             valid = False;
                  /* Writeback into the transfer register is not accepted. */
   20136          if (bW == 1 && rN == rT)
   20137             valid = False;
   20138          if (ty == Ity_I8 || ty == Ity_I16) {
   20139             if (isBadRegT(rT))
   20140                valid = False;
   20141          } else {
   20142             /* ty == Ity_I32 */
   20143             if (isST && rT == 15)
   20144                valid = False;
   20145             if (!isST && rT == 15)
   20146                loadsPC = True;
   20147          }
   20148       }
   20149 
   20150       if (valid) {
   20151          // if it's a branch, it can't happen in the middle of an IT block
   20152          // Also, if it is a branch, make it unconditional at this point.
   20153          // Doing conditional branches in-line is too complex (for now)
   20154          if (loadsPC) {
   20155             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20156             // go uncond
   20157             mk_skip_over_T32_if_cond_is_false(condT);
   20158             condT = IRTemp_INVALID;
   20159             // now uncond
   20160          }
   20161 
                  /* preAddr is the unmodified base; postAddr is the base
                     with imm8 added or subtracted according to bU. */
   20162          IRTemp preAddr = newTemp(Ity_I32);
   20163          assign(preAddr, getIRegT(rN));
   20164 
   20165          IRTemp postAddr = newTemp(Ity_I32);
   20166          assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   20167                                 mkexpr(preAddr), mkU32(imm8)));
   20168 
   20169          IRTemp transAddr = bP == 1 ? postAddr : preAddr;
   20170 
   20171          if (isST) {
   20172 
   20173             /* Store.  If necessary, update the base register before
   20174                the store itself, so that the common idiom of "str rX,
   20175                [sp, #-4]!" (store rX at sp-4, then do new sp = sp-4,
   20176                a.k.a "push rX") doesn't cause Memcheck to complain
   20177                that the access is below the stack pointer.  Also, not
   20178                updating sp before the store confuses Valgrind's
   20179                dynamic stack-extending logic.  So do it before the
   20180                store.  Hence we need to snarf the store data before
   20181                doing the basereg update. */
   20182 
   20183             /* get hold of the data to be stored */
   20184             IRTemp oldRt = newTemp(Ity_I32);
   20185             assign(oldRt, getIRegT(rT));
   20186 
   20187             /* Update Rn if necessary. */
   20188             if (bW == 1) {
   20189                vassert(rN != rT); // assured by validity check above
   20190                putIRegT(rN, mkexpr(postAddr), condT);
   20191             }
   20192 
   20193             /* generate the transfer */
                     /* Narrow the 32-bit register value to the access
                        width before storing. */
   20194             IRExpr* data = NULL;
   20195             switch (ty) {
   20196                case Ity_I8:
   20197                   data = unop(Iop_32to8, mkexpr(oldRt));
   20198                   break;
   20199                case Ity_I16:
   20200                   data = unop(Iop_32to16, mkexpr(oldRt));
   20201                   break;
   20202                case Ity_I32:
   20203                   data = mkexpr(oldRt);
   20204                   break;
   20205                default:
   20206                   vassert(0);
   20207             }
   20208             storeGuardedLE(mkexpr(transAddr), data, condT);
   20209 
   20210          } else {
   20211 
   20212             /* Load. */
                     /* llOldRt is passed to loadGuardedLE next to the
                        guard; per the comment further down, it is the
                        value that is put back when condT is false. */
   20213             IRTemp llOldRt = newTemp(Ity_I32);
   20214             assign(llOldRt, llGetIReg(rT));
   20215 
   20216             /* generate the transfer */
   20217             IRTemp    newRt = newTemp(Ity_I32);
   20218             IRLoadGOp widen = ILGop_INVALID;
   20219             switch (ty) {
   20220                case Ity_I8:
   20221                   widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
   20222                case Ity_I16:
   20223                   widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
   20224                case Ity_I32:
   20225                   widen = ILGop_Ident32; break;
   20226                default:
   20227                   vassert(0);
   20228             }
   20229             loadGuardedLE(newRt, widen,
   20230                           mkexpr(transAddr), mkexpr(llOldRt), condT);
   20231             if (rT == 15) {
   20232                vassert(loadsPC);
   20233                /* We'll do the write to the PC just below */
   20234             } else {
   20235                vassert(!loadsPC);
   20236                /* IRTemp_INVALID is OK here because in the case where
   20237                   condT is false at run time, we're just putting the
   20238                   old rT value back. */
   20239                putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   20240             }
   20241 
   20242             /* Update Rn if necessary. */
   20243             if (bW == 1) {
   20244                vassert(rN != rT); // assured by validity check above
   20245                putIRegT(rN, mkexpr(postAddr), condT);
   20246             }
   20247 
   20248             if (loadsPC) {
   20249                /* Presumably this is an interworking branch. */
   20250                vassert(rN != 15); // assured by validity check above
   20251                vassert(rT == 15);
   20252                vassert(condT == IRTemp_INVALID); /* due to check above */
   20253                llPutIReg(15, mkexpr(newRt));
   20254                dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
   20255                dres.whatNext    = Dis_StopHere;
   20256             }
   20257          }
   20258 
                  /* Trace output, one format per addressing form. */
   20259          if (bP == 1 && bW == 0) {
   20260             DIP("%s.w r%u, [r%u, #%c%u]\n",
   20261                 nm, rT, rN, bU ? '+' : '-', imm8);
   20262          }
   20263          else if (bP == 1 && bW == 1) {
   20264             DIP("%s.w r%u, [r%u, #%c%u]!\n",
   20265                 nm, rT, rN, bU ? '+' : '-', imm8);
   20266          }
   20267          else {
   20268             vassert(bP == 0 && bW == 1);
   20269             DIP("%s.w r%u, [r%u], #%c%u\n",
   20270                 nm, rT, rN, bU ? '+' : '-', imm8);
   20271          }
   20272 
   20273          goto decode_success;
   20274       }
   20275    }
   20276 
   20277    /* ------------- LD/ST reg+(reg<<imm2) ------------- */
   20278    /* Loads and stores of the form:
   20279          op  Rt, [Rn, Rm, LSL #imm2]
   20280       where op is one of
   20281          ldrb ldrh ldr  ldrsb ldrsh
   20282          strb strh str
               The transfer address is Rn + (Rm << imm2); there is no
               writeback.  A 32-bit load into r15 is treated as a branch
               and stops disassembly here.
   20283    */
   20284    if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)
   20285        && INSN1(11,6) == BITS6(0,0,0,0,0,0)) {
   20286       Bool   valid  = True;
   20287       Bool   syned  = False;
   20288       Bool   isST   = False;
   20289       IRType ty     = Ity_I8;
   20290       const HChar* nm = "???";
   20291 
   20292       switch (INSN0(8,4)) {
   20293          case BITS5(0,0,0,0,0):   // strb
   20294             nm = "strb"; isST = True; break;
   20295          case BITS5(0,0,0,0,1):   // ldrb
   20296             nm = "ldrb"; break;
   20297          case BITS5(1,0,0,0,1):   // ldrsb
   20298             nm = "ldrsb"; syned = True; break;
   20299          case BITS5(0,0,0,1,0):   // strh
   20300             nm = "strh"; ty = Ity_I16; isST = True; break;
   20301          case BITS5(0,0,0,1,1):   // ldrh
   20302             nm = "ldrh"; ty = Ity_I16; break;
   20303          case BITS5(1,0,0,1,1):   // ldrsh
   20304             nm = "ldrsh"; ty = Ity_I16; syned = True; break;
   20305          case BITS5(0,0,1,0,0):   // str
   20306             nm = "str"; ty = Ity_I32; isST = True; break;
   20307          case BITS5(0,0,1,0,1):
   20308             nm = "ldr"; ty = Ity_I32; break;  // ldr
   20309          default:
   20310             valid = False; break;
   20311       }
   20312 
   20313       UInt rN      = INSN0(3,0);
   20314       UInt rM      = INSN1(3,0);
   20315       UInt rT      = INSN1(15,12);
   20316       UInt imm2    = INSN1(5,4);   // left-shift amount applied to Rm
   20317       Bool loadsPC = False;
   20318 
               /* These checks can only clear 'valid'; an opcode already
                  rejected by the switch above stays rejected. */
   20319       if (ty == Ity_I8 || ty == Ity_I16) {
   20320          /* all 8- and 16-bit load and store cases have the
   20321             same exclusion set. */
   20322          if (rN == 15 || isBadRegT(rT) || isBadRegT(rM))
   20323             valid = False;
   20324       } else {
   20325          vassert(ty == Ity_I32);
   20326          if (rN == 15 || isBadRegT(rM))
   20327             valid = False;
   20328          if (isST && rT == 15)
   20329             valid = False;
   20330          /* If it is a load and rT is 15, that's only allowable if we
   20331             are not in an IT block, or are the last in it.  Need to
   20332             insert a dynamic check for that. */
   20333          if (!isST && rT == 15)
   20334             loadsPC = True;
   20335       }
   20336 
   20337       if (valid) {
   20338          // if it's a branch, it can't happen in the middle of an IT block
   20339          // Also, if it is a branch, make it unconditional at this point.
   20340          // Doing conditional branches in-line is too complex (for now)
   20341          if (loadsPC) {
   20342             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20343             // go uncond
   20344             mk_skip_over_T32_if_cond_is_false(condT);
   20345             condT = IRTemp_INVALID;
   20346             // now uncond
   20347          }
   20348 
                  /* transAddr = Rn + (Rm << imm2) */
   20349          IRTemp transAddr = newTemp(Ity_I32);
   20350          assign(transAddr,
   20351                 binop( Iop_Add32,
   20352                        getIRegT(rN),
   20353                        binop(Iop_Shl32, getIRegT(rM), mkU8(imm2)) ));
   20354 
   20355          if (isST) {
   20356 
   20357             /* get hold of the data to be stored */
   20358             IRTemp oldRt = newTemp(Ity_I32);
   20359             assign(oldRt, getIRegT(rT));
   20360 
   20361             /* generate the transfer */
                     /* Narrow the 32-bit register value to the access
                        width before storing. */
   20362             IRExpr* data = NULL;
   20363             switch (ty) {
   20364                case Ity_I8:
   20365                   data = unop(Iop_32to8, mkexpr(oldRt));
   20366                   break;
   20367                case Ity_I16:
   20368                   data = unop(Iop_32to16, mkexpr(oldRt));
   20369                   break;
   20370               case Ity_I32:
   20371                   data = mkexpr(oldRt);
   20372                   break;
   20373               default:
   20374                  vassert(0);
   20375             }
   20376             storeGuardedLE(mkexpr(transAddr), data, condT);
   20377 
   20378          } else {
   20379 
   20380             /* Load. */
                     /* llOldRt is passed to loadGuardedLE next to the
                        guard; per the comment further down, it is the
                        value that is put back when condT is false. */
   20381             IRTemp llOldRt = newTemp(Ity_I32);
   20382             assign(llOldRt, llGetIReg(rT));
   20383 
   20384             /* generate the transfer */
   20385             IRTemp    newRt = newTemp(Ity_I32);
   20386             IRLoadGOp widen = ILGop_INVALID;
   20387             switch (ty) {
   20388                case Ity_I8:
   20389                   widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
   20390                case Ity_I16:
   20391                   widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
   20392                case Ity_I32:
   20393                   widen = ILGop_Ident32; break;
   20394                default:
   20395                   vassert(0);
   20396             }
   20397             loadGuardedLE(newRt, widen,
   20398                           mkexpr(transAddr), mkexpr(llOldRt), condT);
   20399 
   20400             if (rT == 15) {
   20401                vassert(loadsPC);
   20402                /* We'll do the write to the PC just below */
   20403             } else {
   20404                vassert(!loadsPC);
   20405                /* IRTemp_INVALID is OK here because in the case where
   20406                   condT is false at run time, we're just putting the
   20407                   old rT value back. */
   20408                putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   20409             }
   20410 
   20411             if (loadsPC) {
   20412                /* Presumably this is an interworking branch. */
   20413                vassert(rN != 15); // assured by validity check above
   20414                vassert(rT == 15);
   20415                vassert(condT == IRTemp_INVALID); /* due to check above */
   20416                llPutIReg(15, mkexpr(newRt));
   20417                dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
   20418                dres.whatNext    = Dis_StopHere;
   20419             }
   20420          }
   20421 
   20422          DIP("%s.w r%u, [r%u, r%u, LSL #%u]\n",
   20423              nm, rT, rN, rM, imm2);
   20424 
   20425          goto decode_success;
   20426       }
   20427    }
   20428 
   20429    /* --------------- LD/ST reg+imm12 --------------- */
   20430    /* Loads and stores of the form:
   20431          op  Rt, [Rn, #+-imm12]
   20432       where op is one of
   20433          ldrb ldrh ldr  ldrsb ldrsh
   20434          strb strh str
               The subtracting (#-imm12) form is accepted only when Rn is
               r15, in which case the base is getIRegT(15) with its low
               two bits cleared.  A 32-bit load into r15 is treated as a
               branch and stops disassembly here.
   20435    */
   20436    if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)) {
   20437       Bool   valid  = True;
   20438       Bool   syned  = INSN0(8,8) == 1;
   20439       Bool   isST   = False;
   20440       IRType ty     = Ity_I8;
   20441       UInt   bU     = INSN0(7,7); // 1: +imm   0: -imm
   20442                                   // -imm is only supported by literal versions
   20443       const HChar* nm = "???";
   20444 
   20445       switch (INSN0(6,4)) {
   20446          case BITS3(0,0,0):   // strb
   20447             nm = "strb"; isST = True; break;
   20448          case BITS3(0,0,1):   // ldrb
   20449             nm = syned ? "ldrsb" : "ldrb"; break;
   20450          case BITS3(0,1,0):   // strh
   20451             nm = "strh"; ty = Ity_I16; isST = True; break;
   20452          case BITS3(0,1,1):   // ldrh
   20453             nm = syned ? "ldrsh" : "ldrh"; ty = Ity_I16; break;
   20454          case BITS3(1,0,0):   // str
   20455             nm = "str"; ty = Ity_I32; isST = True; break;
   20456          case BITS3(1,0,1):
   20457             nm = "ldr"; ty = Ity_I32; break;  // ldr
   20458          default:
   20459             valid = False; break;
   20460       }
   20461 
   20462       UInt rN      = INSN0(3,0);
   20463       UInt rT      = INSN1(15,12);
   20464       UInt imm12   = INSN1(11,0);
   20465       Bool loadsPC = False;
   20466 
   20467       if (rN != 15 && bU == 0) {
   20468          // only pc supports #-imm12
   20469          valid = False;
   20470       }
   20471 
   20472       if (isST) {
   20473          if (syned) valid = False;
   20474          if (rN == 15 || rT == 15)
   20475             valid = False;
   20476       } else {
                  /* There is no sign-extending 32-bit load.  The imm8 and
                     register-offset decoders only accept
                     BITS5(0,0,1,0,1) for ldr, so reject the encoding here
                     too instead of decoding it as a plain ldr. */
                  if (syned && ty == Ity_I32)
                     valid = False;
   20477          /* For a 32-bit load, rT == 15 is only allowable if we are not
   20478             in an IT block, or are the last in it.  Need to insert
   20479             a dynamic check for that.  Also, in this particular
   20480             case, rN == 15 is allowable.  In this case however, the
   20481             value obtained for rN is (apparently)
   20482             "word-align(address of current insn + 4)". */
   20483          if (rT == 15) {
   20484             if (ty == Ity_I32)
   20485                loadsPC = True;
   20486             else // Can't do it for B/H loads
   20487                valid = False;
   20488          }
   20489       }
   20490 
   20491       if (valid) {
   20492          // if it's a branch, it can't happen in the middle of an IT block
   20493          // Also, if it is a branch, make it unconditional at this point.
   20494          // Doing conditional branches in-line is too complex (for now)
   20495          if (loadsPC) {
   20496             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20497             // go uncond
   20498             mk_skip_over_T32_if_cond_is_false(condT);
   20499             condT = IRTemp_INVALID;
   20500             // now uncond
   20501          }
   20502 
                  /* Base address: for r15 the low two bits are cleared. */
   20503          IRTemp rNt = newTemp(Ity_I32);
   20504          if (rN == 15) {
   20505             vassert(!isST);
   20506             assign(rNt, binop(Iop_And32, getIRegT(15), mkU32(~3U)));
   20507          } else {
   20508             assign(rNt, getIRegT(rN));
   20509          }
   20510 
   20511          IRTemp transAddr = newTemp(Ity_I32);
   20512          assign(transAddr,
   20513                 binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   20514                       mkexpr(rNt), mkU32(imm12)));
   20515 
   20516          IRTemp oldRt = newTemp(Ity_I32);
   20517          assign(oldRt, getIRegT(rT));
   20518 
   20519          IRTemp llOldRt = newTemp(Ity_I32);
   20520          assign(llOldRt, llGetIReg(rT));
   20521 
   20522          if (isST) {
                     /* Narrow the 32-bit register value to the access
                        width before storing. */
   20523             IRExpr* data = NULL;
   20524             switch (ty) {
   20525                case Ity_I8:
   20526                   data = unop(Iop_32to8, mkexpr(oldRt));
   20527                   break;
   20528                case Ity_I16:
   20529                   data = unop(Iop_32to16, mkexpr(oldRt));
   20530                   break;
   20531               case Ity_I32:
   20532                   data = mkexpr(oldRt);
   20533                   break;
   20534               default:
   20535                  vassert(0);
   20536             }
   20537             storeGuardedLE(mkexpr(transAddr), data, condT);
   20538          } else {
   20539             IRTemp    newRt = newTemp(Ity_I32);
   20540             IRLoadGOp widen = ILGop_INVALID;
   20541             switch (ty) {
   20542                case Ity_I8:
   20543                   widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
   20544                case Ity_I16:
   20545                   widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
   20546                case Ity_I32:
   20547                   widen = ILGop_Ident32; break;
   20548                default:
   20549                   vassert(0);
   20550             }
   20551             loadGuardedLE(newRt, widen,
   20552                           mkexpr(transAddr), mkexpr(llOldRt), condT);
   20553             if (rT == 15) {
   20554                vassert(loadsPC);
   20555                /* We'll do the write to the PC just below */
   20556             } else {
   20557                vassert(!loadsPC);
   20558                /* IRTemp_INVALID is OK here because in the case where
   20559                   condT is false at run time, we're just putting the
   20560                   old rT value back. */
   20561                putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   20562             }
   20563 
   20564             if (loadsPC) {
   20565                /* Presumably this is an interworking branch. */
   20566                vassert(rT == 15);
   20567                vassert(condT == IRTemp_INVALID); /* due to check above */
   20568                llPutIReg(15, mkexpr(newRt));
   20569                dres.jk_StopHere = Ijk_Boring;
   20570                dres.whatNext    = Dis_StopHere;
   20571             }
   20572          }
   20573 
                  /* Print the real offset sign: the subtracting form is
                     reachable here when rN is r15. */
   20574          DIP("%s.w r%u, [r%u, #%c%u]\n",
                      nm, rT, rN, bU ? '+' : '-', imm12);
   20575 
   20576          goto decode_success;
   20577       }
   20578    }
   20579 
   20580    /* -------------- LDRD/STRD reg+/-#imm8 -------------- */
   20581    /* Doubleword loads and stores of the form:
   20582          ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]    or
   20583          ldrd/strd  Rt, Rt2, [Rn], #+/-imm8    or
   20584          ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]!
   20585    */
   20586    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,0) && INSN0(6,6) == 1) {
   20587       UInt bP   = INSN0(8,8);
   20588       UInt bU   = INSN0(7,7);
   20589       UInt bW   = INSN0(5,5);
   20590       UInt bL   = INSN0(4,4);  // 1: load  0: store
   20591       UInt rN   = INSN0(3,0);
   20592       UInt rT   = INSN1(15,12);
   20593       UInt rT2  = INSN1(11,8);
   20594       UInt imm8 = INSN1(7,0);
   20595 
   20596       Bool valid = True;
   20597       if (bP == 0 && bW == 0)                 valid = False;
   20598       if (bW == 1 && (rN == rT || rN == rT2)) valid = False;
   20599       if (isBadRegT(rT) || isBadRegT(rT2))    valid = False;
   20600       if (bL == 1 && rT == rT2)               valid = False;
   20601       /* It's OK to use PC as the base register only in the
   20602          following case: ldrd Rt, Rt2, [PC, #+/-imm8] */
   20603       if (rN == 15 && (bL == 0/*store*/
   20604                        || bW == 1/*wb*/))     valid = False;
   20605 
   20606       if (valid) {
   20607          IRTemp preAddr = newTemp(Ity_I32);
   20608          assign(preAddr, 15 == rN
   20609                            ? binop(Iop_And32, getIRegT(15), mkU32(~3U))
   20610                            : getIRegT(rN));
   20611 
   20612          IRTemp postAddr = newTemp(Ity_I32);
   20613          assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   20614                                 mkexpr(preAddr), mkU32(imm8 << 2)));
   20615 
   20616          IRTemp transAddr = bP == 1 ? postAddr : preAddr;
   20617 
   20618          /* For almost all cases, we do the writeback after the transfers.
   20619             However, that leaves the stack "uncovered" in this case:
   20620                strd    rD, [sp, #-8]
   20621             In which case, do the writeback to SP now, instead of later.
   20622             This is bad in that it makes the insn non-restartable if the
   20623             accesses fault, but at least keeps Memcheck happy. */
   20624          Bool writeback_already_done = False;
   20625          if (bL == 0/*store*/ && bW == 1/*wb*/
   20626              && rN == 13 && rN != rT && rN != rT2
   20627              && bU == 0/*minus*/ && (imm8 << 2) == 8) {
   20628             putIRegT(rN, mkexpr(postAddr), condT);
   20629             writeback_already_done = True;
   20630          }
   20631 
   20632          if (bL == 0) {
   20633             IRTemp oldRt  = newTemp(Ity_I32);
   20634             IRTemp oldRt2 = newTemp(Ity_I32);
   20635             assign(oldRt,  getIRegT(rT));
   20636             assign(oldRt2, getIRegT(rT2));
   20637             storeGuardedLE( mkexpr(transAddr),
   20638                             mkexpr(oldRt), condT );
   20639             storeGuardedLE( binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
   20640                             mkexpr(oldRt2), condT );
   20641          } else {
   20642             IRTemp oldRt  = newTemp(Ity_I32);
   20643             IRTemp oldRt2 = newTemp(Ity_I32);
   20644             IRTemp newRt  = newTemp(Ity_I32);
   20645             IRTemp newRt2 = newTemp(Ity_I32);
   20646             assign(oldRt,  llGetIReg(rT));
   20647             assign(oldRt2, llGetIReg(rT2));
   20648             loadGuardedLE( newRt, ILGop_Ident32,
   20649                            mkexpr(transAddr),
   20650                            mkexpr(oldRt), condT );
   20651             loadGuardedLE( newRt2, ILGop_Ident32,
   20652                            binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
   20653                            mkexpr(oldRt2), condT );
   20654             /* Put unconditionally, since we already switched on the condT
   20655                in the guarded loads. */
   20656             putIRegT(rT,  mkexpr(newRt),  IRTemp_INVALID);
   20657             putIRegT(rT2, mkexpr(newRt2), IRTemp_INVALID);
   20658          }
   20659 
   20660          if (bW == 1 && !writeback_already_done) {
   20661             putIRegT(rN, mkexpr(postAddr), condT);
   20662          }
   20663 
   20664          const HChar* nm = bL ? "ldrd" : "strd";
   20665 
   20666          if (bP == 1 && bW == 0) {
   20667             DIP("%s.w r%u, r%u, [r%u, #%c%u]\n",
   20668                 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
   20669          }
   20670          else if (bP == 1 && bW == 1) {
   20671             DIP("%s.w r%u, r%u, [r%u, #%c%u]!\n",
   20672                 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
   20673          }
   20674          else {
   20675             vassert(bP == 0 && bW == 1);
   20676             DIP("%s.w r%u, r%u, [r%u], #%c%u\n",
   20677                 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
   20678          }
   20679 
   20680          goto decode_success;
   20681       }
   20682    }
   20683 
   20684    /* -------------- (T3) Bcond.W label -------------- */
   20685    /* This variant carries its own condition, so can't be part of an
   20686       IT block ... */
   20687    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   20688        && INSN1(15,14) == BITS2(1,0)
   20689        && INSN1(12,12) == 0) {
   20690       UInt cond = INSN0(9,6);
   20691       if (cond != ARMCondAL && cond != ARMCondNV) {
   20692          Int simm21
   20693             =   (INSN0(10,10) << (1 + 1 + 6 + 11 + 1))
   20694               | (INSN1(11,11) << (1 + 6 + 11 + 1))
   20695               | (INSN1(13,13) << (6 + 11 + 1))
   20696               | (INSN0(5,0)   << (11 + 1))
   20697               | (INSN1(10,0)  << 1);
   20698          simm21 = (simm21 << 11) >> 11;
   20699 
   20700          vassert(0 == (guest_R15_curr_instr_notENC & 1));
   20701          UInt dst = simm21 + guest_R15_curr_instr_notENC + 4;
   20702 
   20703          /* Not allowed in an IT block; SIGILL if so. */
   20704          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   20705 
   20706          IRTemp kondT = newTemp(Ity_I32);
   20707          assign( kondT, mk_armg_calculate_condition(cond) );
   20708          stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
   20709                             Ijk_Boring,
   20710                             IRConst_U32(dst | 1/*CPSR.T*/),
   20711                             OFFB_R15T ));
   20712          llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 4)
   20713                               | 1 /*CPSR.T*/ ));
   20714          dres.jk_StopHere = Ijk_Boring;
   20715          dres.whatNext    = Dis_StopHere;
   20716          DIP("b%s.w 0x%x\n", nCC(cond), dst);
   20717          goto decode_success;
   20718       }
   20719    }
   20720 
   20721    /* ---------------- (T4) B.W label ---------------- */
   20722    /* ... whereas this variant doesn't carry its own condition, so it
   20723       has to be either unconditional or the conditional by virtue of
   20724       being the last in an IT block.  The upside is that there's 4
   20725       more bits available for the jump offset, so it has a 16-times
   20726       greater branch range than the T3 variant. */
   20727    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   20728        && INSN1(15,14) == BITS2(1,0)
   20729        && INSN1(12,12) == 1) {
   20730       if (1) {
   20731          UInt bS  = INSN0(10,10);
   20732          UInt bJ1 = INSN1(13,13);
   20733          UInt bJ2 = INSN1(11,11);
   20734          UInt bI1 = 1 ^ (bJ1 ^ bS);
   20735          UInt bI2 = 1 ^ (bJ2 ^ bS);
   20736          Int simm25
   20737             =   (bS          << (1 + 1 + 10 + 11 + 1))
   20738               | (bI1         << (1 + 10 + 11 + 1))
   20739               | (bI2         << (10 + 11 + 1))
   20740               | (INSN0(9,0)  << (11 + 1))
   20741               | (INSN1(10,0) << 1);
   20742          simm25 = (simm25 << 7) >> 7;
   20743 
   20744          vassert(0 == (guest_R15_curr_instr_notENC & 1));
   20745          UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
   20746 
   20747          /* If in an IT block, must be the last insn. */
   20748          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20749 
   20750          // go uncond
   20751          mk_skip_over_T32_if_cond_is_false(condT);
   20752          condT = IRTemp_INVALID;
   20753          // now uncond
   20754 
   20755          // branch to dst
   20756          llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
   20757          dres.jk_StopHere = Ijk_Boring;
   20758          dres.whatNext    = Dis_StopHere;
   20759          DIP("b.w 0x%x\n", dst);
   20760          goto decode_success;
   20761       }
   20762    }
   20763 
   20764    /* ------------------ TBB, TBH ------------------ */
   20765    if (INSN0(15,4) == 0xE8D && INSN1(15,5) == 0x780) {
   20766       UInt rN = INSN0(3,0);
   20767       UInt rM = INSN1(3,0);
   20768       UInt bH = INSN1(4,4);
   20769       if (bH/*ATC*/ || (rN != 13 && !isBadRegT(rM))) {
   20770          /* Must be last or not-in IT block */
   20771          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20772          /* Go uncond */
   20773          mk_skip_over_T32_if_cond_is_false(condT);
   20774          condT = IRTemp_INVALID;
   20775 
   20776          IRExpr* ea
   20777              = binop(Iop_Add32,
   20778                      getIRegT(rN),
   20779                      bH ? binop(Iop_Shl32, getIRegT(rM), mkU8(1))
   20780                         : getIRegT(rM));
   20781 
   20782          IRTemp delta = newTemp(Ity_I32);
   20783          if (bH) {
   20784             assign(delta, unop(Iop_16Uto32, loadLE(Ity_I16, ea)));
   20785          } else {
   20786             assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
   20787          }
   20788 
   20789          llPutIReg(
   20790             15,
   20791             binop(Iop_Or32,
   20792                   binop(Iop_Add32,
   20793                         getIRegT(15),
   20794                         binop(Iop_Shl32, mkexpr(delta), mkU8(1))
   20795                   ),
   20796                   mkU32(1)
   20797          ));
   20798          dres.jk_StopHere = Ijk_Boring;
   20799          dres.whatNext    = Dis_StopHere;
   20800          DIP("tb%c [r%u, r%u%s]\n",
   20801              bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
   20802          goto decode_success;
   20803       }
   20804    }
   20805 
   20806    /* ------------------ UBFX ------------------ */
   20807    /* ------------------ SBFX ------------------ */
   20808    /* There's also ARM versions of same, but it doesn't seem worth the
   20809       hassle to common up the handling (it's only a couple of C
   20810       statements). */
   20811    if ((INSN0(15,4) == 0xF3C // UBFX
   20812         || INSN0(15,4) == 0xF34) // SBFX
   20813        && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
   20814       UInt rN  = INSN0(3,0);
   20815       UInt rD  = INSN1(11,8);
   20816       UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
   20817       UInt wm1 = INSN1(4,0);
   20818       UInt msb =  lsb + wm1;
   20819       if (!isBadRegT(rD) && !isBadRegT(rN) && msb <= 31) {
   20820          Bool   isU  = INSN0(15,4) == 0xF3C;
   20821          IRTemp src  = newTemp(Ity_I32);
   20822          IRTemp tmp  = newTemp(Ity_I32);
   20823          IRTemp res  = newTemp(Ity_I32);
   20824          UInt   mask = ((1 << wm1) - 1) + (1 << wm1);
   20825          vassert(msb >= 0 && msb <= 31);
   20826          vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
   20827 
   20828          assign(src, getIRegT(rN));
   20829          assign(tmp, binop(Iop_And32,
   20830                            binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
   20831                            mkU32(mask)));
   20832          assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
   20833                            binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
   20834                            mkU8(31-wm1)));
   20835 
   20836          putIRegT(rD, mkexpr(res), condT);
   20837 
   20838          DIP("%s r%u, r%u, #%u, #%u\n",
   20839              isU ? "ubfx" : "sbfx", rD, rN, lsb, wm1 + 1);
   20840          goto decode_success;
   20841       }
   20842    }
   20843 
   20844    /* ------------------ UXTB ------------------ */
   20845    /* ------------------ UXTH ------------------ */
   20846    /* ------------------ SXTB ------------------ */
   20847    /* ------------------ SXTH ------------------ */
   20848    /* ----------------- UXTB16 ----------------- */
   20849    /* ----------------- SXTB16 ----------------- */
   20850    /* FIXME: this is an exact duplicate of the ARM version.  They
   20851       should be commoned up. */
   20852    if ((INSN0(15,0) == 0xFA5F     // UXTB
   20853         || INSN0(15,0) == 0xFA1F  // UXTH
   20854         || INSN0(15,0) == 0xFA4F  // SXTB
   20855         || INSN0(15,0) == 0xFA0F  // SXTH
   20856         || INSN0(15,0) == 0xFA3F  // UXTB16
   20857         || INSN0(15,0) == 0xFA2F) // SXTB16
   20858        && INSN1(15,12) == BITS4(1,1,1,1)
   20859        && INSN1(7,6) == BITS2(1,0)) {
   20860       UInt rD = INSN1(11,8);
   20861       UInt rM = INSN1(3,0);
   20862       UInt rot = INSN1(5,4);
   20863       if (!isBadRegT(rD) && !isBadRegT(rM)) {
   20864          const HChar* nm = "???";
   20865          IRTemp srcT = newTemp(Ity_I32);
   20866          IRTemp rotT = newTemp(Ity_I32);
   20867          IRTemp dstT = newTemp(Ity_I32);
   20868          assign(srcT, getIRegT(rM));
   20869          assign(rotT, genROR32(srcT, 8 * rot));
   20870          switch (INSN0(15,0)) {
   20871             case 0xFA5F: // UXTB
   20872                nm = "uxtb";
   20873                assign(dstT, unop(Iop_8Uto32,
   20874                                  unop(Iop_32to8, mkexpr(rotT))));
   20875                break;
   20876             case 0xFA1F: // UXTH
   20877                nm = "uxth";
   20878                assign(dstT, unop(Iop_16Uto32,
   20879                                  unop(Iop_32to16, mkexpr(rotT))));
   20880                break;
   20881             case 0xFA4F: // SXTB
   20882                nm = "sxtb";
   20883                assign(dstT, unop(Iop_8Sto32,
   20884                                  unop(Iop_32to8, mkexpr(rotT))));
   20885                break;
   20886             case 0xFA0F: // SXTH
   20887                nm = "sxth";
   20888                assign(dstT, unop(Iop_16Sto32,
   20889                                  unop(Iop_32to16, mkexpr(rotT))));
   20890                break;
   20891             case 0xFA3F: // UXTB16
   20892                nm = "uxtb16";
   20893                assign(dstT, binop(Iop_And32, mkexpr(rotT),
   20894                                              mkU32(0x00FF00FF)));
   20895                break;
   20896             case 0xFA2F: { // SXTB16
   20897                nm = "sxtb16";
   20898                IRTemp lo32 = newTemp(Ity_I32);
   20899                IRTemp hi32 = newTemp(Ity_I32);
   20900                assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
   20901                assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
   20902                assign(
   20903                   dstT,
   20904                   binop(Iop_Or32,
   20905                         binop(Iop_And32,
   20906                               unop(Iop_8Sto32,
   20907                                    unop(Iop_32to8, mkexpr(lo32))),
   20908                               mkU32(0xFFFF)),
   20909                         binop(Iop_Shl32,
   20910                               unop(Iop_8Sto32,
   20911                                    unop(Iop_32to8, mkexpr(hi32))),
   20912                               mkU8(16))
   20913                ));
   20914                break;
   20915             }
   20916             default:
   20917                vassert(0);
   20918          }
   20919          putIRegT(rD, mkexpr(dstT), condT);
   20920          DIP("%s r%u, r%u, ror #%u\n", nm, rD, rM, 8 * rot);
   20921          goto decode_success;
   20922       }
   20923    }
   20924 
   20925    /* -------------- MUL.W Rd, Rn, Rm -------------- */
   20926    if (INSN0(15,4) == 0xFB0
   20927        && (INSN1(15,0) & 0xF0F0) == 0xF000) {
   20928       UInt rN = INSN0(3,0);
   20929       UInt rD = INSN1(11,8);
   20930       UInt rM = INSN1(3,0);
   20931       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   20932          IRTemp res = newTemp(Ity_I32);
   20933          assign(res, binop(Iop_Mul32, getIRegT(rN), getIRegT(rM)));
   20934          putIRegT(rD, mkexpr(res), condT);
   20935          DIP("mul.w r%u, r%u, r%u\n", rD, rN, rM);
   20936          goto decode_success;
   20937       }
   20938    }
   20939 
   20940    /* -------------- SDIV.W Rd, Rn, Rm -------------- */
   20941    if (INSN0(15,4) == 0xFB9
   20942        && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
   20943       UInt rN = INSN0(3,0);
   20944       UInt rD = INSN1(11,8);
   20945       UInt rM = INSN1(3,0);
   20946       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   20947          IRTemp res  = newTemp(Ity_I32);
   20948          IRTemp argL = newTemp(Ity_I32);
   20949          IRTemp argR = newTemp(Ity_I32);
   20950          assign(argL, getIRegT(rN));
   20951          assign(argR, getIRegT(rM));
   20952          assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
   20953          putIRegT(rD, mkexpr(res), condT);
   20954          DIP("sdiv.w r%u, r%u, r%u\n", rD, rN, rM);
   20955          goto decode_success;
   20956       }
   20957    }
   20958 
   20959    /* -------------- UDIV.W Rd, Rn, Rm -------------- */
   20960    if (INSN0(15,4) == 0xFBB
   20961        && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
   20962       UInt rN = INSN0(3,0);
   20963       UInt rD = INSN1(11,8);
   20964       UInt rM = INSN1(3,0);
   20965       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   20966          IRTemp res  = newTemp(Ity_I32);
   20967          IRTemp argL = newTemp(Ity_I32);
   20968          IRTemp argR = newTemp(Ity_I32);
   20969          assign(argL, getIRegT(rN));
   20970          assign(argR, getIRegT(rM));
   20971          assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
   20972          putIRegT(rD, mkexpr(res), condT);
   20973          DIP("udiv.w r%u, r%u, r%u\n", rD, rN, rM);
   20974          goto decode_success;
   20975       }
   20976    }
   20977 
   20978    /* ------------------ {U,S}MULL ------------------ */
   20979    if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
   20980        && INSN1(7,4) == BITS4(0,0,0,0)) {
   20981       UInt isU  = INSN0(5,5);
   20982       UInt rN   = INSN0(3,0);
   20983       UInt rDlo = INSN1(15,12);
   20984       UInt rDhi = INSN1(11,8);
   20985       UInt rM   = INSN1(3,0);
   20986       if (!isBadRegT(rDhi) && !isBadRegT(rDlo)
   20987           && !isBadRegT(rN) && !isBadRegT(rM) && rDlo != rDhi) {
   20988          IRTemp res   = newTemp(Ity_I64);
   20989          assign(res, binop(isU ? Iop_MullU32 : Iop_MullS32,
   20990                            getIRegT(rN), getIRegT(rM)));
   20991          putIRegT( rDhi, unop(Iop_64HIto32, mkexpr(res)), condT );
   20992          putIRegT( rDlo, unop(Iop_64to32, mkexpr(res)), condT );
   20993          DIP("%cmull r%u, r%u, r%u, r%u\n",
   20994              isU ? 'u' : 's', rDlo, rDhi, rN, rM);
   20995          goto decode_success;
   20996       }
   20997    }
   20998 
   20999    /* ------------------ ML{A,S} ------------------ */
   21000    if (INSN0(15,4) == 0xFB0
   21001        && (   INSN1(7,4) == BITS4(0,0,0,0)    // MLA
   21002            || INSN1(7,4) == BITS4(0,0,0,1))) { // MLS
   21003       UInt rN = INSN0(3,0);
   21004       UInt rA = INSN1(15,12);
   21005       UInt rD = INSN1(11,8);
   21006       UInt rM = INSN1(3,0);
   21007       if (!isBadRegT(rD) && !isBadRegT(rN)
   21008           && !isBadRegT(rM) && !isBadRegT(rA)) {
   21009          Bool   isMLA = INSN1(7,4) == BITS4(0,0,0,0);
   21010          IRTemp res   = newTemp(Ity_I32);
   21011          assign(res,
   21012                 binop(isMLA ? Iop_Add32 : Iop_Sub32,
   21013                       getIRegT(rA),
   21014                       binop(Iop_Mul32, getIRegT(rN), getIRegT(rM))));
   21015          putIRegT(rD, mkexpr(res), condT);
   21016          DIP("%s r%u, r%u, r%u, r%u\n",
   21017              isMLA ? "mla" : "mls", rD, rN, rM, rA);
   21018          goto decode_success;
   21019       }
   21020    }
   21021 
   21022    /* ------------------ (T3) ADR ------------------ */
   21023    if ((INSN0(15,0) == 0xF20F || INSN0(15,0) == 0xF60F)
   21024        && INSN1(15,15) == 0) {
   21025       /* rD = align4(PC) + imm32 */
   21026       UInt rD = INSN1(11,8);
   21027       if (!isBadRegT(rD)) {
   21028          UInt imm32 = (INSN0(10,10) << 11)
   21029                       | (INSN1(14,12) << 8) | INSN1(7,0);
   21030          putIRegT(rD, binop(Iop_Add32,
   21031                             binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   21032                             mkU32(imm32)),
   21033                       condT);
   21034          DIP("add r%u, pc, #%u\n", rD, imm32);
   21035          goto decode_success;
   21036       }
   21037    }
   21038 
   21039    /* ----------------- (T1) UMLAL ----------------- */
   21040    /* ----------------- (T1) SMLAL ----------------- */
   21041    if ((INSN0(15,4) == 0xFBE // UMLAL
   21042         || INSN0(15,4) == 0xFBC) // SMLAL
   21043        && INSN1(7,4) == BITS4(0,0,0,0)) {
   21044       UInt rN   = INSN0(3,0);
   21045       UInt rDlo = INSN1(15,12);
   21046       UInt rDhi = INSN1(11,8);
   21047       UInt rM   = INSN1(3,0);
   21048       if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
   21049           && !isBadRegT(rM) && rDhi != rDlo) {
   21050          Bool   isS   = INSN0(15,4) == 0xFBC;
   21051          IRTemp argL  = newTemp(Ity_I32);
   21052          IRTemp argR  = newTemp(Ity_I32);
   21053          IRTemp old   = newTemp(Ity_I64);
   21054          IRTemp res   = newTemp(Ity_I64);
   21055          IRTemp resHi = newTemp(Ity_I32);
   21056          IRTemp resLo = newTemp(Ity_I32);
   21057          IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
   21058          assign( argL, getIRegT(rM));
   21059          assign( argR, getIRegT(rN));
   21060          assign( old, binop(Iop_32HLto64, getIRegT(rDhi), getIRegT(rDlo)) );
   21061          assign( res, binop(Iop_Add64,
   21062                             mkexpr(old),
   21063                             binop(mulOp, mkexpr(argL), mkexpr(argR))) );
   21064          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   21065          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   21066          putIRegT( rDhi, mkexpr(resHi), condT );
   21067          putIRegT( rDlo, mkexpr(resLo), condT );
   21068          DIP("%cmlal r%u, r%u, r%u, r%u\n",
   21069              isS ? 's' : 'u', rDlo, rDhi, rN, rM);
   21070          goto decode_success;
   21071       }
   21072    }
   21073 
   21074    /* ------------------ (T1) UMAAL ------------------ */
   21075    if (INSN0(15,4) == 0xFBE && INSN1(7,4) == BITS4(0,1,1,0)) {
   21076       UInt rN   = INSN0(3,0);
   21077       UInt rDlo = INSN1(15,12);
   21078       UInt rDhi = INSN1(11,8);
   21079       UInt rM   = INSN1(3,0);
   21080       if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
   21081           && !isBadRegT(rM) && rDhi != rDlo) {
   21082          IRTemp argN   = newTemp(Ity_I32);
   21083          IRTemp argM   = newTemp(Ity_I32);
   21084          IRTemp argDhi = newTemp(Ity_I32);
   21085          IRTemp argDlo = newTemp(Ity_I32);
   21086          IRTemp res    = newTemp(Ity_I64);
   21087          IRTemp resHi  = newTemp(Ity_I32);
   21088          IRTemp resLo  = newTemp(Ity_I32);
   21089          assign( argN,   getIRegT(rN) );
   21090          assign( argM,   getIRegT(rM) );
   21091          assign( argDhi, getIRegT(rDhi) );
   21092          assign( argDlo, getIRegT(rDlo) );
   21093          assign( res,
   21094                  binop(Iop_Add64,
   21095                        binop(Iop_Add64,
   21096                              binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
   21097                              unop(Iop_32Uto64, mkexpr(argDhi))),
   21098                        unop(Iop_32Uto64, mkexpr(argDlo))) );
   21099          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   21100          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   21101          putIRegT( rDhi, mkexpr(resHi), condT );
   21102          putIRegT( rDlo, mkexpr(resLo), condT );
   21103          DIP("umaal r%u, r%u, r%u, r%u\n", rDlo, rDhi, rN, rM);
   21104          goto decode_success;
   21105       }
   21106    }
   21107 
   21108    /* ------------------- (T1) SMMUL{R} ------------------ */
   21109    if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
   21110        && INSN0(6,4) == BITS3(1,0,1)
   21111        && INSN1(15,12) == BITS4(1,1,1,1)
   21112        && INSN1(7,5) == BITS3(0,0,0)) {
   21113       UInt bitR = INSN1(4,4);
   21114       UInt rD = INSN1(11,8);
   21115       UInt rM = INSN1(3,0);
   21116       UInt rN = INSN0(3,0);
   21117       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   21118          IRExpr* res
   21119          = unop(Iop_64HIto32,
   21120                 binop(Iop_Add64,
   21121                       binop(Iop_MullS32, getIRegT(rN), getIRegT(rM)),
   21122                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
   21123          putIRegT(rD, res, condT);
   21124          DIP("smmul%s r%u, r%u, r%u\n",
   21125              bitR ? "r" : "", rD, rN, rM);
   21126          goto decode_success;
   21127       }
   21128    }
   21129 
   21130    /* ------------------- (T1) SMMLA{R} ------------------ */
   21131    if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
   21132        && INSN0(6,4) == BITS3(1,0,1)
   21133        && INSN1(7,5) == BITS3(0,0,0)) {
   21134       UInt bitR = INSN1(4,4);
   21135       UInt rA = INSN1(15,12);
   21136       UInt rD = INSN1(11,8);
   21137       UInt rM = INSN1(3,0);
   21138       UInt rN = INSN0(3,0);
   21139       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && (rA != 13)) {
   21140          IRExpr* res
   21141          = unop(Iop_64HIto32,
   21142                 binop(Iop_Add64,
   21143                       binop(Iop_Add64,
   21144                             binop(Iop_32HLto64, getIRegT(rA), mkU32(0)),
   21145                             binop(Iop_MullS32, getIRegT(rN), getIRegT(rM))),
   21146                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
   21147          putIRegT(rD, res, condT);
   21148          DIP("smmla%s r%u, r%u, r%u, r%u\n",
   21149              bitR ? "r" : "", rD, rN, rM, rA);
   21150          goto decode_success;
   21151       }
   21152    }
   21153 
   21154    /* ------------------ (T2) ADR ------------------ */
   21155    if ((INSN0(15,0) == 0xF2AF || INSN0(15,0) == 0xF6AF)
   21156        && INSN1(15,15) == 0) {
   21157       /* rD = align4(PC) - imm32 */
   21158       UInt rD = INSN1(11,8);
   21159       if (!isBadRegT(rD)) {
   21160          UInt imm32 = (INSN0(10,10) << 11)
   21161                       | (INSN1(14,12) << 8) | INSN1(7,0);
   21162          putIRegT(rD, binop(Iop_Sub32,
   21163                             binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   21164                             mkU32(imm32)),
   21165                       condT);
   21166          DIP("sub r%u, pc, #%u\n", rD, imm32);
   21167          goto decode_success;
   21168       }
   21169    }
   21170 
   21171    /* ------------------- (T1) BFI ------------------- */
   21172    /* ------------------- (T1) BFC ------------------- */
   21173    if (INSN0(15,4) == 0xF36 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
   21174       UInt rD  = INSN1(11,8);
   21175       UInt rN  = INSN0(3,0);
   21176       UInt msb = INSN1(4,0);
   21177       UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
   21178       if (isBadRegT(rD) || rN == 13 || msb < lsb) {
   21179          /* undecodable; fall through */
   21180       } else {
   21181          IRTemp src    = newTemp(Ity_I32);
   21182          IRTemp olddst = newTemp(Ity_I32);
   21183          IRTemp newdst = newTemp(Ity_I32);
   21184          UInt   mask = 1 << (msb - lsb);
   21185          mask = (mask - 1) + mask;
   21186          vassert(mask != 0); // guaranteed by "msb < lsb" check above
   21187          mask <<= lsb;
   21188 
   21189          assign(src, rN == 15 ? mkU32(0) : getIRegT(rN));
   21190          assign(olddst, getIRegT(rD));
   21191          assign(newdst,
   21192                 binop(Iop_Or32,
   21193                    binop(Iop_And32,
   21194                          binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
   21195                          mkU32(mask)),
   21196                    binop(Iop_And32,
   21197                          mkexpr(olddst),
   21198                          mkU32(~mask)))
   21199                );
   21200 
   21201          putIRegT(rD, mkexpr(newdst), condT);
   21202 
   21203          if (rN == 15) {
   21204             DIP("bfc r%u, #%u, #%u\n",
   21205                 rD, lsb, msb-lsb+1);
   21206          } else {
   21207             DIP("bfi r%u, r%u, #%u, #%u\n",
   21208                 rD, rN, lsb, msb-lsb+1);
   21209          }
   21210          goto decode_success;
   21211       }
   21212    }
   21213 
   21214    /* ------------------- (T1) SXTAH ------------------- */
   21215    /* ------------------- (T1) UXTAH ------------------- */
   21216    if ((INSN0(15,4) == 0xFA1      // UXTAH
   21217         || INSN0(15,4) == 0xFA0)  // SXTAH
   21218        && INSN1(15,12) == BITS4(1,1,1,1)
   21219        && INSN1(7,6) == BITS2(1,0)) {
   21220       Bool isU = INSN0(15,4) == 0xFA1;
   21221       UInt rN  = INSN0(3,0);
   21222       UInt rD  = INSN1(11,8);
   21223       UInt rM  = INSN1(3,0);
   21224       UInt rot = INSN1(5,4);
   21225       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   21226          IRTemp srcL = newTemp(Ity_I32);
   21227          IRTemp srcR = newTemp(Ity_I32);
   21228          IRTemp res  = newTemp(Ity_I32);
   21229          assign(srcR, getIRegT(rM));
   21230          assign(srcL, getIRegT(rN));
   21231          assign(res,  binop(Iop_Add32,
   21232                             mkexpr(srcL),
   21233                             unop(isU ? Iop_16Uto32 : Iop_16Sto32,
   21234                                  unop(Iop_32to16,
   21235                                       genROR32(srcR, 8 * rot)))));
   21236          putIRegT(rD, mkexpr(res), condT);
   21237          DIP("%cxtah r%u, r%u, r%u, ror #%u\n",
   21238              isU ? 'u' : 's', rD, rN, rM, rot);
   21239          goto decode_success;
   21240       }
   21241    }
   21242 
   21243    /* ------------------- (T1) SXTAB ------------------- */
   21244    /* ------------------- (T1) UXTAB ------------------- */
   21245    if ((INSN0(15,4) == 0xFA5      // UXTAB
   21246         || INSN0(15,4) == 0xFA4)  // SXTAB
   21247        && INSN1(15,12) == BITS4(1,1,1,1)
   21248        && INSN1(7,6) == BITS2(1,0)) {
   21249       Bool isU = INSN0(15,4) == 0xFA5;
   21250       UInt rN  = INSN0(3,0);
   21251       UInt rD  = INSN1(11,8);
   21252       UInt rM  = INSN1(3,0);
   21253       UInt rot = INSN1(5,4);
   21254       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   21255          IRTemp srcL = newTemp(Ity_I32);
   21256          IRTemp srcR = newTemp(Ity_I32);
   21257          IRTemp res  = newTemp(Ity_I32);
   21258          assign(srcR, getIRegT(rM));
   21259          assign(srcL, getIRegT(rN));
   21260          assign(res,  binop(Iop_Add32,
   21261                             mkexpr(srcL),
   21262                             unop(isU ? Iop_8Uto32 : Iop_8Sto32,
   21263                                  unop(Iop_32to8,
   21264                                       genROR32(srcR, 8 * rot)))));
   21265          putIRegT(rD, mkexpr(res), condT);
   21266          DIP("%cxtab r%u, r%u, r%u, ror #%u\n",
   21267              isU ? 'u' : 's', rD, rN, rM, rot);
   21268          goto decode_success;
   21269       }
   21270    }
   21271 
   21272    /* ------------------- (T1) CLZ ------------------- */
   21273    if (INSN0(15,4) == 0xFAB
   21274        && INSN1(15,12) == BITS4(1,1,1,1)
   21275        && INSN1(7,4) == BITS4(1,0,0,0)) {
   21276       UInt rM1 = INSN0(3,0);
   21277       UInt rD  = INSN1(11,8);
   21278       UInt rM2 = INSN1(3,0);
   21279       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   21280          IRTemp arg = newTemp(Ity_I32);
   21281          IRTemp res = newTemp(Ity_I32);
   21282          assign(arg, getIRegT(rM1));
   21283          assign(res, IRExpr_ITE(
   21284                         binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
   21285                         mkU32(32),
   21286                         unop(Iop_Clz32, mkexpr(arg))
   21287          ));
   21288          putIRegT(rD, mkexpr(res), condT);
   21289          DIP("clz r%u, r%u\n", rD, rM1);
   21290          goto decode_success;
   21291       }
   21292    }
   21293 
   21294    /* ------------------- (T1) RBIT ------------------- */
   21295    if (INSN0(15,4) == 0xFA9
   21296        && INSN1(15,12) == BITS4(1,1,1,1)
   21297        && INSN1(7,4) == BITS4(1,0,1,0)) {
   21298       UInt rM1 = INSN0(3,0);
   21299       UInt rD  = INSN1(11,8);
   21300       UInt rM2 = INSN1(3,0);
   21301       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   21302          IRTemp arg = newTemp(Ity_I32);
   21303          assign(arg, getIRegT(rM1));
   21304          IRTemp res = gen_BITREV(arg);
   21305          putIRegT(rD, mkexpr(res), condT);
   21306          DIP("rbit r%u, r%u\n", rD, rM1);
   21307          goto decode_success;
   21308       }
   21309    }
   21310 
   21311    /* ------------------- (T2) REV   ------------------- */
   21312    /* ------------------- (T2) REV16 ------------------- */
   21313    if (INSN0(15,4) == 0xFA9
   21314        && INSN1(15,12) == BITS4(1,1,1,1)
   21315        && (   INSN1(7,4) == BITS4(1,0,0,0)     // REV
   21316            || INSN1(7,4) == BITS4(1,0,0,1))) { // REV16
   21317       UInt rM1   = INSN0(3,0);
   21318       UInt rD    = INSN1(11,8);
   21319       UInt rM2   = INSN1(3,0);
   21320       Bool isREV = INSN1(7,4) == BITS4(1,0,0,0);
   21321       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   21322          IRTemp arg = newTemp(Ity_I32);
   21323          assign(arg, getIRegT(rM1));
   21324          IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
   21325          putIRegT(rD, mkexpr(res), condT);
   21326          DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM1);
   21327          goto decode_success;
   21328       }
   21329    }
   21330 
   21331    /* ------------------- (T2) REVSH ------------------ */
   21332    if (INSN0(15,4) == 0xFA9
   21333        && INSN1(15,12) == BITS4(1,1,1,1)
   21334        && INSN1(7,4) == BITS4(1,0,1,1)) {
   21335       UInt rM1 = INSN0(3,0);
   21336       UInt rM2 = INSN1(3,0);
   21337       UInt rD  = INSN1(11,8);
   21338       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   21339          IRTemp irt_rM  = newTemp(Ity_I32);
   21340          IRTemp irt_hi  = newTemp(Ity_I32);
   21341          IRTemp irt_low = newTemp(Ity_I32);
   21342          IRTemp irt_res = newTemp(Ity_I32);
   21343          assign(irt_rM, getIRegT(rM1));
   21344          assign(irt_hi,
   21345                 binop(Iop_Sar32,
   21346                       binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
   21347                       mkU8(16)
   21348                 )
   21349          );
   21350          assign(irt_low,
   21351                 binop(Iop_And32,
   21352                       binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
   21353                       mkU32(0xFF)
   21354                 )
   21355          );
   21356          assign(irt_res,
   21357                 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
   21358          );
   21359          putIRegT(rD, mkexpr(irt_res), condT);
   21360          DIP("revsh r%u, r%u\n", rD, rM1);
   21361          goto decode_success;
   21362       }
   21363    }
   21364 
   21365    /* -------------- (T1) MSR apsr, reg -------------- */
   21366    if (INSN0(15,4) == 0xF38
   21367        && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(9,0) == 0x000) {
   21368       UInt rN          = INSN0(3,0);
   21369       UInt write_ge    = INSN1(10,10);
   21370       UInt write_nzcvq = INSN1(11,11);
   21371       if (!isBadRegT(rN) && (write_nzcvq || write_ge)) {
   21372          IRTemp rNt = newTemp(Ity_I32);
   21373          assign(rNt, getIRegT(rN));
   21374          desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
   21375          DIP("msr cpsr_%s%s, r%u\n",
   21376              write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
   21377          goto decode_success;
   21378       }
   21379    }
   21380 
   21381    /* -------------- (T1) MRS reg, apsr -------------- */
   21382    if (INSN0(15,0) == 0xF3EF
   21383        && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(7,0) == 0x00) {
   21384       UInt rD = INSN1(11,8);
   21385       if (!isBadRegT(rD)) {
   21386          IRTemp apsr = synthesise_APSR();
   21387          putIRegT( rD, mkexpr(apsr), condT );
   21388          DIP("mrs r%u, cpsr\n", rD);
   21389          goto decode_success;
   21390       }
   21391    }
   21392 
   21393    /* ----------------- (T1) LDREX ----------------- */
   21394    if (INSN0(15,4) == 0xE85 && INSN1(11,8) == BITS4(1,1,1,1)) {
   21395       UInt rN   = INSN0(3,0);
   21396       UInt rT   = INSN1(15,12);
   21397       UInt imm8 = INSN1(7,0);
   21398       if (!isBadRegT(rT) && rN != 15) {
   21399          IRTemp res;
   21400          // go uncond
   21401          mk_skip_over_T32_if_cond_is_false( condT );
   21402          // now uncond
   21403          res = newTemp(Ity_I32);
   21404          stmt( IRStmt_LLSC(Iend_LE,
   21405                            res,
   21406                            binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
   21407                            NULL/*this is a load*/ ));
   21408          putIRegT(rT, mkexpr(res), IRTemp_INVALID);
   21409          DIP("ldrex r%u, [r%u, #+%u]\n", rT, rN, imm8 * 4);
   21410          goto decode_success;
   21411       }
   21412    }
   21413 
   21414    /* --------------- (T1) LDREX{B,H} --------------- */
   21415    if (INSN0(15,4) == 0xE8D
   21416        && (INSN1(11,0) == 0xF4F || INSN1(11,0) == 0xF5F)) {
   21417       UInt rN  = INSN0(3,0);
   21418       UInt rT  = INSN1(15,12);
   21419       Bool isH = INSN1(11,0) == 0xF5F;
   21420       if (!isBadRegT(rT) && rN != 15) {
   21421          IRTemp res;
   21422          // go uncond
   21423          mk_skip_over_T32_if_cond_is_false( condT );
   21424          // now uncond
   21425          res = newTemp(isH ? Ity_I16 : Ity_I8);
   21426          stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
   21427                            NULL/*this is a load*/ ));
   21428          putIRegT(rT, unop(isH ? Iop_16Uto32 : Iop_8Uto32, mkexpr(res)),
   21429                       IRTemp_INVALID);
   21430          DIP("ldrex%c r%u, [r%u]\n", isH ? 'h' : 'b', rT, rN);
   21431          goto decode_success;
   21432       }
   21433    }
   21434 
   21435    /* --------------- (T1) LDREXD --------------- */
   21436    if (INSN0(15,4) == 0xE8D && INSN1(7,0) == 0x7F) {
   21437       UInt rN  = INSN0(3,0);
   21438       UInt rT  = INSN1(15,12);
   21439       UInt rT2 = INSN1(11,8);
   21440       if (!isBadRegT(rT) && !isBadRegT(rT2) && rT != rT2 && rN != 15) {
   21441          IRTemp res;
   21442          // go uncond
   21443          mk_skip_over_T32_if_cond_is_false( condT );
   21444          // now uncond
   21445          res = newTemp(Ity_I64);
   21446          // FIXME: assumes little-endian guest
   21447          stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
   21448                            NULL/*this is a load*/ ));
   21449          // FIXME: assumes little-endian guest
   21450          putIRegT(rT,  unop(Iop_64to32,   mkexpr(res)), IRTemp_INVALID);
   21451          putIRegT(rT2, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID);
   21452          DIP("ldrexd r%u, r%u, [r%u]\n", rT, rT2, rN);
   21453          goto decode_success;
   21454       }
   21455    }
   21456 
   21457    /* ----------------- (T1) STREX ----------------- */
   21458    if (INSN0(15,4) == 0xE84) {
   21459       UInt rN   = INSN0(3,0);
   21460       UInt rT   = INSN1(15,12);
   21461       UInt rD   = INSN1(11,8);
   21462       UInt imm8 = INSN1(7,0);
   21463       if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
   21464           && rD != rN && rD != rT) {
   21465          IRTemp resSC1, resSC32;
   21466          // go uncond
   21467          mk_skip_over_T32_if_cond_is_false( condT );
   21468          // now uncond
   21469          /* Ok, now we're unconditional.  Do the store. */
   21470          resSC1 = newTemp(Ity_I1);
   21471          stmt( IRStmt_LLSC(Iend_LE,
   21472                            resSC1,
   21473                            binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
   21474                            getIRegT(rT)) );
   21475          /* Set rD to 1 on failure, 0 on success.  Currently we have
   21476             resSC1 == 0 on failure, 1 on success. */
   21477          resSC32 = newTemp(Ity_I32);
   21478          assign(resSC32,
   21479                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   21480          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
   21481          DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
   21482          goto decode_success;
   21483       }
   21484    }
   21485 
   21486    /* --------------- (T1) STREX{B,H} --------------- */
   21487    if (INSN0(15,4) == 0xE8C
   21488        && (INSN1(11,4) == 0xF4 || INSN1(11,4) == 0xF5)) {
   21489       UInt rN  = INSN0(3,0);
   21490       UInt rT  = INSN1(15,12);
   21491       UInt rD  = INSN1(3,0);
   21492       Bool isH = INSN1(11,4) == 0xF5;
   21493       if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
   21494           && rD != rN && rD != rT) {
   21495          IRTemp resSC1, resSC32;
   21496          // go uncond
   21497          mk_skip_over_T32_if_cond_is_false( condT );
   21498          // now uncond
   21499          /* Ok, now we're unconditional.  Do the store. */
   21500          resSC1 = newTemp(Ity_I1);
   21501          stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN),
   21502                            unop(isH ? Iop_32to16 : Iop_32to8,
   21503                                 getIRegT(rT))) );
   21504          /* Set rD to 1 on failure, 0 on success.  Currently we have
   21505             resSC1 == 0 on failure, 1 on success. */
   21506          resSC32 = newTemp(Ity_I32);
   21507          assign(resSC32,
   21508                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   21509          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
   21510          DIP("strex%c r%u, r%u, [r%u]\n", isH ? 'h' : 'b', rD, rT, rN);
   21511          goto decode_success;
   21512       }
   21513    }
   21514 
   21515    /* ---------------- (T1) STREXD ---------------- */
   21516    if (INSN0(15,4) == 0xE8C && INSN1(7,4) == BITS4(0,1,1,1)) {
   21517       UInt rN  = INSN0(3,0);
   21518       UInt rT  = INSN1(15,12);
   21519       UInt rT2 = INSN1(11,8);
   21520       UInt rD  = INSN1(3,0);
   21521       if (!isBadRegT(rD) && !isBadRegT(rT) && !isBadRegT(rT2)
   21522           && rN != 15 && rD != rN && rD != rT && rD != rT2) {
   21523          IRTemp resSC1, resSC32, data;
   21524          // go uncond
   21525          mk_skip_over_T32_if_cond_is_false( condT );
   21526          // now uncond
   21527          /* Ok, now we're unconditional.  Do the store. */
   21528          resSC1 = newTemp(Ity_I1);
   21529          data = newTemp(Ity_I64);
   21530          // FIXME: assumes little-endian guest
   21531          assign(data, binop(Iop_32HLto64, getIRegT(rT2), getIRegT(rT)));
   21532          // FIXME: assumes little-endian guest
   21533          stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN), mkexpr(data)));
   21534          /* Set rD to 1 on failure, 0 on success.  Currently we have
   21535             resSC1 == 0 on failure, 1 on success. */
   21536          resSC32 = newTemp(Ity_I32);
   21537          assign(resSC32,
   21538                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   21539          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
   21540          DIP("strexd r%u, r%u, r%u, [r%u]\n", rD, rT, rT2, rN);
   21541          goto decode_success;
   21542       }
   21543    }
   21544 
   21545    /* -------------- v7 barrier insns -------------- */
   21546    if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF00) == 0x8F00) {
   21547       /* FIXME: should this be unconditional? */
   21548       /* XXX this isn't really right, is it?  The generated IR does
   21549          them unconditionally.  I guess it doesn't matter since it
   21550          doesn't do any harm to do them even when the guarding
   21551          condition is false -- it's just a performance loss. */
   21552       switch (INSN1(7,0)) {
   21553          case 0x4F: /* DSB sy */
   21554          case 0x4E: /* DSB st */
   21555          case 0x4B: /* DSB ish */
   21556          case 0x4A: /* DSB ishst */
   21557          case 0x47: /* DSB nsh */
   21558          case 0x46: /* DSB nshst */
   21559          case 0x43: /* DSB osh */
   21560          case 0x42: /* DSB oshst */
   21561             stmt( IRStmt_MBE(Imbe_Fence) );
   21562             DIP("DSB\n");
   21563             goto decode_success;
   21564          case 0x5F: /* DMB sy */
   21565          case 0x5E: /* DMB st */
   21566          case 0x5B: /* DMB ish */
   21567          case 0x5A: /* DMB ishst */
   21568          case 0x57: /* DMB nsh */
   21569          case 0x56: /* DMB nshst */
   21570          case 0x53: /* DMB osh */
   21571          case 0x52: /* DMB oshst */
   21572             stmt( IRStmt_MBE(Imbe_Fence) );
   21573             DIP("DMB\n");
   21574             goto decode_success;
   21575          case 0x6F: /* ISB */
   21576             stmt( IRStmt_MBE(Imbe_Fence) );
   21577             DIP("ISB\n");
   21578             goto decode_success;
   21579          default:
   21580             break;
   21581       }
   21582    }
   21583 
   21584    /* ---------------------- PLD{,W} ---------------------- */
   21585    if ((INSN0(15,4) & 0xFFD) == 0xF89 && INSN1(15,12) == 0xF) {
   21586       /* FIXME: should this be unconditional? */
   21587       /* PLD/PLDW immediate, encoding T1 */
   21588       UInt rN    = INSN0(3,0);
   21589       UInt bW    = INSN0(5,5);
   21590       UInt imm12 = INSN1(11,0);
   21591       DIP("pld%s [r%u, #%u]\n", bW ? "w" : "",  rN, imm12);
   21592       goto decode_success;
   21593    }
   21594 
   21595    if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,8) == 0xFC) {
   21596       /* FIXME: should this be unconditional? */
   21597       /* PLD/PLDW immediate, encoding T2 */
   21598       UInt rN    = INSN0(3,0);
   21599       UInt bW    = INSN0(5,5);
   21600       UInt imm8  = INSN1(7,0);
   21601       DIP("pld%s [r%u, #-%u]\n", bW ? "w" : "",  rN, imm8);
   21602       goto decode_success;
   21603    }
   21604 
   21605    if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,6) == 0x3C0) {
   21606       /* FIXME: should this be unconditional? */
   21607       /* PLD/PLDW register, encoding T1 */
   21608       UInt rN   = INSN0(3,0);
   21609       UInt rM   = INSN1(3,0);
   21610       UInt bW   = INSN0(5,5);
   21611       UInt imm2 = INSN1(5,4);
   21612       if (!isBadRegT(rM)) {
   21613          DIP("pld%s [r%u, r%u, lsl %u]\n", bW ? "w" : "", rN, rM, imm2);
   21614          goto decode_success;
   21615       }
   21616       /* fall through */
   21617    }
   21618 
   21619    /* -------------- read CP15 TPIDRURO register ------------- */
   21620    /* mrc     p15, 0,  r0, c13, c0, 3  up to
   21621       mrc     p15, 0, r14, c13, c0, 3
   21622    */
   21623    /* I don't know whether this is really v7-only.  But anyway, we
   21624       have to support it since arm-linux uses TPIDRURO as a thread
   21625       state register. */
   21626    if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F70)) {
   21627       UInt rD = INSN1(15,12);
   21628       if (!isBadRegT(rD)) {
   21629          putIRegT(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32), condT);
   21630          DIP("mrc p15,0, r%u, c13, c0, 3\n", rD);
   21631          goto decode_success;
   21632       }
   21633       /* fall through */
   21634    }
   21635 
   21636    /* -------------- read CP15 PMUSRENR register ------------- */
   21637    /* mrc     p15, 0, r0,  c9, c14, 0  up to
   21638       mrc     p15, 0, r14, c9, c14, 0
   21639       See comment on the ARM equivalent of this (above) for details.
   21640    */
   21641    if ((INSN0(15,0) == 0xEE19) && (INSN1(11,0) == 0x0F1E)) {
   21642       UInt rD = INSN1(15,12);
   21643       if (!isBadRegT(rD)) {
   21644          putIRegT(rD, mkU32(0), condT);
   21645          DIP("mrc p15,0, r%u, c9, c14, 0\n", rD);
   21646          goto decode_success;
   21647       }
   21648       /* fall through */
   21649    }
   21650 
   21651    /* ------------------- CLREX ------------------ */
   21652    if (INSN0(15,0) == 0xF3BF && INSN1(15,0) == 0x8F2F) {
   21653       /* AFAICS, this simply cancels a (all?) reservations made by a
   21654          (any?) preceding LDREX(es).  Arrange to hand it through to
   21655          the back end. */
   21656       mk_skip_over_T32_if_cond_is_false( condT );
   21657       stmt( IRStmt_MBE(Imbe_CancelReservation) );
   21658       DIP("clrex\n");
   21659       goto decode_success;
   21660    }
   21661 
   21662    /* ------------------- NOP ------------------ */
   21663    if (INSN0(15,0) == 0xF3AF && INSN1(15,0) == 0x8000) {
   21664       DIP("nop\n");
   21665       goto decode_success;
   21666    }
   21667 
   21668    /* -------------- (T1) LDRT reg+#imm8 -------------- */
   21669    /* Load Register Unprivileged:
   21670       ldrt Rt, [Rn, #imm8]
   21671    */
   21672    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,1)
   21673        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21674       UInt rT    = INSN1(15,12);
   21675       UInt rN    = INSN0(3,0);
   21676       UInt imm8  = INSN1(7,0);
   21677       Bool valid = True;
   21678       if (rN == 15 || isBadRegT(rT)) valid = False;
   21679       if (valid) {
   21680          put_ITSTATE(old_itstate);
   21681          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21682          IRTemp newRt = newTemp(Ity_I32);
   21683          loadGuardedLE( newRt, ILGop_Ident32, ea, llGetIReg(rT), condT );
   21684          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21685          put_ITSTATE(new_itstate);
   21686          DIP("ldrt r%u, [r%u, #%u]\n", rT, rN, imm8);
   21687          goto decode_success;
   21688       }
   21689    }
   21690 
   21691    /* -------------- (T1) STRT reg+#imm8 -------------- */
   21692    /* Store Register Unprivileged:
   21693       strt Rt, [Rn, #imm8]
   21694    */
   21695    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,0)
   21696        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21697       UInt rT    = INSN1(15,12);
   21698       UInt rN    = INSN0(3,0);
   21699       UInt imm8  = INSN1(7,0);
   21700       Bool valid = True;
   21701       if (rN == 15 || isBadRegT(rT)) valid = False;
   21702       if (valid) {
   21703          put_ITSTATE(old_itstate);
   21704          IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21705          storeGuardedLE( address, llGetIReg(rT), condT );
   21706          put_ITSTATE(new_itstate);
   21707          DIP("strt r%u, [r%u, #%u]\n", rT, rN, imm8);
   21708          goto decode_success;
   21709       }
   21710    }
   21711 
   21712    /* -------------- (T1) STRBT reg+#imm8 -------------- */
   21713    /* Store Register Byte Unprivileged:
   21714       strbt Rt, [Rn, #imm8]
   21715    */
   21716    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,0)
   21717        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21718       UInt rT    = INSN1(15,12);
   21719       UInt rN    = INSN0(3,0);
   21720       UInt imm8  = INSN1(7,0);
   21721       Bool valid = True;
   21722       if (rN == 15 || isBadRegT(rT)) valid = False;
   21723       if (valid) {
   21724          put_ITSTATE(old_itstate);
   21725          IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21726          IRExpr* data = unop(Iop_32to8, llGetIReg(rT));
   21727          storeGuardedLE( address, data, condT );
   21728          put_ITSTATE(new_itstate);
   21729          DIP("strbt r%u, [r%u, #%u]\n", rT, rN, imm8);
   21730          goto decode_success;
   21731       }
   21732    }
   21733 
   21734    /* -------------- (T1) LDRHT reg+#imm8 -------------- */
   21735    /* Load Register Halfword Unprivileged:
   21736       ldrht Rt, [Rn, #imm8]
   21737    */
   21738    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,1)
   21739        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21740       UInt rN    = INSN0(3,0);
   21741       Bool valid = True;
   21742       if (rN == 15) {
   21743          /* In this case the instruction is in fact LDRH (literal).
   21744             LDRH (literal) was already handled earlier, so we don't
   21745             want to decode it a second time. */
   21746          valid = False;
   21747       }
   21748       UInt rT    = INSN1(15,12);
   21749       UInt imm8  = INSN1(7,0);
   21750       if (isBadRegT(rT)) valid = False;
   21751       if (valid) {
   21752          put_ITSTATE(old_itstate);
   21753          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21754          IRTemp newRt = newTemp(Ity_I32);
   21755          loadGuardedLE( newRt, ILGop_16Uto32, ea, llGetIReg(rT), condT );
   21756          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21757          put_ITSTATE(new_itstate);
   21758          DIP("ldrht r%u, [r%u, #%u]\n", rT, rN, imm8);
   21759          goto decode_success;
   21760       }
   21761    }
   21762 
   21763    /* -------------- (T1) LDRSHT reg+#imm8 -------------- */
   21764    /* Load Register Signed Halfword Unprivileged:
   21765       ldrsht Rt, [Rn, #imm8]
   21766    */
   21767    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(1,1)
   21768        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21769       UInt rN    = INSN0(3,0);
   21770       Bool valid = True;
   21771       if (rN == 15) {
   21772          /* In this case the instruction is in fact LDRSH (literal).
   21773             LDRSH (literal) was already handled earlier, so we don't
   21774             want to decode it a second time. */
   21775          valid = False;
   21776       }
   21777       UInt rT    = INSN1(15,12);
   21778       UInt imm8  = INSN1(7,0);
   21779       if (isBadRegT(rT)) valid = False;
   21780       if (valid) {
   21781          put_ITSTATE(old_itstate);
   21782          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21783          IRTemp newRt = newTemp(Ity_I32);
   21784          loadGuardedLE( newRt, ILGop_16Sto32, ea, llGetIReg(rT), condT );
   21785          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21786          put_ITSTATE(new_itstate);
   21787          DIP("ldrsht r%u, [r%u, #%u]\n", rT, rN, imm8);
   21788          goto decode_success;
   21789       }
   21790    }
   21791 
   21792    /* -------------- (T1) STRHT reg+#imm8 -------------- */
   21793    /* Store Register Halfword Unprivileged:
   21794       strht Rt, [Rn, #imm8]
   21795    */
   21796    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,0)
   21797        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21798       UInt rT    = INSN1(15,12);
   21799       UInt rN    = INSN0(3,0);
   21800       UInt imm8  = INSN1(7,0);
   21801       Bool valid = True;
   21802       if (rN == 15 || isBadRegT(rT)) valid = False;
   21803       if (valid) {
   21804          put_ITSTATE(old_itstate);
   21805          IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21806          IRExpr* data = unop(Iop_32to16, llGetIReg(rT));
   21807          storeGuardedLE( address, data, condT );
   21808          put_ITSTATE(new_itstate);
   21809          DIP("strht r%u, [r%u, #%u]\n", rT, rN, imm8);
   21810          goto decode_success;
   21811       }
   21812    }
   21813 
   21814    /* -------------- (T1) LDRBT reg+#imm8 -------------- */
   21815    /* Load Register Byte Unprivileged:
   21816       ldrbt Rt, [Rn, #imm8]
   21817    */
   21818    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,1)
   21819        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21820       UInt rN    = INSN0(3,0);
   21821       UInt rT    = INSN1(15,12);
   21822       UInt imm8  = INSN1(7,0);
   21823       Bool valid = True;
   21824       if (rN == 15 /* insn is LDRB (literal) */) valid = False;
   21825       if (isBadRegT(rT)) valid = False;
   21826       if (valid) {
   21827          put_ITSTATE(old_itstate);
   21828          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21829          IRTemp newRt = newTemp(Ity_I32);
   21830          loadGuardedLE( newRt, ILGop_8Uto32, ea, llGetIReg(rT), condT );
   21831          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21832          put_ITSTATE(new_itstate);
   21833          DIP("ldrbt r%u, [r%u, #%u]\n", rT, rN, imm8);
   21834          goto decode_success;
   21835       }
   21836    }
   21837 
   21838    /* -------------- (T1) LDRSBT reg+#imm8 -------------- */
   21839    /* Load Register Signed Byte Unprivileged:
   21840       ldrsbt Rt, [Rn, #imm8]
   21841    */
   21842    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
   21843        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21844       UInt rN    = INSN0(3,0);
   21845       Bool valid = True;
   21846       UInt rT    = INSN1(15,12);
   21847       UInt imm8  = INSN1(7,0);
   21848       if (rN == 15 /* insn is LDRSB (literal) */) valid = False;
   21849       if (isBadRegT(rT)) valid = False;
   21850       if (valid) {
   21851          put_ITSTATE(old_itstate);
   21852          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21853          IRTemp newRt = newTemp(Ity_I32);
   21854          loadGuardedLE( newRt, ILGop_8Sto32, ea, llGetIReg(rT), condT );
   21855          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21856          put_ITSTATE(new_itstate);
   21857          DIP("ldrsbt r%u, [r%u, #%u]\n", rT, rN, imm8);
   21858          goto decode_success;
   21859       }
   21860    }
   21861 
   21862    /* -------------- (T1) PLI reg+#imm12 -------------- */
   21863    /* Preload Instruction:
   21864       pli [Rn, #imm12]
   21865    */
   21866    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,1,0) && INSN0(5,4) == BITS2(0,1)
   21867        && INSN1(15,12) == BITS4(1,1,1,1)) {
   21868       UInt rN    = INSN0(3,0);
   21869       UInt imm12 = INSN1(11,0);
   21870       if (rN != 15) {
   21871          DIP("pli [r%u, #%u]\n", rN, imm12);
   21872          goto decode_success;
   21873       }
   21874    }
   21875 
   21876    /* -------------- (T2) PLI reg-#imm8 -------------- */
   21877    /* Preload Instruction:
   21878       pli [Rn, #-imm8]
   21879    */
   21880    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
   21881        && INSN1(15,8) == BITS8(1,1,1,1,1,1,0,0)) {
   21882       UInt rN   = INSN0(3,0);
   21883       UInt imm8 = INSN1(7,0);
   21884       if (rN != 15) {
   21885          DIP("pli [r%u, #-%u]\n", rN, imm8);
   21886          goto decode_success;
   21887       }
   21888    }
   21889 
   21890    /* -------------- (T3) PLI PC+/-#imm12 -------------- */
   21891    /* Preload Instruction:
   21892       pli [PC, #+/-imm12]
   21893    */
   21894    if (INSN0(15,8) == BITS8(1,1,1,1,1,0,0,1)
   21895        && INSN0(6,0) == BITS7(0,0,1,1,1,1,1)
   21896        && INSN1(15,12) == BITS4(1,1,1,1)) {
   21897       UInt imm12 = INSN1(11,0);
   21898       UInt bU    = INSN0(7,7);
   21899       DIP("pli [pc, #%c%u]\n", bU == 1 ? '+' : '-', imm12);
   21900       goto decode_success;
   21901    }
   21902 
   21903    /* ----------------------------------------------------------- */
   21904    /* -- VFP (CP 10, CP 11) instructions (in Thumb mode)       -- */
   21905    /* ----------------------------------------------------------- */
   21906 
   21907    if (INSN0(15,12) == BITS4(1,1,1,0)) {
   21908       UInt insn28 = (INSN0(11,0) << 16) | INSN1(15,0);
   21909       Bool ok_vfp = decode_CP10_CP11_instruction (
   21910                        &dres, insn28, condT, ARMCondAL/*bogus*/,
   21911                        True/*isT*/
   21912                     );
   21913       if (ok_vfp)
   21914          goto decode_success;
   21915    }
   21916 
   21917    /* ----------------------------------------------------------- */
   21918    /* -- NEON instructions (in Thumb mode)                     -- */
   21919    /* ----------------------------------------------------------- */
   21920 
   21921    if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
   21922       UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
   21923       Bool ok_neon = decode_NEON_instruction(
   21924                         &dres, insn32, condT, True/*isT*/
   21925                      );
   21926       if (ok_neon)
   21927          goto decode_success;
   21928    }
   21929 
   21930    /* ----------------------------------------------------------- */
   21931    /* -- v6 media instructions (in Thumb mode)                 -- */
   21932    /* ----------------------------------------------------------- */
   21933 
   21934    { UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
   21935      Bool ok_v6m = decode_V6MEDIA_instruction(
   21936                       &dres, insn32, condT, ARMCondAL/*bogus*/,
   21937                       True/*isT*/
   21938                    );
   21939      if (ok_v6m)
   21940         goto decode_success;
   21941    }
   21942 
   21943    /* ----------------------------------------------------------- */
   21944    /* -- Undecodable                                           -- */
   21945    /* ----------------------------------------------------------- */
   21946 
   21947    goto decode_failure;
   21948    /*NOTREACHED*/
   21949 
   21950   decode_failure:
   21951    /* All decode failures end up here. */
   21952    if (sigill_diag)
   21953       vex_printf("disInstr(thumb): unhandled instruction: "
   21954                  "0x%04x 0x%04x\n", (UInt)insn0, (UInt)insn1);
   21955 
   21956    /* Back up ITSTATE to the initial value for this instruction.
   21957       If we don't do that, any subsequent restart of the instruction
   21958       will restart with the wrong value. */
   21959    if (old_itstate != IRTemp_INVALID)
   21960       put_ITSTATE(old_itstate);
   21961 
   21962    /* Tell the dispatcher that this insn cannot be decoded, and so has
   21963       not been executed, and (is currently) the next to be executed.
   21964       R15 should be up-to-date since it made so at the start of each
   21965       insn, but nevertheless be paranoid and update it again right
   21966       now. */
   21967    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   21968    llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
   21969    dres.len         = 0;
   21970    dres.whatNext    = Dis_StopHere;
   21971    dres.jk_StopHere = Ijk_NoDecode;
   21972    dres.continueAt  = 0;
   21973    return dres;
   21974 
   21975   decode_success:
   21976    /* All decode successes end up here. */
   21977    vassert(dres.len == 4 || dres.len == 2 || dres.len == 20);
   21978    switch (dres.whatNext) {
   21979       case Dis_Continue:
   21980          llPutIReg(15, mkU32(dres.len + (guest_R15_curr_instr_notENC | 1)));
   21981          break;
   21982       case Dis_ResteerU:
   21983       case Dis_ResteerC:
   21984          llPutIReg(15, mkU32(dres.continueAt));
   21985          break;
   21986       case Dis_StopHere:
   21987          break;
   21988       default:
   21989          vassert(0);
   21990    }
   21991 
   21992    DIP("\n");
   21993 
   21994    return dres;
   21995 
   21996 #  undef INSN0
   21997 #  undef INSN1
   21998 }
   21999 
   22000 #undef DIP
   22001 #undef DIS
   22002 
   22003 
   22004 /* Helper table for figuring out how many insns an IT insn
   22005    conditionalises.
   22006 
   22007    An ITxyz instruction of the format "1011 1111 firstcond mask"
   22008    conditionalises some number of instructions, as indicated by the
   22009    following table.  A value of zero indicates the instruction is
   22010    invalid in some way.
   22011 
   22012    mask = 0 means this isn't an IT instruction
   22013    fc = 15 (NV) means unpredictable
   22014 
   22015    The line fc = 14 (AL) is different from the others; there are
   22016    additional constraints in this case.
   22017 
   22018           mask(0 ..                   15)
   22019         +--------------------------------
   22020    fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22021    ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22022         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22023         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22024         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22025         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22026         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22027         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22028         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22029         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22030         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22031         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22032         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22033         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22034         | 0 4 3 0 2 0 0 0 1 0 0 0 0 0 0 0
   22035    15)  | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
   22036 
   22037    To be conservative with the analysis, let's rule out the mask = 0
   22038    case, since that isn't an IT insn at all.  But for all the other
   22039    cases where the table contains zero, that means unpredictable, so
   22040    let's say 4 to be conservative.  Hence we have a safe value for any
   22041    IT (mask,fc) pair that the CPU would actually identify as an IT
   22042    instruction.  The final table is
   22043 
   22044           mask(0 ..                   15)
   22045         +--------------------------------
   22046    fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22047    ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22048         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22049         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22050         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22051         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22052         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22053         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22054         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22055         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22056         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22057         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22058         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22059         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   22060         | 0 4 3 4 2 4 4 4 1 4 4 4 4 4 4 4
   22061    15)  | 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
   22062 */
   22063 static const UChar it_length_table[256]
   22064    = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22065        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22066        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22067        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22068        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22069        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22070        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22071        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22072        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22073        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22074        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22075        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22076        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22077        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22078        0, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
   22079        0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
   22080      };
   22081 
   22082 
   22083 /*------------------------------------------------------------*/
   22084 /*--- Top-level fn                                         ---*/
   22085 /*------------------------------------------------------------*/
   22086 
   22087 /* Disassemble a single instruction into IR.  The instruction
   22088    is located in host memory at &guest_code[delta]. */
   22089 
   22090 DisResult disInstr_ARM ( IRSB*        irsb_IN,
   22091                          Bool         (*resteerOkFn) ( void*, Addr ),
   22092                          Bool         resteerCisOk,
   22093                          void*        callback_opaque,
   22094                          const UChar* guest_code_IN,
   22095                          Long         delta_ENCODED,
   22096                          Addr         guest_IP_ENCODED,
   22097                          VexArch      guest_arch,
   22098                          const VexArchInfo* archinfo,
   22099                          const VexAbiInfo*  abiinfo,
   22100                          VexEndness   host_endness_IN,
   22101                          Bool         sigill_diag_IN )
   22102 {
   22103    DisResult dres;
   22104    Bool isThumb = (Bool)(guest_IP_ENCODED & 1);
   22105 
   22106    /* Set globals (see top of this file) */
   22107    vassert(guest_arch == VexArchARM);
   22108 
   22109    irsb            = irsb_IN;
   22110    host_endness    = host_endness_IN;
   22111    __curr_is_Thumb = isThumb;
   22112 
   22113    if (isThumb) {
   22114       guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED - 1;
   22115    } else {
   22116       guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED;
   22117    }
   22118 
   22119    if (isThumb) {
   22120       dres = disInstr_THUMB_WRK ( resteerOkFn,
   22121                                   resteerCisOk, callback_opaque,
   22122                                   &guest_code_IN[delta_ENCODED - 1],
   22123                                   archinfo, abiinfo, sigill_diag_IN );
   22124    } else {
   22125       dres = disInstr_ARM_WRK ( resteerOkFn,
   22126                                 resteerCisOk, callback_opaque,
   22127                                 &guest_code_IN[delta_ENCODED],
   22128                                 archinfo, abiinfo, sigill_diag_IN );
   22129    }
   22130 
   22131    return dres;
   22132 }
   22133 
   22134 /* Test program for the conversion of IRCmpF64Result values to VFP
   22135    nzcv values.  See handling of FCMPD et al above. */
   22136 /*
   22137 UInt foo ( UInt x )
   22138 {
   22139    UInt ix    = ((x >> 5) & 3) | (x & 1);
   22140    UInt termL = (((((ix ^ 1) << 30) - 1) >> 29) + 1);
   22141    UInt termR = (ix & (ix >> 1) & 1);
   22142    return termL  -  termR;
   22143 }
   22144 
   22145 void try ( char* s, UInt ir, UInt req )
   22146 {
   22147    UInt act = foo(ir);
   22148    printf("%s 0x%02x -> req %d%d%d%d act %d%d%d%d (0x%x)\n",
   22149           s, ir, (req >> 3) & 1, (req >> 2) & 1,
   22150                  (req >> 1) & 1, (req >> 0) & 1,
   22151                  (act >> 3) & 1, (act >> 2) & 1,
   22152                  (act >> 1) & 1, (act >> 0) & 1, act);
   22153 
   22154 }
   22155 
   22156 int main ( void )
   22157 {
   22158    printf("\n");
   22159    try("UN", 0x45, 0b0011);
   22160    try("LT", 0x01, 0b1000);
   22161    try("GT", 0x00, 0b0010);
   22162    try("EQ", 0x40, 0b0110);
   22163    printf("\n");
   22164    return 0;
   22165 }
   22166 */
   22167 
   22168 /* Spare code for doing reference implementations of various 64-bit
   22169    SIMD interleaves/deinterleaves/concatenation ops. */
   22170 /*
   22171 // Split a 64 bit value into 4 16 bit ones, in 32-bit IRTemps with
   22172 // the top halves guaranteed to be zero.
   22173 static void break64to16s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
   22174                            IRTemp* out0, IRTemp v64 )
   22175 {
   22176   if (out3) *out3 = newTemp(Ity_I32);
   22177   if (out2) *out2 = newTemp(Ity_I32);
   22178   if (out1) *out1 = newTemp(Ity_I32);
   22179   if (out0) *out0 = newTemp(Ity_I32);
   22180   IRTemp hi32 = newTemp(Ity_I32);
   22181   IRTemp lo32 = newTemp(Ity_I32);
   22182   assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
   22183   assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
   22184   if (out3) assign(*out3, binop(Iop_Shr32, mkexpr(hi32), mkU8(16)));
   22185   if (out2) assign(*out2, binop(Iop_And32, mkexpr(hi32), mkU32(0xFFFF)));
   22186   if (out1) assign(*out1, binop(Iop_Shr32, mkexpr(lo32), mkU8(16)));
   22187   if (out0) assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFFFF)));
   22188 }
   22189 
   22190 // Make a 64 bit value from 4 16 bit ones, each of which is in a 32 bit
   22191 // IRTemp.
   22192 static IRTemp mk64from16s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
   22193 {
   22194   IRTemp hi32 = newTemp(Ity_I32);
   22195   IRTemp lo32 = newTemp(Ity_I32);
   22196   assign(hi32,
   22197          binop(Iop_Or32,
   22198                binop(Iop_Shl32, mkexpr(in3), mkU8(16)),
   22199                binop(Iop_And32, mkexpr(in2), mkU32(0xFFFF))));
   22200   assign(lo32,
   22201          binop(Iop_Or32,
   22202                binop(Iop_Shl32, mkexpr(in1), mkU8(16)),
   22203                binop(Iop_And32, mkexpr(in0), mkU32(0xFFFF))));
   22204   IRTemp res = newTemp(Ity_I64);
   22205   assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
   22206   return res;
   22207 }
   22208 
   22209 static IRExpr* mk_InterleaveLO16x4 ( IRTemp a3210, IRTemp b3210 )
   22210 {
   22211   // returns a1 b1 a0 b0
   22212   IRTemp a1, a0, b1, b0;
   22213   break64to16s(NULL, NULL, &a1, &a0, a3210);
   22214   break64to16s(NULL, NULL, &b1, &b0, b3210);
   22215   return mkexpr(mk64from16s(a1, b1, a0, b0));
   22216 }
   22217 
   22218 static IRExpr* mk_InterleaveHI16x4 ( IRTemp a3210, IRTemp b3210 )
   22219 {
   22220   // returns a3 b3 a2 b2
   22221   IRTemp a3, a2, b3, b2;
   22222   break64to16s(&a3, &a2, NULL, NULL, a3210);
   22223   break64to16s(&b3, &b2, NULL, NULL, b3210);
   22224   return mkexpr(mk64from16s(a3, b3, a2, b2));
   22225 }
   22226 
   22227 static IRExpr* mk_CatEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   22228 {
   22229   // returns a2 a0 b2 b0
   22230   IRTemp a2, a0, b2, b0;
   22231   break64to16s(NULL, &a2, NULL, &a0, a3210);
   22232   break64to16s(NULL, &b2, NULL, &b0, b3210);
   22233   return mkexpr(mk64from16s(a2, a0, b2, b0));
   22234 }
   22235 
   22236 static IRExpr* mk_CatOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   22237 {
   22238   // returns a3 a1 b3 b1
   22239   IRTemp a3, a1, b3, b1;
   22240   break64to16s(&a3, NULL, &a1, NULL, a3210);
   22241   break64to16s(&b3, NULL, &b1, NULL, b3210);
   22242   return mkexpr(mk64from16s(a3, a1, b3, b1));
   22243 }
   22244 
   22245 static IRExpr* mk_InterleaveOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   22246 {
   22247   // returns a3 b3 a1 b1
   22248   IRTemp a3, b3, a1, b1;
   22249   break64to16s(&a3, NULL, &a1, NULL, a3210);
   22250   break64to16s(&b3, NULL, &b1, NULL, b3210);
   22251   return mkexpr(mk64from16s(a3, b3, a1, b1));
   22252 }
   22253 
   22254 static IRExpr* mk_InterleaveEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   22255 {
   22256   // returns a2 b2 a0 b0
   22257   IRTemp a2, b2, a0, b0;
   22258   break64to16s(NULL, &a2, NULL, &a0, a3210);
   22259   break64to16s(NULL, &b2, NULL, &b0, b3210);
   22260   return mkexpr(mk64from16s(a2, b2, a0, b0));
   22261 }
   22262 
   22263 static void break64to8s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
   22264                           IRTemp* out4, IRTemp* out3, IRTemp* out2,
   22265                           IRTemp* out1,IRTemp* out0, IRTemp v64 )
   22266 {
   22267   if (out7) *out7 = newTemp(Ity_I32);
   22268   if (out6) *out6 = newTemp(Ity_I32);
   22269   if (out5) *out5 = newTemp(Ity_I32);
   22270   if (out4) *out4 = newTemp(Ity_I32);
   22271   if (out3) *out3 = newTemp(Ity_I32);
   22272   if (out2) *out2 = newTemp(Ity_I32);
   22273   if (out1) *out1 = newTemp(Ity_I32);
   22274   if (out0) *out0 = newTemp(Ity_I32);
   22275   IRTemp hi32 = newTemp(Ity_I32);
   22276   IRTemp lo32 = newTemp(Ity_I32);
   22277   assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
   22278   assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
   22279   if (out7)
   22280     assign(*out7, binop(Iop_And32,
   22281                         binop(Iop_Shr32, mkexpr(hi32), mkU8(24)),
   22282                         mkU32(0xFF)));
   22283   if (out6)
   22284     assign(*out6, binop(Iop_And32,
   22285                         binop(Iop_Shr32, mkexpr(hi32), mkU8(16)),
   22286                         mkU32(0xFF)));
   22287   if (out5)
   22288     assign(*out5, binop(Iop_And32,
   22289                         binop(Iop_Shr32, mkexpr(hi32), mkU8(8)),
   22290                         mkU32(0xFF)));
   22291   if (out4)
   22292     assign(*out4, binop(Iop_And32, mkexpr(hi32), mkU32(0xFF)));
   22293   if (out3)
   22294     assign(*out3, binop(Iop_And32,
   22295                         binop(Iop_Shr32, mkexpr(lo32), mkU8(24)),
   22296                         mkU32(0xFF)));
   22297   if (out2)
   22298     assign(*out2, binop(Iop_And32,
   22299                         binop(Iop_Shr32, mkexpr(lo32), mkU8(16)),
   22300                         mkU32(0xFF)));
   22301   if (out1)
   22302     assign(*out1, binop(Iop_And32,
   22303                         binop(Iop_Shr32, mkexpr(lo32), mkU8(8)),
   22304                         mkU32(0xFF)));
   22305   if (out0)
   22306     assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFF)));
   22307 }
   22308 
   22309 static IRTemp mk64from8s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
   22310                            IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
   22311 {
   22312   IRTemp hi32 = newTemp(Ity_I32);
   22313   IRTemp lo32 = newTemp(Ity_I32);
   22314   assign(hi32,
   22315          binop(Iop_Or32,
   22316                binop(Iop_Or32,
   22317                      binop(Iop_Shl32,
   22318                            binop(Iop_And32, mkexpr(in7), mkU32(0xFF)),
   22319                            mkU8(24)),
   22320                      binop(Iop_Shl32,
   22321                            binop(Iop_And32, mkexpr(in6), mkU32(0xFF)),
   22322                            mkU8(16))),
   22323                binop(Iop_Or32,
   22324                      binop(Iop_Shl32,
   22325                            binop(Iop_And32, mkexpr(in5), mkU32(0xFF)), mkU8(8)),
   22326                      binop(Iop_And32,
   22327                            mkexpr(in4), mkU32(0xFF)))));
   22328   assign(lo32,
   22329          binop(Iop_Or32,
   22330                binop(Iop_Or32,
   22331                      binop(Iop_Shl32,
   22332                            binop(Iop_And32, mkexpr(in3), mkU32(0xFF)),
   22333                            mkU8(24)),
   22334                      binop(Iop_Shl32,
   22335                            binop(Iop_And32, mkexpr(in2), mkU32(0xFF)),
   22336                            mkU8(16))),
   22337                binop(Iop_Or32,
   22338                      binop(Iop_Shl32,
   22339                            binop(Iop_And32, mkexpr(in1), mkU32(0xFF)), mkU8(8)),
   22340                      binop(Iop_And32,
   22341                            mkexpr(in0), mkU32(0xFF)))));
   22342   IRTemp res = newTemp(Ity_I64);
   22343   assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
   22344   return res;
   22345 }
   22346 
   22347 static IRExpr* mk_InterleaveLO8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22348 {
   22349   // returns a3 b3 a2 b2 a1 b1 a0 b0
   22350   IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
   22351   break64to8s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
   22352   break64to8s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
   22353   return mkexpr(mk64from8s(a3, b3, a2, b2, a1, b1, a0, b0));
   22354 }
   22355 
   22356 static IRExpr* mk_InterleaveHI8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22357 {
   22358   // returns a7 b7 a6 b6 a5 b5 a4 b4
   22359   IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
   22360   break64to8s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
   22361   break64to8s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
   22362   return mkexpr(mk64from8s(a7, b7, a6, b6, a5, b5, a4, b4));
   22363 }
   22364 
   22365 static IRExpr* mk_CatEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22366 {
   22367   // returns a6 a4 a2 a0 b6 b4 b2 b0
   22368   IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
   22369   break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
   22370   break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
   22371   return mkexpr(mk64from8s(a6, a4, a2, a0, b6, b4, b2, b0));
   22372 }
   22373 
   22374 static IRExpr* mk_CatOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22375 {
   22376   // returns a7 a5 a3 a1 b7 b5 b3 b1
   22377   IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
   22378   break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
   22379   break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
   22380   return mkexpr(mk64from8s(a7, a5, a3, a1, b7, b5, b3, b1));
   22381 }
   22382 
   22383 static IRExpr* mk_InterleaveEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22384 {
   22385   // returns a6 b6 a4 b4 a2 b2 a0 b0
   22386   IRTemp a6, b6, a4, b4, a2, b2, a0, b0;
   22387   break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
   22388   break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
   22389   return mkexpr(mk64from8s(a6, b6, a4, b4, a2, b2, a0, b0));
   22390 }
   22391 
   22392 static IRExpr* mk_InterleaveOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22393 {
   22394   // returns a7 b7 a5 b5 a3 b3 a1 b1
   22395   IRTemp a7, b7, a5, b5, a3, b3, a1, b1;
   22396   break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
   22397   break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
   22398   return mkexpr(mk64from8s(a7, b7, a5, b5, a3, b3, a1, b1));
   22399 }
   22400 
   22401 static IRExpr* mk_InterleaveLO32x2 ( IRTemp a10, IRTemp b10 )
   22402 {
   22403   // returns a0 b0
   22404   return binop(Iop_32HLto64, unop(Iop_64to32, mkexpr(a10)),
   22405                              unop(Iop_64to32, mkexpr(b10)));
   22406 }
   22407 
   22408 static IRExpr* mk_InterleaveHI32x2 ( IRTemp a10, IRTemp b10 )
   22409 {
   22410   // returns a1 b1
   22411   return binop(Iop_32HLto64, unop(Iop_64HIto32, mkexpr(a10)),
   22412                              unop(Iop_64HIto32, mkexpr(b10)));
   22413 }
   22414 */
   22415 
   22416 /*--------------------------------------------------------------------*/
   22417 /*--- end                                         guest_arm_toIR.c ---*/
   22418 /*--------------------------------------------------------------------*/
   22419