Home | History | Annotate | Download | only in priv
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- begin                                       guest_arm_toIR.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2013 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    NEON support is
     14    Copyright (C) 2010-2013 Samsung Electronics
     15    contributed by Dmitry Zhurikhin <zhur (at) ispras.ru>
     16               and Kirill Batuzov <batuzovk (at) ispras.ru>
     17 
     18    This program is free software; you can redistribute it and/or
     19    modify it under the terms of the GNU General Public License as
     20    published by the Free Software Foundation; either version 2 of the
     21    License, or (at your option) any later version.
     22 
     23    This program is distributed in the hope that it will be useful, but
     24    WITHOUT ANY WARRANTY; without even the implied warranty of
     25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     26    General Public License for more details.
     27 
     28    You should have received a copy of the GNU General Public License
     29    along with this program; if not, write to the Free Software
     30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     31    02110-1301, USA.
     32 
     33    The GNU General Public License is contained in the file COPYING.
     34 */
     35 
     36 /* XXXX thumb to check:
     37    that all cases where putIRegT writes r15, we generate a jump.
     38 
     39    All uses of newTemp assign to an IRTemp and not a UInt
     40 
     41    For all thumb loads and stores, including VFP ones, new-ITSTATE is
     42    backed out before the memory op, and restored afterwards.  This
     43    needs to happen even after we go uncond.  (and for sure it doesn't
     44    happen for VFP loads/stores right now).
     45 
     46    VFP on thumb: check that we exclude all r13/r15 cases that we
     47    should.
     48 
     49    XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
     50    taking into account the number of insns guarded by an IT.
     51 
     52    remove the nasty hack, in the spechelper, of looking for Or32(...,
     53    0xE0) as the first arg to armg_calculate_condition, and instead
     54    use Slice44 as specified in comments in the spechelper.
     55 
     56    add specialisations for armg_calculate_flag_c and _v, as they
     57    are moderately often needed in Thumb code.
     58 
     59    Correctness: ITSTATE handling in Thumb SVCs is wrong.
     60 
     61    Correctness (obscure): in m_transtab, when invalidating code
     62    address ranges, invalidate up to 18 bytes after the end of the
     63    range.  This is because the ITSTATE optimisation at the top of
     64    _THUMB_WRK below analyses up to 18 bytes before the start of any
     65    given instruction, and so might depend on the invalidated area.
     66 */
     67 
     68 /* Limitations, etc
     69 
     70    - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
     71      These instructions are non-restartable in the case where the
     72      transfer(s) fault.
     73 
     74    - SWP: the restart jump back is Ijk_Boring; it should be
     75      Ijk_NoRedir but that's expensive.  See comments on casLE() in
     76      guest_x86_toIR.c.
     77 */
     78 
     79 /* "Special" instructions.
     80 
     81    This instruction decoder can decode four special instructions
     82    which mean nothing natively (are no-ops as far as regs/mem are
     83    concerned) but have meaning for supporting Valgrind.  A special
     84    instruction is flagged by a 16-byte preamble:
     85 
     86       E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
     87       (mov r12, r12, ROR #3;   mov r12, r12, ROR #13;
     88        mov r12, r12, ROR #29;  mov r12, r12, ROR #19)
     89 
     90    Following that, one of the following 4 is allowed
     91    (standard interpretation in parentheses):
     92 
     93       E18AA00A (orr r10,r10,r10)   R3 = client_request ( R4 )
     94       E18BB00B (orr r11,r11,r11)   R3 = guest_NRADDR
     95       E18CC00C (orr r12,r12,r12)   branch-and-link-to-noredir R4
     96       E1899009 (orr r9,r9,r9)      IR injection
     97 
     98    Any other bytes following the 16-byte preamble are illegal and
     99    constitute a failure in instruction decoding.  This all assumes
    100    that the preamble will never occur except in specific code
    101    fragments designed for Valgrind to catch.
    102 */
    103 
    104 /* Translates ARM(v5) code to IR. */
    105 
    106 #include "libvex_basictypes.h"
    107 #include "libvex_ir.h"
    108 #include "libvex.h"
    109 #include "libvex_guest_arm.h"
    110 
    111 #include "main_util.h"
    112 #include "main_globals.h"
    113 #include "guest_generic_bb_to_IR.h"
    114 #include "guest_arm_defs.h"
    115 
    116 
    117 /*------------------------------------------------------------*/
    118 /*--- Globals                                              ---*/
    119 /*------------------------------------------------------------*/
    120 
    121 /* These are set at the start of the translation of a instruction, so
    122    that we don't have to pass them around endlessly.  CONST means does
    123    not change during translation of the instruction.
    124 */
    125 
    126 /* CONST: what is the host's endianness?  This has to do with float vs
    127    double register accesses on VFP, but it's complex and not properly
    128    thought out. */
    129 static VexEndness host_endness;
    130 
    131 /* CONST: The guest address for the instruction currently being
    132    translated.  This is the real, "decoded" address (not subject
    133    to the CPSR.T kludge). */
    134 static Addr32 guest_R15_curr_instr_notENC;
    135 
    136 /* CONST, FOR ASSERTIONS ONLY.  Indicates whether currently processed
    137    insn is Thumb (True) or ARM (False). */
    138 static Bool __curr_is_Thumb;
    139 
    140 /* MOD: The IRSB* into which we're generating code. */
    141 static IRSB* irsb;
    142 
    143 /* These are to do with handling writes to r15.  They are initially
    144    set at the start of disInstr_ARM_WRK to indicate no update,
    145    possibly updated during the routine, and examined again at the end.
    146    If they have been set to indicate a r15 update then a jump is
    147    generated.  Note, "explicit" jumps (b, bx, etc) are generated
    148    directly, not using this mechanism -- this is intended to handle
    149    the implicit-style jumps resulting from (eg) assigning to r15 as
    150    the result of insns we wouldn't normally consider branchy. */
    151 
    152 /* MOD.  Initially False; set to True iff abovementioned handling is
    153    required. */
    154 static Bool r15written;
    155 
    156 /* MOD.  Initially IRTemp_INVALID.  If the r15 branch to be generated
    157    is conditional, this holds the gating IRTemp :: Ity_I32.  If the
    158    branch to be generated is unconditional, this remains
    159    IRTemp_INVALID. */
    160 static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
    161 
    162 /* MOD.  Initially Ijk_Boring.  If an r15 branch is to be generated,
    163    this holds the jump kind. */
    164 static IRTemp r15kind;
    165 
    166 
    167 /*------------------------------------------------------------*/
    168 /*--- Debugging output                                     ---*/
    169 /*------------------------------------------------------------*/
    170 
    171 #define DIP(format, args...)           \
    172    if (vex_traceflags & VEX_TRACE_FE)  \
    173       vex_printf(format, ## args)
    174 
    175 #define DIS(buf, format, args...)      \
    176    if (vex_traceflags & VEX_TRACE_FE)  \
    177       vex_sprintf(buf, format, ## args)
    178 
    179 #define ASSERT_IS_THUMB \
    180    do { vassert(__curr_is_Thumb); } while (0)
    181 
    182 #define ASSERT_IS_ARM \
    183    do { vassert(! __curr_is_Thumb); } while (0)
    184 
    185 
    186 /*------------------------------------------------------------*/
    187 /*--- Helper bits and pieces for deconstructing the        ---*/
    188 /*--- arm insn stream.                                     ---*/
    189 /*------------------------------------------------------------*/
    190 
    191 /* Do a little-endian load of a 32-bit word, regardless of the
    192    endianness of the underlying host. */
    193 static inline UInt getUIntLittleEndianly ( const UChar* p )
    194 {
    195    UInt w = 0;
    196    w = (w << 8) | p[3];
    197    w = (w << 8) | p[2];
    198    w = (w << 8) | p[1];
    199    w = (w << 8) | p[0];
    200    return w;
    201 }
    202 
    203 /* Do a little-endian load of a 16-bit word, regardless of the
    204    endianness of the underlying host. */
    205 static inline UShort getUShortLittleEndianly ( const UChar* p )
    206 {
    207    UShort w = 0;
    208    w = (w << 8) | p[1];
    209    w = (w << 8) | p[0];
    210    return w;
    211 }
    212 
    213 static UInt ROR32 ( UInt x, UInt sh ) {
    214    vassert(sh >= 0 && sh < 32);
    215    if (sh == 0)
    216       return x;
    217    else
    218       return (x << (32-sh)) | (x >> sh);
    219 }
    220 
    221 static Int popcount32 ( UInt x )
    222 {
    223    Int res = 0, i;
    224    for (i = 0; i < 32; i++) {
    225       res += (x & 1);
    226       x >>= 1;
    227    }
    228    return res;
    229 }
    230 
    231 static UInt setbit32 ( UInt x, Int ix, UInt b )
    232 {
    233    UInt mask = 1 << ix;
    234    x &= ~mask;
    235    x |= ((b << ix) & mask);
    236    return x;
    237 }
    238 
    239 #define BITS2(_b1,_b0) \
    240    (((_b1) << 1) | (_b0))
    241 
    242 #define BITS3(_b2,_b1,_b0)                      \
    243   (((_b2) << 2) | ((_b1) << 1) | (_b0))
    244 
    245 #define BITS4(_b3,_b2,_b1,_b0) \
    246    (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
    247 
    248 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    249    ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    250     | BITS4((_b3),(_b2),(_b1),(_b0)))
    251 
    252 #define BITS5(_b4,_b3,_b2,_b1,_b0)  \
    253    (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
    254 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
    255    (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    256 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    257    (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    258 
    259 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)      \
    260    (((_b8) << 8) \
    261     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    262 
    263 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    264    (((_b9) << 9) | ((_b8) << 8)                                \
    265     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    266 
    267 /* produces _uint[_bMax:_bMin] */
    268 #define SLICE_UInt(_uint,_bMax,_bMin) \
    269    (( ((UInt)(_uint)) >> (_bMin)) \
    270     & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
    271 
    272 
    273 /*------------------------------------------------------------*/
    274 /*--- Helper bits and pieces for creating IR fragments.    ---*/
    275 /*------------------------------------------------------------*/
    276 
    277 static IRExpr* mkU64 ( ULong i )
    278 {
    279    return IRExpr_Const(IRConst_U64(i));
    280 }
    281 
    282 static IRExpr* mkU32 ( UInt i )
    283 {
    284    return IRExpr_Const(IRConst_U32(i));
    285 }
    286 
    287 static IRExpr* mkU8 ( UInt i )
    288 {
    289    vassert(i < 256);
    290    return IRExpr_Const(IRConst_U8( (UChar)i ));
    291 }
    292 
    293 static IRExpr* mkexpr ( IRTemp tmp )
    294 {
    295    return IRExpr_RdTmp(tmp);
    296 }
    297 
    298 static IRExpr* unop ( IROp op, IRExpr* a )
    299 {
    300    return IRExpr_Unop(op, a);
    301 }
    302 
    303 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
    304 {
    305    return IRExpr_Binop(op, a1, a2);
    306 }
    307 
    308 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
    309 {
    310    return IRExpr_Triop(op, a1, a2, a3);
    311 }
    312 
    313 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
    314 {
    315    return IRExpr_Load(Iend_LE, ty, addr);
    316 }
    317 
    318 /* Add a statement to the list held by "irbb". */
    319 static void stmt ( IRStmt* st )
    320 {
    321    addStmtToIRSB( irsb, st );
    322 }
    323 
    324 static void assign ( IRTemp dst, IRExpr* e )
    325 {
    326    stmt( IRStmt_WrTmp(dst, e) );
    327 }
    328 
    329 static void storeLE ( IRExpr* addr, IRExpr* data )
    330 {
    331    stmt( IRStmt_Store(Iend_LE, addr, data) );
    332 }
    333 
    334 static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
    335 {
    336    if (guardT == IRTemp_INVALID) {
    337       /* unconditional */
    338       storeLE(addr, data);
    339    } else {
    340       stmt( IRStmt_StoreG(Iend_LE, addr, data,
    341                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
    342    }
    343 }
    344 
    345 static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
    346                             IRExpr* addr, IRExpr* alt,
    347                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
    348 {
    349    if (guardT == IRTemp_INVALID) {
    350       /* unconditional */
    351       IRExpr* loaded = NULL;
    352       switch (cvt) {
    353          case ILGop_Ident32:
    354             loaded = loadLE(Ity_I32, addr); break;
    355          case ILGop_8Uto32:
    356             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
    357          case ILGop_8Sto32:
    358             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
    359          case ILGop_16Uto32:
    360             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
    361          case ILGop_16Sto32:
    362             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
    363          default:
    364             vassert(0);
    365       }
    366       vassert(loaded != NULL);
    367       assign(dst, loaded);
    368    } else {
    369       /* Generate a guarded load into 'dst', but apply 'cvt' to the
    370          loaded data before putting the data in 'dst'.  If the load
    371          does not take place, 'alt' is placed directly in 'dst'. */
    372       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
    373                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
    374    }
    375 }
    376 
    377 /* Generate a new temporary of the given type. */
    378 static IRTemp newTemp ( IRType ty )
    379 {
    380    vassert(isPlausibleIRType(ty));
    381    return newIRTemp( irsb->tyenv, ty );
    382 }
    383 
    384 /* Produces a value in 0 .. 3, which is encoded as per the type
    385    IRRoundingMode. */
    386 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
    387 {
    388    return mkU32(Irrm_NEAREST);
    389 }
    390 
    391 /* Generate an expression for SRC rotated right by ROT. */
    392 static IRExpr* genROR32( IRTemp src, Int rot )
    393 {
    394    vassert(rot >= 0 && rot < 32);
    395    if (rot == 0)
    396       return mkexpr(src);
    397    return
    398       binop(Iop_Or32,
    399             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
    400             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
    401 }
    402 
    403 static IRExpr* mkU128 ( ULong i )
    404 {
    405    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
    406 }
    407 
    408 /* Generate a 4-aligned version of the given expression if
    409    the given condition is true.  Else return it unchanged. */
    410 static IRExpr* align4if ( IRExpr* e, Bool b )
    411 {
    412    if (b)
    413       return binop(Iop_And32, e, mkU32(~3));
    414    else
    415       return e;
    416 }
    417 
    418 
    419 /*------------------------------------------------------------*/
    420 /*--- Helpers for accessing guest registers.               ---*/
    421 /*------------------------------------------------------------*/
    422 
    423 #define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
    424 #define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
    425 #define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
    426 #define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
    427 #define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
    428 #define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
    429 #define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
    430 #define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
    431 #define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
    432 #define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
    433 #define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
    434 #define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
    435 #define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
    436 #define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
    437 #define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
    438 #define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)
    439 
    440 #define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
    441 #define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
    442 #define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
    443 #define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
    444 #define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)
    445 
    446 #define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
    447 #define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
    448 #define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
    449 #define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
    450 #define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
    451 #define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
    452 #define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
    453 #define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
    454 #define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
    455 #define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
    456 #define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
    457 #define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
    458 #define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
    459 #define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
    460 #define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
    461 #define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
    462 #define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
    463 #define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
    464 #define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
    465 #define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
    466 #define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
    467 #define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
    468 #define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
    469 #define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
    470 #define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
    471 #define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
    472 #define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
    473 #define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
    474 #define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
    475 #define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
    476 #define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
    477 #define OFFB_D31      offsetof(VexGuestARMState,guest_D31)
    478 
    479 #define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
    480 #define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
    481 #define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
    482 #define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
    483 #define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
    484 #define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
    485 #define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
    486 #define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)
    487 
    488 #define OFFB_CMSTART  offsetof(VexGuestARMState,guest_CMSTART)
    489 #define OFFB_CMLEN    offsetof(VexGuestARMState,guest_CMLEN)
    490 
    491 
    492 /* ---------------- Integer registers ---------------- */
    493 
    494 static Int integerGuestRegOffset ( UInt iregNo )
    495 {
    496    /* Do we care about endianness here?  We do if sub-parts of integer
    497       registers are accessed, but I don't think that ever happens on
    498       ARM. */
    499    switch (iregNo) {
    500       case 0:  return OFFB_R0;
    501       case 1:  return OFFB_R1;
    502       case 2:  return OFFB_R2;
    503       case 3:  return OFFB_R3;
    504       case 4:  return OFFB_R4;
    505       case 5:  return OFFB_R5;
    506       case 6:  return OFFB_R6;
    507       case 7:  return OFFB_R7;
    508       case 8:  return OFFB_R8;
    509       case 9:  return OFFB_R9;
    510       case 10: return OFFB_R10;
    511       case 11: return OFFB_R11;
    512       case 12: return OFFB_R12;
    513       case 13: return OFFB_R13;
    514       case 14: return OFFB_R14;
    515       case 15: return OFFB_R15T;
    516       default: vassert(0);
    517    }
    518 }
    519 
    520 /* Plain ("low level") read from a reg; no +8 offset magic for r15. */
    521 static IRExpr* llGetIReg ( UInt iregNo )
    522 {
    523    vassert(iregNo < 16);
    524    return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
    525 }
    526 
    527 /* Architected read from a reg in ARM mode.  This automagically adds 8
    528    to all reads of r15. */
    529 static IRExpr* getIRegA ( UInt iregNo )
    530 {
    531    IRExpr* e;
    532    ASSERT_IS_ARM;
    533    vassert(iregNo < 16);
    534    if (iregNo == 15) {
    535       /* If asked for r15, don't read the guest state value, as that
    536          may not be up to date in the case where loop unrolling has
    537          happened, because the first insn's write to the block is
    538          omitted; hence in the 2nd and subsequent unrollings we don't
    539          have a correct value in guest r15.  Instead produce the
    540          constant that we know would be produced at this point. */
    541       vassert(0 == (guest_R15_curr_instr_notENC & 3));
    542       e = mkU32(guest_R15_curr_instr_notENC + 8);
    543    } else {
    544       e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
    545    }
    546    return e;
    547 }
    548 
    549 /* Architected read from a reg in Thumb mode.  This automagically adds
    550    4 to all reads of r15. */
    551 static IRExpr* getIRegT ( UInt iregNo )
    552 {
    553    IRExpr* e;
    554    ASSERT_IS_THUMB;
    555    vassert(iregNo < 16);
    556    if (iregNo == 15) {
    557       /* Ditto comment in getIReg. */
    558       vassert(0 == (guest_R15_curr_instr_notENC & 1));
    559       e = mkU32(guest_R15_curr_instr_notENC + 4);
    560    } else {
    561       e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
    562    }
    563    return e;
    564 }
    565 
    566 /* Plain ("low level") write to a reg; no jump or alignment magic for
    567    r15. */
    568 static void llPutIReg ( UInt iregNo, IRExpr* e )
    569 {
    570    vassert(iregNo < 16);
    571    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
    572    stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
    573 }
    574 
    575 /* Architected write to an integer register in ARM mode.  If it is to
    576    r15, record info so at the end of this insn's translation, a branch
    577    to it can be made.  Also handles conditional writes to the
    578    register: if guardT == IRTemp_INVALID then the write is
    579    unconditional.  If writing r15, also 4-align it. */
    580 static void putIRegA ( UInt       iregNo,
    581                        IRExpr*    e,
    582                        IRTemp     guardT /* :: Ity_I32, 0 or 1 */,
    583                        IRJumpKind jk /* if a jump is generated */ )
    584 {
    585    /* if writing r15, force e to be 4-aligned. */
    586    // INTERWORKING FIXME.  this needs to be relaxed so that
    587    // puts caused by LDMxx which load r15 interwork right.
    588    // but is no aligned too relaxed?
    589    //if (iregNo == 15)
    590    //   e = binop(Iop_And32, e, mkU32(~3));
    591    ASSERT_IS_ARM;
    592    /* So, generate either an unconditional or a conditional write to
    593       the reg. */
    594    if (guardT == IRTemp_INVALID) {
    595       /* unconditional write */
    596       llPutIReg( iregNo, e );
    597    } else {
    598       llPutIReg( iregNo,
    599                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    600                              e, llGetIReg(iregNo) ));
    601    }
    602    if (iregNo == 15) {
    603       // assert against competing r15 updates.  Shouldn't
    604       // happen; should be ruled out by the instr matching
    605       // logic.
    606       vassert(r15written == False);
    607       vassert(r15guard   == IRTemp_INVALID);
    608       vassert(r15kind    == Ijk_Boring);
    609       r15written = True;
    610       r15guard   = guardT;
    611       r15kind    = jk;
    612    }
    613 }
    614 
    615 
    616 /* Architected write to an integer register in Thumb mode.  Writes to
    617    r15 are not allowed.  Handles conditional writes to the register:
    618    if guardT == IRTemp_INVALID then the write is unconditional. */
    619 static void putIRegT ( UInt       iregNo,
    620                        IRExpr*    e,
    621                        IRTemp     guardT /* :: Ity_I32, 0 or 1 */ )
    622 {
    623    /* So, generate either an unconditional or a conditional write to
    624       the reg. */
    625    ASSERT_IS_THUMB;
    626    vassert(iregNo >= 0 && iregNo <= 14);
    627    if (guardT == IRTemp_INVALID) {
    628       /* unconditional write */
    629       llPutIReg( iregNo, e );
    630    } else {
    631       llPutIReg( iregNo,
    632                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    633                              e, llGetIReg(iregNo) ));
    634    }
    635 }
    636 
    637 
    638 /* Thumb16 and Thumb32 only.
    639    Returns true if reg is 13 or 15.  Implements the BadReg
    640    predicate in the ARM ARM. */
    641 static Bool isBadRegT ( UInt r )
    642 {
    643    vassert(r <= 15);
    644    ASSERT_IS_THUMB;
    645    return r == 13 || r == 15;
    646 }
    647 
    648 
    649 /* ---------------- Double registers ---------------- */
    650 
    651 static Int doubleGuestRegOffset ( UInt dregNo )
    652 {
    653    /* Do we care about endianness here?  Probably do if we ever get
    654       into the situation of dealing with the single-precision VFP
    655       registers. */
    656    switch (dregNo) {
    657       case 0:  return OFFB_D0;
    658       case 1:  return OFFB_D1;
    659       case 2:  return OFFB_D2;
    660       case 3:  return OFFB_D3;
    661       case 4:  return OFFB_D4;
    662       case 5:  return OFFB_D5;
    663       case 6:  return OFFB_D6;
    664       case 7:  return OFFB_D7;
    665       case 8:  return OFFB_D8;
    666       case 9:  return OFFB_D9;
    667       case 10: return OFFB_D10;
    668       case 11: return OFFB_D11;
    669       case 12: return OFFB_D12;
    670       case 13: return OFFB_D13;
    671       case 14: return OFFB_D14;
    672       case 15: return OFFB_D15;
    673       case 16: return OFFB_D16;
    674       case 17: return OFFB_D17;
    675       case 18: return OFFB_D18;
    676       case 19: return OFFB_D19;
    677       case 20: return OFFB_D20;
    678       case 21: return OFFB_D21;
    679       case 22: return OFFB_D22;
    680       case 23: return OFFB_D23;
    681       case 24: return OFFB_D24;
    682       case 25: return OFFB_D25;
    683       case 26: return OFFB_D26;
    684       case 27: return OFFB_D27;
    685       case 28: return OFFB_D28;
    686       case 29: return OFFB_D29;
    687       case 30: return OFFB_D30;
    688       case 31: return OFFB_D31;
    689       default: vassert(0);
    690    }
    691 }
    692 
    693 /* Plain ("low level") read from a VFP Dreg. */
    694 static IRExpr* llGetDReg ( UInt dregNo )
    695 {
    696    vassert(dregNo < 32);
    697    return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
    698 }
    699 
    700 /* Architected read from a VFP Dreg. */
    701 static IRExpr* getDReg ( UInt dregNo ) {
    702    return llGetDReg( dregNo );
    703 }
    704 
    705 /* Plain ("low level") write to a VFP Dreg. */
    706 static void llPutDReg ( UInt dregNo, IRExpr* e )
    707 {
    708    vassert(dregNo < 32);
    709    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
    710    stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
    711 }
    712 
    713 /* Architected write to a VFP Dreg.  Handles conditional writes to the
    714    register: if guardT == IRTemp_INVALID then the write is
    715    unconditional. */
    716 static void putDReg ( UInt    dregNo,
    717                       IRExpr* e,
    718                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    719 {
    720    /* So, generate either an unconditional or a conditional write to
    721       the reg. */
    722    if (guardT == IRTemp_INVALID) {
    723       /* unconditional write */
    724       llPutDReg( dregNo, e );
    725    } else {
    726       llPutDReg( dregNo,
    727                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    728                              e, llGetDReg(dregNo) ));
    729    }
    730 }
    731 
    732 /* And now exactly the same stuff all over again, but this time
    733    taking/returning I64 rather than F64, to support 64-bit Neon
    734    ops. */
    735 
    736 /* Plain ("low level") read from a Neon Integer Dreg. */
    737 static IRExpr* llGetDRegI64 ( UInt dregNo )
    738 {
    739    vassert(dregNo < 32);
    740    return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
    741 }
    742 
    743 /* Architected read from a Neon Integer Dreg. */
    744 static IRExpr* getDRegI64 ( UInt dregNo ) {
    745    return llGetDRegI64( dregNo );
    746 }
    747 
    748 /* Plain ("low level") write to a Neon Integer Dreg. */
    749 static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
    750 {
    751    vassert(dregNo < 32);
    752    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
    753    stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
    754 }
    755 
    756 /* Architected write to a Neon Integer Dreg.  Handles conditional
    757    writes to the register: if guardT == IRTemp_INVALID then the write
    758    is unconditional. */
    759 static void putDRegI64 ( UInt    dregNo,
    760                          IRExpr* e,
    761                          IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    762 {
    763    /* So, generate either an unconditional or a conditional write to
    764       the reg. */
    765    if (guardT == IRTemp_INVALID) {
    766       /* unconditional write */
    767       llPutDRegI64( dregNo, e );
    768    } else {
    769       llPutDRegI64( dregNo,
    770                     IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    771                                 e, llGetDRegI64(dregNo) ));
    772    }
    773 }
    774 
    775 /* ---------------- Quad registers ---------------- */
    776 
    777 static Int quadGuestRegOffset ( UInt qregNo )
    778 {
    779    /* Do we care about endianness here?  Probably do if we ever get
    780       into the situation of dealing with the 64 bit Neon registers. */
    781    switch (qregNo) {
    782       case 0:  return OFFB_D0;
    783       case 1:  return OFFB_D2;
    784       case 2:  return OFFB_D4;
    785       case 3:  return OFFB_D6;
    786       case 4:  return OFFB_D8;
    787       case 5:  return OFFB_D10;
    788       case 6:  return OFFB_D12;
    789       case 7:  return OFFB_D14;
    790       case 8:  return OFFB_D16;
    791       case 9:  return OFFB_D18;
    792       case 10: return OFFB_D20;
    793       case 11: return OFFB_D22;
    794       case 12: return OFFB_D24;
    795       case 13: return OFFB_D26;
    796       case 14: return OFFB_D28;
    797       case 15: return OFFB_D30;
    798       default: vassert(0);
    799    }
    800 }
    801 
    802 /* Plain ("low level") read from a Neon Qreg. */
    803 static IRExpr* llGetQReg ( UInt qregNo )
    804 {
    805    vassert(qregNo < 16);
    806    return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
    807 }
    808 
    809 /* Architected read from a Neon Qreg. */
    810 static IRExpr* getQReg ( UInt qregNo ) {
    811    return llGetQReg( qregNo );
    812 }
    813 
    814 /* Plain ("low level") write to a Neon Qreg. */
    815 static void llPutQReg ( UInt qregNo, IRExpr* e )
    816 {
    817    vassert(qregNo < 16);
    818    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
    819    stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
    820 }
    821 
    822 /* Architected write to a Neon Qreg.  Handles conditional writes to the
    823    register: if guardT == IRTemp_INVALID then the write is
    824    unconditional. */
    825 static void putQReg ( UInt    qregNo,
    826                       IRExpr* e,
    827                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    828 {
    829    /* So, generate either an unconditional or a conditional write to
    830       the reg. */
    831    if (guardT == IRTemp_INVALID) {
    832       /* unconditional write */
    833       llPutQReg( qregNo, e );
    834    } else {
    835       llPutQReg( qregNo,
    836                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    837                              e, llGetQReg(qregNo) ));
    838    }
    839 }
    840 
    841 
    842 /* ---------------- Float registers ---------------- */
    843 
    844 static Int floatGuestRegOffset ( UInt fregNo )
    845 {
    846    /* Start with the offset of the containing double, and then correct
    847       for endianness.  Actually this is completely bogus and needs
    848       careful thought. */
    849    Int off;
    850    vassert(fregNo < 32);
    851    off = doubleGuestRegOffset(fregNo >> 1);
    852    if (host_endness == VexEndnessLE) {
    853       if (fregNo & 1)
    854          off += 4;
    855    } else {
    856       vassert(0);
    857    }
    858    return off;
    859 }
    860 
    861 /* Plain ("low level") read from a VFP Freg. */
    862 static IRExpr* llGetFReg ( UInt fregNo )
    863 {
    864    vassert(fregNo < 32);
    865    return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
    866 }
    867 
    868 /* Architected read from a VFP Freg. */
    869 static IRExpr* getFReg ( UInt fregNo ) {
    870    return llGetFReg( fregNo );
    871 }
    872 
    873 /* Plain ("low level") write to a VFP Freg. */
    874 static void llPutFReg ( UInt fregNo, IRExpr* e )
    875 {
    876    vassert(fregNo < 32);
    877    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
    878    stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
    879 }
    880 
    881 /* Architected write to a VFP Freg.  Handles conditional writes to the
    882    register: if guardT == IRTemp_INVALID then the write is
    883    unconditional. */
    884 static void putFReg ( UInt    fregNo,
    885                       IRExpr* e,
    886                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    887 {
    888    /* So, generate either an unconditional or a conditional write to
    889       the reg. */
    890    if (guardT == IRTemp_INVALID) {
    891       /* unconditional write */
    892       llPutFReg( fregNo, e );
    893    } else {
    894       llPutFReg( fregNo,
    895                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    896                              e, llGetFReg(fregNo) ));
    897    }
    898 }
    899 
    900 
    901 /* ---------------- Misc registers ---------------- */
    902 
    903 static void putMiscReg32 ( UInt    gsoffset,
    904                            IRExpr* e, /* :: Ity_I32 */
    905                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    906 {
    907    switch (gsoffset) {
    908       case OFFB_FPSCR:   break;
    909       case OFFB_QFLAG32: break;
    910       case OFFB_GEFLAG0: break;
    911       case OFFB_GEFLAG1: break;
    912       case OFFB_GEFLAG2: break;
    913       case OFFB_GEFLAG3: break;
    914       default: vassert(0); /* awaiting more cases */
    915    }
    916    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
    917 
    918    if (guardT == IRTemp_INVALID) {
    919       /* unconditional write */
    920       stmt(IRStmt_Put(gsoffset, e));
    921    } else {
    922       stmt(IRStmt_Put(
    923          gsoffset,
    924          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    925                      e, IRExpr_Get(gsoffset, Ity_I32) )
    926       ));
    927    }
    928 }
    929 
    930 static IRTemp get_ITSTATE ( void )
    931 {
    932    ASSERT_IS_THUMB;
    933    IRTemp t = newTemp(Ity_I32);
    934    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
    935    return t;
    936 }
    937 
    938 static void put_ITSTATE ( IRTemp t )
    939 {
    940    ASSERT_IS_THUMB;
    941    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
    942 }
    943 
    944 static IRTemp get_QFLAG32 ( void )
    945 {
    946    IRTemp t = newTemp(Ity_I32);
    947    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
    948    return t;
    949 }
    950 
    951 static void put_QFLAG32 ( IRTemp t, IRTemp condT )
    952 {
    953    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
    954 }
    955 
    956 /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
    957    Status Register) to indicate that overflow or saturation occurred.
    958    Nb: t must be zero to denote no saturation, and any nonzero
    959    value to indicate saturation. */
    960 static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
    961 {
    962    IRTemp old = get_QFLAG32();
    963    IRTemp nyu = newTemp(Ity_I32);
    964    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
    965    put_QFLAG32(nyu, condT);
    966 }
    967 
    968 /* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
    969    flagNo: which flag bit to set [3...0]
    970    lowbits_to_ignore:  0 = look at all 32 bits
    971                        8 = look at top 24 bits only
    972                       16 = look at top 16 bits only
    973                       31 = look at the top bit only
    974    e: input value to be evaluated.
    975    The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
    976    masked out.  If the resulting value is zero then the GE flag is
    977    set to 0; any other value sets the flag to 1. */
    978 static void put_GEFLAG32 ( Int flagNo,            /* 0, 1, 2 or 3 */
    979                            Int lowbits_to_ignore, /* 0, 8, 16 or 31   */
    980                            IRExpr* e,             /* Ity_I32 */
    981                            IRTemp condT )
    982 {
    983    vassert( flagNo >= 0 && flagNo <= 3 );
    984    vassert( lowbits_to_ignore == 0  ||
    985             lowbits_to_ignore == 8  ||
    986             lowbits_to_ignore == 16 ||
    987             lowbits_to_ignore == 31 );
    988    IRTemp masked = newTemp(Ity_I32);
    989    assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
    990 
    991    switch (flagNo) {
    992       case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
    993       case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
    994       case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
    995       case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
    996       default: vassert(0);
    997    }
    998 }
    999 
   1000 /* Return the (32-bit, zero-or-nonzero representation scheme) of
   1001    the specified GE flag. */
   1002 static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
   1003 {
   1004    switch (flagNo) {
   1005       case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
   1006       case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
   1007       case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
   1008       case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
   1009       default: vassert(0);
   1010    }
   1011 }
   1012 
   1013 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
   1014    2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
   1015    15 of the value.  All other bits are ignored. */
   1016 static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
   1017 {
   1018    IRTemp ge10 = newTemp(Ity_I32);
   1019    IRTemp ge32 = newTemp(Ity_I32);
   1020    assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
   1021    assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
   1022    put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
   1023    put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
   1024    put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
   1025    put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
   1026 }
   1027 
   1028 
   1029 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3
   1030    from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
   1031    bit 7.  All other bits are ignored. */
   1032 static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
   1033 {
   1034    IRTemp ge0 = newTemp(Ity_I32);
   1035    IRTemp ge1 = newTemp(Ity_I32);
   1036    IRTemp ge2 = newTemp(Ity_I32);
   1037    IRTemp ge3 = newTemp(Ity_I32);
   1038    assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
   1039    assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
   1040    assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
   1041    assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
   1042    put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
   1043    put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
   1044    put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
   1045    put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
   1046 }
   1047 
   1048 
   1049 /* ---------------- FPSCR stuff ---------------- */
   1050 
   1051 /* Generate IR to get hold of the rounding mode bits in FPSCR, and
   1052    convert them to IR format.  Bind the final result to the
   1053    returned temp. */
   1054 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
   1055 {
   1056    /* The ARMvfp encoding for rounding mode bits is:
   1057          00  to nearest
   1058          01  to +infinity
   1059          10  to -infinity
   1060          11  to zero
   1061       We need to convert that to the IR encoding:
   1062          00  to nearest (the default)
   1063          10  to +infinity
   1064          01  to -infinity
   1065          11  to zero
   1066       Which can be done by swapping bits 0 and 1.
   1067       The rmode bits are at 23:22 in FPSCR.
   1068    */
   1069    IRTemp armEncd = newTemp(Ity_I32);
   1070    IRTemp swapped = newTemp(Ity_I32);
   1071    /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
   1072       we don't zero out bits 24 and above, since the assignment to
   1073       'swapped' will mask them out anyway. */
   1074    assign(armEncd,
   1075           binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
   1076    /* Now swap them. */
   1077    assign(swapped,
   1078           binop(Iop_Or32,
   1079                 binop(Iop_And32,
   1080                       binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
   1081                       mkU32(2)),
   1082                 binop(Iop_And32,
   1083                       binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
   1084                       mkU32(1))
   1085          ));
   1086    return swapped;
   1087 }
   1088 
   1089 
   1090 /*------------------------------------------------------------*/
   1091 /*--- Helpers for flag handling and conditional insns      ---*/
   1092 /*------------------------------------------------------------*/
   1093 
   1094 static const HChar* name_ARMCondcode ( ARMCondcode cond )
   1095 {
   1096    switch (cond) {
   1097       case ARMCondEQ:  return "{eq}";
   1098       case ARMCondNE:  return "{ne}";
   1099       case ARMCondHS:  return "{hs}";  // or 'cs'
   1100       case ARMCondLO:  return "{lo}";  // or 'cc'
   1101       case ARMCondMI:  return "{mi}";
   1102       case ARMCondPL:  return "{pl}";
   1103       case ARMCondVS:  return "{vs}";
   1104       case ARMCondVC:  return "{vc}";
   1105       case ARMCondHI:  return "{hi}";
   1106       case ARMCondLS:  return "{ls}";
   1107       case ARMCondGE:  return "{ge}";
   1108       case ARMCondLT:  return "{lt}";
   1109       case ARMCondGT:  return "{gt}";
   1110       case ARMCondLE:  return "{le}";
   1111       case ARMCondAL:  return ""; // {al}: is the default
   1112       case ARMCondNV:  return "{nv}";
   1113       default: vpanic("name_ARMCondcode");
   1114    }
   1115 }
   1116 /* and a handy shorthand for it */
   1117 static const HChar* nCC ( ARMCondcode cond ) {
   1118    return name_ARMCondcode(cond);
   1119 }
   1120 
   1121 
   1122 /* Build IR to calculate some particular condition from stored
   1123    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   1124    Ity_I32, suitable for narrowing.  Although the return type is
   1125    Ity_I32, the returned value is either 0 or 1.  'cond' must be
   1126    :: Ity_I32 and must denote the condition to compute in
   1127    bits 7:4, and be zero everywhere else.
   1128 */
   1129 static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
   1130 {
   1131    vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
   1132    /* And 'cond' had better produce a value in which only bits 7:4 are
   1133       nonzero.  However, obviously we can't assert for that. */
   1134 
   1135    /* So what we're constructing for the first argument is
   1136       "(cond << 4) | stored-operation".
   1137       However, as per comments above, 'cond' must be supplied
   1138       pre-shifted to this function.
   1139 
   1140       This pairing scheme requires that the ARM_CC_OP_ values all fit
   1141       in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
   1142       8 bits of the first argument. */
   1143    IRExpr** args
   1144       = mkIRExprVec_4(
   1145            binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
   1146            IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1147            IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1148            IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
   1149         );
   1150    IRExpr* call
   1151       = mkIRExprCCall(
   1152            Ity_I32,
   1153            0/*regparm*/,
   1154            "armg_calculate_condition", &armg_calculate_condition,
   1155            args
   1156         );
   1157 
   1158    /* Exclude the requested condition, OP and NDEP from definedness
   1159       checking.  We're only interested in DEP1 and DEP2. */
   1160    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1161    return call;
   1162 }
   1163 
   1164 
   1165 /* Build IR to calculate some particular condition from stored
   1166    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   1167    Ity_I32, suitable for narrowing.  Although the return type is
   1168    Ity_I32, the returned value is either 0 or 1.
   1169 */
   1170 static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
   1171 {
   1172   /* First arg is "(cond << 4) | condition".  This requires that the
   1173      ARM_CC_OP_ values all fit in 4 bits.  Hence we are passing a
   1174      (COND, OP) pair in the lowest 8 bits of the first argument. */
   1175    vassert(cond >= 0 && cond <= 15);
   1176    return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
   1177 }
   1178 
   1179 
   1180 /* Build IR to calculate just the carry flag from stored
   1181    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1182    Ity_I32. */
   1183 static IRExpr* mk_armg_calculate_flag_c ( void )
   1184 {
   1185    IRExpr** args
   1186       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1187                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1188                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1189                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1190    IRExpr* call
   1191       = mkIRExprCCall(
   1192            Ity_I32,
   1193            0/*regparm*/,
   1194            "armg_calculate_flag_c", &armg_calculate_flag_c,
   1195            args
   1196         );
   1197    /* Exclude OP and NDEP from definedness checking.  We're only
   1198       interested in DEP1 and DEP2. */
   1199    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1200    return call;
   1201 }
   1202 
   1203 
   1204 /* Build IR to calculate just the overflow flag from stored
   1205    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1206    Ity_I32. */
   1207 static IRExpr* mk_armg_calculate_flag_v ( void )
   1208 {
   1209    IRExpr** args
   1210       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1211                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1212                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1213                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1214    IRExpr* call
   1215       = mkIRExprCCall(
   1216            Ity_I32,
   1217            0/*regparm*/,
   1218            "armg_calculate_flag_v", &armg_calculate_flag_v,
   1219            args
   1220         );
   1221    /* Exclude OP and NDEP from definedness checking.  We're only
   1222       interested in DEP1 and DEP2. */
   1223    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1224    return call;
   1225 }
   1226 
   1227 
   1228 /* Build IR to calculate N Z C V in bits 31:28 of the
   1229    returned word. */
   1230 static IRExpr* mk_armg_calculate_flags_nzcv ( void )
   1231 {
   1232    IRExpr** args
   1233       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1234                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1235                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1236                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1237    IRExpr* call
   1238       = mkIRExprCCall(
   1239            Ity_I32,
   1240            0/*regparm*/,
   1241            "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
   1242            args
   1243         );
   1244    /* Exclude OP and NDEP from definedness checking.  We're only
   1245       interested in DEP1 and DEP2. */
   1246    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1247    return call;
   1248 }
   1249 
   1250 static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
   1251 {
   1252    IRExpr** args1;
   1253    IRExpr** args2;
   1254    IRExpr *call1, *call2, *res;
   1255 
   1256    if (Q) {
   1257       args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
   1258                               binop(Iop_GetElem32x4, resL, mkU8(1)),
   1259                               binop(Iop_GetElem32x4, resR, mkU8(0)),
   1260                               binop(Iop_GetElem32x4, resR, mkU8(1)) );
   1261       args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
   1262                               binop(Iop_GetElem32x4, resL, mkU8(3)),
   1263                               binop(Iop_GetElem32x4, resR, mkU8(2)),
   1264                               binop(Iop_GetElem32x4, resR, mkU8(3)) );
   1265    } else {
   1266       args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
   1267                               binop(Iop_GetElem32x2, resL, mkU8(1)),
   1268                               binop(Iop_GetElem32x2, resR, mkU8(0)),
   1269                               binop(Iop_GetElem32x2, resR, mkU8(1)) );
   1270    }
   1271 
   1272    call1 = mkIRExprCCall(
   1273              Ity_I32,
   1274              0/*regparm*/,
   1275              "armg_calculate_flag_qc", &armg_calculate_flag_qc,
   1276              args1
   1277           );
   1278    if (Q) {
   1279       call2 = mkIRExprCCall(
   1280                 Ity_I32,
   1281                 0/*regparm*/,
   1282                 "armg_calculate_flag_qc", &armg_calculate_flag_qc,
   1283                 args2
   1284              );
   1285    }
   1286    if (Q) {
   1287       res = binop(Iop_Or32, call1, call2);
   1288    } else {
   1289       res = call1;
   1290    }
   1291    return res;
   1292 }
   1293 
   1294 // FIXME: this is named wrongly .. looks like a sticky set of
   1295 // QC, not a write to it.
   1296 static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
   1297                          IRTemp condT )
   1298 {
   1299    putMiscReg32 (OFFB_FPSCR,
   1300                  binop(Iop_Or32,
   1301                        IRExpr_Get(OFFB_FPSCR, Ity_I32),
   1302                        binop(Iop_Shl32,
   1303                              mk_armg_calculate_flag_qc(resL, resR, Q),
   1304                              mkU8(27))),
   1305                  condT);
   1306 }
   1307 
   1308 /* Build IR to conditionally set the flags thunk.  As with putIReg, if
   1309    guard is IRTemp_INVALID then it's unconditional, else it holds a
   1310    condition :: Ity_I32. */
   1311 static
   1312 void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
   1313                          IRTemp t_dep2, IRTemp t_ndep,
   1314                          IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1315 {
   1316    vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
   1317    vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
   1318    vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
   1319    vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
   1320    if (guardT == IRTemp_INVALID) {
   1321       /* unconditional */
   1322       stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(cc_op) ));
   1323       stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   1324       stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   1325       stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
   1326    } else {
   1327       /* conditional */
   1328       IRTemp c1 = newTemp(Ity_I1);
   1329       assign( c1, binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)) );
   1330       stmt( IRStmt_Put(
   1331                OFFB_CC_OP,
   1332                IRExpr_ITE( mkexpr(c1),
   1333                            mkU32(cc_op),
   1334                            IRExpr_Get(OFFB_CC_OP, Ity_I32) ) ));
   1335       stmt( IRStmt_Put(
   1336                OFFB_CC_DEP1,
   1337                IRExpr_ITE( mkexpr(c1),
   1338                            mkexpr(t_dep1),
   1339                            IRExpr_Get(OFFB_CC_DEP1, Ity_I32) ) ));
   1340       stmt( IRStmt_Put(
   1341                OFFB_CC_DEP2,
   1342                IRExpr_ITE( mkexpr(c1),
   1343                            mkexpr(t_dep2),
   1344                            IRExpr_Get(OFFB_CC_DEP2, Ity_I32) ) ));
   1345       stmt( IRStmt_Put(
   1346                OFFB_CC_NDEP,
   1347                IRExpr_ITE( mkexpr(c1),
   1348                            mkexpr(t_ndep),
   1349                            IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ) ));
   1350    }
   1351 }
   1352 
   1353 
   1354 /* Minor variant of the above that sets NDEP to zero (if it
   1355    sets it at all) */
   1356 static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
   1357                              IRTemp t_dep2,
   1358                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1359 {
   1360    IRTemp z32 = newTemp(Ity_I32);
   1361    assign( z32, mkU32(0) );
   1362    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
   1363 }
   1364 
   1365 
   1366 /* Minor variant of the above that sets DEP2 to zero (if it
   1367    sets it at all) */
   1368 static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
   1369                              IRTemp t_ndep,
   1370                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1371 {
   1372    IRTemp z32 = newTemp(Ity_I32);
   1373    assign( z32, mkU32(0) );
   1374    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
   1375 }
   1376 
   1377 
   1378 /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
   1379    sets them at all) */
   1380 static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
   1381                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1382 {
   1383    IRTemp z32 = newTemp(Ity_I32);
   1384    assign( z32, mkU32(0) );
   1385    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
   1386 }
   1387 
   1388 
   1389 /* ARM only */
   1390 /* Generate a side-exit to the next instruction, if the given guard
   1391    expression :: Ity_I32 is 0 (note!  the side exit is taken if the
   1392    condition is false!)  This is used to skip over conditional
   1393    instructions which we can't generate straight-line code for, either
   1394    because they are too complex or (more likely) they potentially
   1395    generate exceptions.
   1396 */
   1397 static void mk_skip_over_A32_if_cond_is_false (
   1398                IRTemp guardT /* :: Ity_I32, 0 or 1 */
   1399             )
   1400 {
   1401    ASSERT_IS_ARM;
   1402    vassert(guardT != IRTemp_INVALID);
   1403    vassert(0 == (guest_R15_curr_instr_notENC & 3));
   1404    stmt( IRStmt_Exit(
   1405             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
   1406             Ijk_Boring,
   1407             IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
   1408             OFFB_R15T
   1409        ));
   1410 }
   1411 
   1412 /* Thumb16 only */
   1413 /* ditto, but jump over a 16-bit thumb insn */
   1414 static void mk_skip_over_T16_if_cond_is_false (
   1415                IRTemp guardT /* :: Ity_I32, 0 or 1 */
   1416             )
   1417 {
   1418    ASSERT_IS_THUMB;
   1419    vassert(guardT != IRTemp_INVALID);
   1420    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   1421    stmt( IRStmt_Exit(
   1422             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
   1423             Ijk_Boring,
   1424             IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
   1425             OFFB_R15T
   1426        ));
   1427 }
   1428 
   1429 
   1430 /* Thumb32 only */
   1431 /* ditto, but jump over a 32-bit thumb insn */
   1432 static void mk_skip_over_T32_if_cond_is_false (
   1433                IRTemp guardT /* :: Ity_I32, 0 or 1 */
   1434             )
   1435 {
   1436    ASSERT_IS_THUMB;
   1437    vassert(guardT != IRTemp_INVALID);
   1438    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   1439    stmt( IRStmt_Exit(
   1440             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
   1441             Ijk_Boring,
   1442             IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
   1443             OFFB_R15T
   1444        ));
   1445 }
   1446 
   1447 
   1448 /* Thumb16 and Thumb32 only
   1449    Generate a SIGILL followed by a restart of the current instruction
   1450    if the given temp is nonzero. */
   1451 static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
   1452 {
   1453    ASSERT_IS_THUMB;
   1454    vassert(t != IRTemp_INVALID);
   1455    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   1456    stmt(
   1457       IRStmt_Exit(
   1458          binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
   1459          Ijk_NoDecode,
   1460          IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
   1461          OFFB_R15T
   1462       )
   1463    );
   1464 }
   1465 
   1466 
   1467 /* Inspect the old_itstate, and generate a SIGILL if it indicates that
   1468    we are currently in an IT block and are not the last in the block.
   1469    This also rolls back guest_ITSTATE to its old value before the exit
   1470    and restores it to its new value afterwards.  This is so that if
   1471    the exit is taken, we have an up to date version of ITSTATE
   1472    available.  Without doing that, we have no hope of making precise
   1473    exceptions work. */
   1474 static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
   1475                IRTemp old_itstate /* :: Ity_I32 */,
   1476                IRTemp new_itstate /* :: Ity_I32 */
   1477             )
   1478 {
   1479    ASSERT_IS_THUMB;
   1480    put_ITSTATE(old_itstate); // backout
   1481    IRTemp guards_for_next3 = newTemp(Ity_I32);
   1482    assign(guards_for_next3,
   1483           binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   1484    gen_SIGILL_T_if_nonzero(guards_for_next3);
   1485    put_ITSTATE(new_itstate); //restore
   1486 }
   1487 
   1488 
   1489 /* Simpler version of the above, which generates a SIGILL if
   1490    we're anywhere within an IT block. */
   1491 static void gen_SIGILL_T_if_in_ITBlock (
   1492                IRTemp old_itstate /* :: Ity_I32 */,
   1493                IRTemp new_itstate /* :: Ity_I32 */
   1494             )
   1495 {
   1496    put_ITSTATE(old_itstate); // backout
   1497    gen_SIGILL_T_if_nonzero(old_itstate);
   1498    put_ITSTATE(new_itstate); //restore
   1499 }
   1500 
   1501 
   1502 /* Generate an APSR value, from the NZCV thunk, and
   1503    from QFLAG32 and GEFLAG0 .. GEFLAG3. */
   1504 static IRTemp synthesise_APSR ( void )
   1505 {
   1506    IRTemp res1 = newTemp(Ity_I32);
   1507    // Get NZCV
   1508    assign( res1, mk_armg_calculate_flags_nzcv() );
   1509    // OR in the Q value
   1510    IRTemp res2 = newTemp(Ity_I32);
   1511    assign(
   1512       res2,
   1513       binop(Iop_Or32,
   1514             mkexpr(res1),
   1515             binop(Iop_Shl32,
   1516                   unop(Iop_1Uto32,
   1517                        binop(Iop_CmpNE32,
   1518                              mkexpr(get_QFLAG32()),
   1519                              mkU32(0))),
   1520                   mkU8(ARMG_CC_SHIFT_Q)))
   1521    );
   1522    // OR in GE0 .. GE3
   1523    IRExpr* ge0
   1524       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
   1525    IRExpr* ge1
   1526       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
   1527    IRExpr* ge2
   1528       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
   1529    IRExpr* ge3
   1530       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
   1531    IRTemp res3 = newTemp(Ity_I32);
   1532    assign(res3,
   1533           binop(Iop_Or32,
   1534                 mkexpr(res2),
   1535                 binop(Iop_Or32,
   1536                       binop(Iop_Or32,
   1537                             binop(Iop_Shl32, ge0, mkU8(16)),
   1538                             binop(Iop_Shl32, ge1, mkU8(17))),
   1539                       binop(Iop_Or32,
   1540                             binop(Iop_Shl32, ge2, mkU8(18)),
   1541                             binop(Iop_Shl32, ge3, mkU8(19))) )));
   1542    return res3;
   1543 }
   1544 
   1545 
   1546 /* and the inverse transformation: given an APSR value,
   1547    set the NZCV thunk, the Q flag, and the GE flags. */
   1548 static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
   1549                                 IRTemp apsrT, IRTemp condT )
   1550 {
   1551    vassert(write_nzcvq || write_ge);
   1552    if (write_nzcvq) {
   1553       // Do NZCV
   1554       IRTemp immT = newTemp(Ity_I32);
   1555       assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
   1556       setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
   1557       // Do Q
   1558       IRTemp qnewT = newTemp(Ity_I32);
   1559       assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
   1560       put_QFLAG32(qnewT, condT);
   1561    }
   1562    if (write_ge) {
   1563       // Do GE3..0
   1564       put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
   1565                    condT);
   1566       put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
   1567                    condT);
   1568       put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
   1569                    condT);
   1570       put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
   1571                    condT);
   1572    }
   1573 }
   1574 
   1575 
   1576 /*------------------------------------------------------------*/
   1577 /*--- Helpers for saturation                               ---*/
   1578 /*------------------------------------------------------------*/
   1579 
   /* FIXME: absolutely the only difference between (a) armUnsignedSatQ
      and (b) armSignedSatQ is that in (a) the floor is set to 0, whereas
      in (b) the floor is computed from the value of imm5.  These two
      functions should be commoned up. */
   1584 
   1585 /* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
   1586    Optionally return flag resQ saying whether saturation occurred.
   1587    See definition in manual, section A2.2.1, page 41
   1588    (bits(N), boolean) UnsignedSatQ( integer i, integer N )
   1589    {
   1590      if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
   1591      elsif ( i < 0 )    { result = 0; saturated = TRUE; }
   1592      else               { result = i; saturated = FALSE; }
   1593      return ( result<N-1:0>, saturated );
   1594    }
   1595 */
   1596 static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
   1597                              IRTemp* resQ, /* OUT - Ity_I32  */
   1598                              IRTemp regT,  /* value to clamp - Ity_I32 */
   1599                              UInt imm5 )   /* saturation ceiling */
   1600 {
   1601    UInt ceil  = (1 << imm5) - 1;    // (2^imm5)-1
   1602    UInt floor = 0;
   1603 
   1604    IRTemp nd0 = newTemp(Ity_I32);
   1605    IRTemp nd1 = newTemp(Ity_I32);
   1606    IRTemp nd2 = newTemp(Ity_I1);
   1607    IRTemp nd3 = newTemp(Ity_I32);
   1608    IRTemp nd4 = newTemp(Ity_I32);
   1609    IRTemp nd5 = newTemp(Ity_I1);
   1610    IRTemp nd6 = newTemp(Ity_I32);
   1611 
   1612    assign( nd0, mkexpr(regT) );
   1613    assign( nd1, mkU32(ceil) );
   1614    assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
   1615    assign( nd3, IRExpr_ITE(mkexpr(nd2), mkexpr(nd1), mkexpr(nd0)) );
   1616    assign( nd4, mkU32(floor) );
   1617    assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
   1618    assign( nd6, IRExpr_ITE(mkexpr(nd5), mkexpr(nd4), mkexpr(nd3)) );
   1619    assign( *res, mkexpr(nd6) );
   1620 
   1621    /* if saturation occurred, then resQ is set to some nonzero value
   1622       if sat did not occur, resQ is guaranteed to be zero. */
   1623    if (resQ) {
   1624       assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   1625    }
   1626 }
   1627 
   1628 
   1629 /* SignedSatQ(): 'clamp' each value so it lies between  -2^N <= x <= (2^N) - 1
   1630    Optionally return flag resQ saying whether saturation occurred.
   1631    - see definition in manual, section A2.2.1, page 41
   1632    (bits(N), boolean ) SignedSatQ( integer i, integer N )
   1633    {
   1634      if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
   1635      elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = FALSE; }
   1636      else                      { result = i;           saturated = FALSE; }
   1637      return ( result[N-1:0], saturated );
   1638    }
   1639 */
   1640 static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
   1641                            UInt imm5,      /* saturation ceiling */
   1642                            IRTemp* res,    /* OUT - Ity_I32 */
   1643                            IRTemp* resQ )  /* OUT - Ity_I32  */
   1644 {
   1645    Int ceil  =  (1 << (imm5-1)) - 1;  //  (2^(imm5-1))-1
   1646    Int floor = -(1 << (imm5-1));      // -(2^(imm5-1))
   1647 
   1648    IRTemp nd0 = newTemp(Ity_I32);
   1649    IRTemp nd1 = newTemp(Ity_I32);
   1650    IRTemp nd2 = newTemp(Ity_I1);
   1651    IRTemp nd3 = newTemp(Ity_I32);
   1652    IRTemp nd4 = newTemp(Ity_I32);
   1653    IRTemp nd5 = newTemp(Ity_I1);
   1654    IRTemp nd6 = newTemp(Ity_I32);
   1655 
   1656    assign( nd0, mkexpr(regT) );
   1657    assign( nd1, mkU32(ceil) );
   1658    assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
   1659    assign( nd3, IRExpr_ITE( mkexpr(nd2), mkexpr(nd1), mkexpr(nd0) ) );
   1660    assign( nd4, mkU32(floor) );
   1661    assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
   1662    assign( nd6, IRExpr_ITE( mkexpr(nd5), mkexpr(nd4), mkexpr(nd3) ) );
   1663    assign( *res, mkexpr(nd6) );
   1664 
   1665    /* if saturation occurred, then resQ is set to some nonzero value
   1666       if sat did not occur, resQ is guaranteed to be zero. */
   1667    if (resQ) {
   1668      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   1669    }
   1670 }
   1671 
   1672 
   1673 /* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
   1674    overflow occurred for 32-bit addition.  Needs both args and the
   1675    result.  HD p27. */
   1676 static
   1677 IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
   1678                                       IRTemp argL, IRTemp argR )
   1679 {
   1680    IRTemp res = newTemp(Ity_I32);
   1681    assign(res, resE);
   1682    return
   1683       binop( Iop_Shr32,
   1684              binop( Iop_And32,
   1685                     binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
   1686                     binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
   1687              mkU8(31) );
   1688 }
   1689 
   1690 /* Similarly .. also from HD p27 .. */
   1691 static
   1692 IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
   1693                                       IRTemp argL, IRTemp argR )
   1694 {
   1695    IRTemp res = newTemp(Ity_I32);
   1696    assign(res, resE);
   1697    return
   1698       binop( Iop_Shr32,
   1699              binop( Iop_And32,
   1700                     binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
   1701                     binop( Iop_Xor32, mkexpr(res),  mkexpr(argL) )),
   1702              mkU8(31) );
   1703 }
   1704 
   1705 
   1706 /*------------------------------------------------------------*/
   1707 /*--- Larger helpers                                       ---*/
   1708 /*------------------------------------------------------------*/
   1709 
   1710 /* Compute both the result and new C flag value for a LSL by an imm5
   1711    or by a register operand.  May generate reads of the old C value
   1712    (hence only safe to use before any writes to guest state happen).
   1713    Are factored out so can be used by both ARM and Thumb.
   1714 
   1715    Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
   1716    "res" (the result)  is a.k.a. "shop", shifter operand
   1717    "newC" (the new C)  is a.k.a. "shco", shifter carry out
   1718 
   1719    The calling convention for res and newC is a bit funny.  They could
   1720    be passed by value, but instead are passed by ref.
   1721 
   1722    The C (shco) value computed must be zero in bits 31:1, as the IR
   1723    optimisations for flag handling (guest_arm_spechelper) rely on
   1724    that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
   1725    for it.  Same applies to all these functions that compute shco
   1726    after a shift or rotate, not just this one.
   1727 */
   1728 
   1729 static void compute_result_and_C_after_LSL_by_imm5 (
   1730                /*OUT*/HChar* buf,
   1731                IRTemp* res,
   1732                IRTemp* newC,
   1733                IRTemp rMt, UInt shift_amt, /* operands */
   1734                UInt rM      /* only for debug printing */
   1735             )
   1736 {
   1737    if (shift_amt == 0) {
   1738       if (newC) {
   1739          assign( *newC, mk_armg_calculate_flag_c() );
   1740       }
   1741       assign( *res, mkexpr(rMt) );
   1742       DIS(buf, "r%u", rM);
   1743    } else {
   1744       vassert(shift_amt >= 1 && shift_amt <= 31);
   1745       if (newC) {
   1746          assign( *newC,
   1747                  binop(Iop_And32,
   1748                        binop(Iop_Shr32, mkexpr(rMt),
   1749                                         mkU8(32 - shift_amt)),
   1750                        mkU32(1)));
   1751       }
   1752       assign( *res,
   1753               binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
   1754       DIS(buf, "r%u, LSL #%u", rM, shift_amt);
   1755    }
   1756 }
   1757 
   1758 
   1759 static void compute_result_and_C_after_LSL_by_reg (
   1760                /*OUT*/HChar* buf,
   1761                IRTemp* res,
   1762                IRTemp* newC,
   1763                IRTemp rMt, IRTemp rSt,  /* operands */
   1764                UInt rM,    UInt rS      /* only for debug printing */
   1765             )
   1766 {
   1767    // shift left in range 0 .. 255
   1768    // amt  = rS & 255
   1769    // res  = amt < 32 ?  Rm << amt  : 0
   1770    // newC = amt == 0     ? oldC  :
   1771    //        amt in 1..32 ?  Rm[32-amt]  : 0
   1772    IRTemp amtT = newTemp(Ity_I32);
   1773    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   1774    if (newC) {
   1775       /* mux0X(amt == 0,
   1776                mux0X(amt < 32,
   1777                      0,
   1778                      Rm[(32-amt) & 31]),
   1779                oldC)
   1780       */
   1781       /* About the best you can do is pray that iropt is able
   1782          to nuke most or all of the following junk. */
   1783       IRTemp oldC = newTemp(Ity_I32);
   1784       assign(oldC, mk_armg_calculate_flag_c() );
   1785       assign(
   1786          *newC,
   1787          IRExpr_ITE(
   1788             binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
   1789             mkexpr(oldC),
   1790             IRExpr_ITE(
   1791                binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
   1792                binop(Iop_And32,
   1793                      binop(Iop_Shr32,
   1794                            mkexpr(rMt),
   1795                            unop(Iop_32to8,
   1796                                 binop(Iop_And32,
   1797                                       binop(Iop_Sub32,
   1798                                             mkU32(32),
   1799                                             mkexpr(amtT)),
   1800                                       mkU32(31)
   1801                                 )
   1802                            )
   1803                      ),
   1804                      mkU32(1)
   1805                      ),
   1806                mkU32(0)
   1807             )
   1808          )
   1809       );
   1810    }
   1811    // (Rm << (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
   1812    // Lhs of the & limits the shift to 31 bits, so as to
   1813    // give known IR semantics.  Rhs of the & is all 1s for
   1814    // Rs <= 31 and all 0s for Rs >= 32.
   1815    assign(
   1816       *res,
   1817       binop(
   1818          Iop_And32,
   1819          binop(Iop_Shl32,
   1820                mkexpr(rMt),
   1821                unop(Iop_32to8,
   1822                     binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
   1823          binop(Iop_Sar32,
   1824                binop(Iop_Sub32,
   1825                      mkexpr(amtT),
   1826                      mkU32(32)),
   1827                mkU8(31))));
   1828     DIS(buf, "r%u, LSL r%u", rM, rS);
   1829 }
   1830 
   1831 
   1832 static void compute_result_and_C_after_LSR_by_imm5 (
   1833                /*OUT*/HChar* buf,
   1834                IRTemp* res,
   1835                IRTemp* newC,
   1836                IRTemp rMt, UInt shift_amt, /* operands */
   1837                UInt rM      /* only for debug printing */
   1838             )
   1839 {
   1840    if (shift_amt == 0) {
   1841       // conceptually a 32-bit shift, however:
   1842       // res  = 0
   1843       // newC = Rm[31]
   1844       if (newC) {
   1845          assign( *newC,
   1846                  binop(Iop_And32,
   1847                        binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
   1848                        mkU32(1)));
   1849       }
   1850       assign( *res, mkU32(0) );
   1851       DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
   1852    } else {
   1853       // shift in range 1..31
   1854       // res  = Rm >>u shift_amt
   1855       // newC = Rm[shift_amt - 1]
   1856       vassert(shift_amt >= 1 && shift_amt <= 31);
   1857       if (newC) {
   1858          assign( *newC,
   1859                  binop(Iop_And32,
   1860                        binop(Iop_Shr32, mkexpr(rMt),
   1861                                         mkU8(shift_amt - 1)),
   1862                        mkU32(1)));
   1863       }
   1864       assign( *res,
   1865               binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
   1866       DIS(buf, "r%u, LSR #%u", rM, shift_amt);
   1867    }
   1868 }
   1869 
   1870 
   1871 static void compute_result_and_C_after_LSR_by_reg (
   1872                /*OUT*/HChar* buf,
   1873                IRTemp* res,
   1874                IRTemp* newC,
   1875                IRTemp rMt, IRTemp rSt,  /* operands */
   1876                UInt rM,    UInt rS      /* only for debug printing */
   1877             )
   1878 {
   1879    // shift right in range 0 .. 255
   1880    // amt = rS & 255
   1881    // res  = amt < 32 ?  Rm >>u amt  : 0
   1882    // newC = amt == 0     ? oldC  :
   1883    //        amt in 1..32 ?  Rm[amt-1]  : 0
   1884    IRTemp amtT = newTemp(Ity_I32);
   1885    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   1886    if (newC) {
   1887       /* mux0X(amt == 0,
   1888                mux0X(amt < 32,
   1889                      0,
   1890                      Rm[(amt-1) & 31]),
   1891                oldC)
   1892       */
   1893       IRTemp oldC = newTemp(Ity_I32);
   1894       assign(oldC, mk_armg_calculate_flag_c() );
   1895       assign(
   1896          *newC,
   1897          IRExpr_ITE(
   1898             binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
   1899             mkexpr(oldC),
   1900             IRExpr_ITE(
   1901                binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
   1902                binop(Iop_And32,
   1903                      binop(Iop_Shr32,
   1904                            mkexpr(rMt),
   1905                            unop(Iop_32to8,
   1906                                 binop(Iop_And32,
   1907                                       binop(Iop_Sub32,
   1908                                             mkexpr(amtT),
   1909                                             mkU32(1)),
   1910                                       mkU32(31)
   1911                                 )
   1912                            )
   1913                      ),
   1914                      mkU32(1)
   1915                      ),
   1916                mkU32(0)
   1917             )
   1918          )
   1919       );
   1920    }
   1921    // (Rm >>u (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
   1922    // Lhs of the & limits the shift to 31 bits, so as to
   1923    // give known IR semantics.  Rhs of the & is all 1s for
   1924    // Rs <= 31 and all 0s for Rs >= 32.
   1925    assign(
   1926       *res,
   1927       binop(
   1928          Iop_And32,
   1929          binop(Iop_Shr32,
   1930                mkexpr(rMt),
   1931                unop(Iop_32to8,
   1932                     binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
   1933          binop(Iop_Sar32,
   1934                binop(Iop_Sub32,
   1935                      mkexpr(amtT),
   1936                      mkU32(32)),
   1937                mkU8(31))));
   1938     DIS(buf, "r%u, LSR r%u", rM, rS);
   1939 }
   1940 
   1941 
   1942 static void compute_result_and_C_after_ASR_by_imm5 (
   1943                /*OUT*/HChar* buf,
   1944                IRTemp* res,
   1945                IRTemp* newC,
   1946                IRTemp rMt, UInt shift_amt, /* operands */
   1947                UInt rM      /* only for debug printing */
   1948             )
   1949 {
   1950    if (shift_amt == 0) {
   1951       // conceptually a 32-bit shift, however:
   1952       // res  = Rm >>s 31
   1953       // newC = Rm[31]
   1954       if (newC) {
   1955          assign( *newC,
   1956                  binop(Iop_And32,
   1957                        binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
   1958                        mkU32(1)));
   1959       }
   1960       assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
   1961       DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
   1962    } else {
   1963       // shift in range 1..31
   1964       // res = Rm >>s shift_amt
   1965       // newC = Rm[shift_amt - 1]
   1966       vassert(shift_amt >= 1 && shift_amt <= 31);
   1967       if (newC) {
   1968          assign( *newC,
   1969                  binop(Iop_And32,
   1970                        binop(Iop_Shr32, mkexpr(rMt),
   1971                                         mkU8(shift_amt - 1)),
   1972                        mkU32(1)));
   1973       }
   1974       assign( *res,
   1975               binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
   1976       DIS(buf, "r%u, ASR #%u", rM, shift_amt);
   1977    }
   1978 }
   1979 
   1980 
   1981 static void compute_result_and_C_after_ASR_by_reg (
   1982                /*OUT*/HChar* buf,
   1983                IRTemp* res,
   1984                IRTemp* newC,
   1985                IRTemp rMt, IRTemp rSt,  /* operands */
   1986                UInt rM,    UInt rS      /* only for debug printing */
   1987             )
   1988 {
   1989    // arithmetic shift right in range 0 .. 255
   1990    // amt = rS & 255
   1991    // res  = amt < 32 ?  Rm >>s amt  : Rm >>s 31
   1992    // newC = amt == 0     ? oldC  :
   1993    //        amt in 1..32 ?  Rm[amt-1]  : Rm[31]
   1994    IRTemp amtT = newTemp(Ity_I32);
   1995    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   1996    if (newC) {
   1997       /* mux0X(amt == 0,
   1998                mux0X(amt < 32,
   1999                      Rm[31],
   2000                      Rm[(amt-1) & 31])
   2001                oldC)
   2002       */
   2003       IRTemp oldC = newTemp(Ity_I32);
   2004       assign(oldC, mk_armg_calculate_flag_c() );
   2005       assign(
   2006          *newC,
   2007          IRExpr_ITE(
   2008             binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
   2009             mkexpr(oldC),
   2010             IRExpr_ITE(
   2011                binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
   2012                binop(Iop_And32,
   2013                      binop(Iop_Shr32,
   2014                            mkexpr(rMt),
   2015                            unop(Iop_32to8,
   2016                                 binop(Iop_And32,
   2017                                       binop(Iop_Sub32,
   2018                                             mkexpr(amtT),
   2019                                             mkU32(1)),
   2020                                       mkU32(31)
   2021                                 )
   2022                            )
   2023                      ),
   2024                      mkU32(1)
   2025                      ),
   2026                binop(Iop_And32,
   2027                      binop(Iop_Shr32,
   2028                            mkexpr(rMt),
   2029                            mkU8(31)
   2030                      ),
   2031                      mkU32(1)
   2032                )
   2033             )
   2034          )
   2035       );
   2036    }
   2037    // (Rm >>s (amt <u 32 ? amt : 31))
   2038    assign(
   2039       *res,
   2040       binop(
   2041          Iop_Sar32,
   2042          mkexpr(rMt),
   2043          unop(
   2044             Iop_32to8,
   2045             IRExpr_ITE(
   2046                binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32)),
   2047                mkexpr(amtT),
   2048                mkU32(31)))));
   2049     DIS(buf, "r%u, ASR r%u", rM, rS);
   2050 }
   2051 
   2052 
   2053 static void compute_result_and_C_after_ROR_by_reg (
   2054                /*OUT*/HChar* buf,
   2055                IRTemp* res,
   2056                IRTemp* newC,
   2057                IRTemp rMt, IRTemp rSt,  /* operands */
   2058                UInt rM,    UInt rS      /* only for debug printing */
   2059             )
   2060 {
   2061    // rotate right in range 0 .. 255
   2062    // amt = rS & 255
   2063    // shop =  Rm `ror` (amt & 31)
   2064    // shco =  amt == 0 ? oldC : Rm[(amt-1) & 31]
   2065    IRTemp amtT = newTemp(Ity_I32);
   2066    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   2067    IRTemp amt5T = newTemp(Ity_I32);
   2068    assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
   2069    IRTemp oldC = newTemp(Ity_I32);
   2070    assign(oldC, mk_armg_calculate_flag_c() );
   2071    if (newC) {
   2072       assign(
   2073          *newC,
   2074          IRExpr_ITE(
   2075             binop(Iop_CmpNE32, mkexpr(amtT), mkU32(0)),
   2076             binop(Iop_And32,
   2077                   binop(Iop_Shr32,
   2078                         mkexpr(rMt),
   2079                         unop(Iop_32to8,
   2080                              binop(Iop_And32,
   2081                                    binop(Iop_Sub32,
   2082                                          mkexpr(amtT),
   2083                                          mkU32(1)
   2084                                    ),
   2085                                    mkU32(31)
   2086                              )
   2087                         )
   2088                   ),
   2089                   mkU32(1)
   2090             ),
   2091             mkexpr(oldC)
   2092          )
   2093       );
   2094    }
   2095    assign(
   2096       *res,
   2097       IRExpr_ITE(
   2098          binop(Iop_CmpNE32, mkexpr(amt5T), mkU32(0)),
   2099          binop(Iop_Or32,
   2100                binop(Iop_Shr32,
   2101                      mkexpr(rMt),
   2102                      unop(Iop_32to8, mkexpr(amt5T))
   2103                ),
   2104                binop(Iop_Shl32,
   2105                      mkexpr(rMt),
   2106                      unop(Iop_32to8,
   2107                           binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
   2108                      )
   2109                )
   2110                ),
   2111          mkexpr(rMt)
   2112       )
   2113    );
   2114    DIS(buf, "r%u, ROR r#%u", rM, rS);
   2115 }
   2116 
   2117 
   2118 /* Generate an expression corresponding to the immediate-shift case of
   2119    a shifter operand.  This is used both for ARM and Thumb2.
   2120 
   2121    Bind it to a temporary, and return that via *res.  If newC is
   2122    non-NULL, also compute a value for the shifter's carry out (in the
   2123    LSB of a word), bind it to a temporary, and return that via *shco.
   2124 
   2125    Generates GETs from the guest state and is therefore not safe to
   2126    use once we start doing PUTs to it, for any given instruction.
   2127 
   2128    'how' is encoded thusly:
   2129       00b LSL,  01b LSR,  10b ASR,  11b ROR
   2130    Most but not all ARM and Thumb integer insns use this encoding.
   2131    Be careful to ensure the right value is passed here.
   2132 */
   2133 static void compute_result_and_C_after_shift_by_imm5 (
   2134                /*OUT*/HChar* buf,
   2135                /*OUT*/IRTemp* res,
   2136                /*OUT*/IRTemp* newC,
   2137                IRTemp  rMt,       /* reg to shift */
   2138                UInt    how,       /* what kind of shift */
   2139                UInt    shift_amt, /* shift amount (0..31) */
   2140                UInt    rM         /* only for debug printing */
   2141             )
   2142 {
   2143    vassert(shift_amt < 32);
   2144    vassert(how < 4);
   2145 
   2146    switch (how) {
   2147 
   2148       case 0:
   2149          compute_result_and_C_after_LSL_by_imm5(
   2150             buf, res, newC, rMt, shift_amt, rM
   2151          );
   2152          break;
   2153 
   2154       case 1:
   2155          compute_result_and_C_after_LSR_by_imm5(
   2156             buf, res, newC, rMt, shift_amt, rM
   2157          );
   2158          break;
   2159 
   2160       case 2:
   2161          compute_result_and_C_after_ASR_by_imm5(
   2162             buf, res, newC, rMt, shift_amt, rM
   2163          );
   2164          break;
   2165 
   2166       case 3:
   2167          if (shift_amt == 0) {
   2168             IRTemp oldcT = newTemp(Ity_I32);
   2169             // rotate right 1 bit through carry (?)
   2170             // RRX -- described at ARM ARM A5-17
   2171             // res  = (oldC << 31) | (Rm >>u 1)
   2172             // newC = Rm[0]
   2173             if (newC) {
   2174                assign( *newC,
   2175                        binop(Iop_And32, mkexpr(rMt), mkU32(1)));
   2176             }
   2177             assign( oldcT, mk_armg_calculate_flag_c() );
   2178             assign( *res,
   2179                     binop(Iop_Or32,
   2180                           binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
   2181                           binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
   2182             DIS(buf, "r%u, RRX", rM);
   2183          } else {
   2184             // rotate right in range 1..31
   2185             // res  = Rm `ror` shift_amt
   2186             // newC = Rm[shift_amt - 1]
   2187             vassert(shift_amt >= 1 && shift_amt <= 31);
   2188             if (newC) {
   2189                assign( *newC,
   2190                        binop(Iop_And32,
   2191                              binop(Iop_Shr32, mkexpr(rMt),
   2192                                               mkU8(shift_amt - 1)),
   2193                              mkU32(1)));
   2194             }
   2195             assign( *res,
   2196                     binop(Iop_Or32,
   2197                           binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
   2198                           binop(Iop_Shl32, mkexpr(rMt),
   2199                                            mkU8(32-shift_amt))));
   2200             DIS(buf, "r%u, ROR #%u", rM, shift_amt);
   2201          }
   2202          break;
   2203 
   2204       default:
   2205          /*NOTREACHED*/
   2206          vassert(0);
   2207    }
   2208 }
   2209 
   2210 
   2211 /* Generate an expression corresponding to the register-shift case of
   2212    a shifter operand.  This is used both for ARM and Thumb2.
   2213 
   2214    Bind it to a temporary, and return that via *res.  If newC is
   2215    non-NULL, also compute a value for the shifter's carry out (in the
   2216    LSB of a word), bind it to a temporary, and return that via *shco.
   2217 
   2218    Generates GETs from the guest state and is therefore not safe to
   2219    use once we start doing PUTs to it, for any given instruction.
   2220 
   2221    'how' is encoded thusly:
   2222       00b LSL,  01b LSR,  10b ASR,  11b ROR
   2223    Most but not all ARM and Thumb integer insns use this encoding.
   2224    Be careful to ensure the right value is passed here.
   2225 */
   2226 static void compute_result_and_C_after_shift_by_reg (
   2227                /*OUT*/HChar*  buf,
   2228                /*OUT*/IRTemp* res,
   2229                /*OUT*/IRTemp* newC,
   2230                IRTemp  rMt,       /* reg to shift */
   2231                UInt    how,       /* what kind of shift */
   2232                IRTemp  rSt,       /* shift amount */
   2233                UInt    rM,        /* only for debug printing */
   2234                UInt    rS         /* only for debug printing */
   2235             )
   2236 {
   2237    vassert(how < 4);
   2238    switch (how) {
   2239       case 0: { /* LSL */
   2240          compute_result_and_C_after_LSL_by_reg(
   2241             buf, res, newC, rMt, rSt, rM, rS
   2242          );
   2243          break;
   2244       }
   2245       case 1: { /* LSR */
   2246          compute_result_and_C_after_LSR_by_reg(
   2247             buf, res, newC, rMt, rSt, rM, rS
   2248          );
   2249          break;
   2250       }
   2251       case 2: { /* ASR */
   2252          compute_result_and_C_after_ASR_by_reg(
   2253             buf, res, newC, rMt, rSt, rM, rS
   2254          );
   2255          break;
   2256       }
   2257       case 3: { /* ROR */
   2258          compute_result_and_C_after_ROR_by_reg(
   2259              buf, res, newC, rMt, rSt, rM, rS
   2260          );
   2261          break;
   2262       }
   2263       default:
   2264          /*NOTREACHED*/
   2265          vassert(0);
   2266    }
   2267 }
   2268 
   2269 
   2270 /* Generate an expression corresponding to a shifter_operand, bind it
   2271    to a temporary, and return that via *shop.  If shco is non-NULL,
   2272    also compute a value for the shifter's carry out (in the LSB of a
   2273    word), bind it to a temporary, and return that via *shco.
   2274 
   2275    If for some reason we can't come up with a shifter operand (missing
   2276    case?  not really a shifter operand?) return False.
   2277 
   2278    Generates GETs from the guest state and is therefore not safe to
   2279    use once we start doing PUTs to it, for any given instruction.
   2280 
   2281    For ARM insns only; not for Thumb.
   2282 */
   2283 static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
   2284                                  /*OUT*/IRTemp* shop,
   2285                                  /*OUT*/IRTemp* shco,
   2286                                  /*OUT*/HChar* buf )
   2287 {
   2288    UInt insn_4 = (insn_11_0 >> 4) & 1;
   2289    UInt insn_7 = (insn_11_0 >> 7) & 1;
   2290    vassert(insn_25 <= 0x1);
   2291    vassert(insn_11_0 <= 0xFFF);
   2292 
   2293    vassert(shop && *shop == IRTemp_INVALID);
   2294    *shop = newTemp(Ity_I32);
   2295 
   2296    if (shco) {
   2297       vassert(*shco == IRTemp_INVALID);
   2298       *shco = newTemp(Ity_I32);
   2299    }
   2300 
   2301    /* 32-bit immediate */
   2302 
   2303    if (insn_25 == 1) {
   2304       /* immediate: (7:0) rotated right by 2 * (11:8) */
   2305       UInt imm = (insn_11_0 >> 0) & 0xFF;
   2306       UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
   2307       vassert(rot <= 30);
   2308       imm = ROR32(imm, rot);
   2309       if (shco) {
   2310          if (rot == 0) {
   2311             assign( *shco, mk_armg_calculate_flag_c() );
   2312          } else {
   2313             assign( *shco, mkU32( (imm >> 31) & 1 ) );
   2314          }
   2315       }
   2316       DIS(buf, "#0x%x", imm);
   2317       assign( *shop, mkU32(imm) );
   2318       return True;
   2319    }
   2320 
   2321    /* Shift/rotate by immediate */
   2322 
   2323    if (insn_25 == 0 && insn_4 == 0) {
   2324       /* Rm (3:0) shifted (6:5) by immediate (11:7) */
   2325       UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
   2326       UInt rM        = (insn_11_0 >> 0) & 0xF;
   2327       UInt how       = (insn_11_0 >> 5) & 3;
   2328       /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
   2329       IRTemp rMt = newTemp(Ity_I32);
   2330       assign(rMt, getIRegA(rM));
   2331 
   2332       vassert(shift_amt <= 31);
   2333 
   2334       compute_result_and_C_after_shift_by_imm5(
   2335          buf, shop, shco, rMt, how, shift_amt, rM
   2336       );
   2337       return True;
   2338    }
   2339 
   2340    /* Shift/rotate by register */
   2341    if (insn_25 == 0 && insn_4 == 1) {
   2342       /* Rm (3:0) shifted (6:5) by Rs (11:8) */
   2343       UInt rM  = (insn_11_0 >> 0) & 0xF;
   2344       UInt rS  = (insn_11_0 >> 8) & 0xF;
   2345       UInt how = (insn_11_0 >> 5) & 3;
   2346       /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
   2347       IRTemp rMt = newTemp(Ity_I32);
   2348       IRTemp rSt = newTemp(Ity_I32);
   2349 
   2350       if (insn_7 == 1)
   2351          return False; /* not really a shifter operand */
   2352 
   2353       assign(rMt, getIRegA(rM));
   2354       assign(rSt, getIRegA(rS));
   2355 
   2356       compute_result_and_C_after_shift_by_reg(
   2357          buf, shop, shco, rMt, how, rSt, rM, rS
   2358       );
   2359       return True;
   2360    }
   2361 
   2362    vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
   2363    return False;
   2364 }
   2365 
   2366 
   2367 /* ARM only */
   2368 static
   2369 IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
   2370                                     /*OUT*/HChar* buf )
   2371 {
   2372    vassert(rN < 16);
   2373    vassert(bU < 2);
   2374    vassert(imm12 < 0x1000);
   2375    HChar opChar = bU == 1 ? '+' : '-';
   2376    DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
   2377    return
   2378       binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
   2379              getIRegA(rN),
   2380              mkU32(imm12) );
   2381 }
   2382 
   2383 
   2384 /* ARM only.
   2385    NB: This is "DecodeImmShift" in newer versions of the the ARM ARM.
   2386 */
   2387 static
   2388 IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
   2389                                           UInt sh2, UInt imm5,
   2390                                           /*OUT*/HChar* buf )
   2391 {
   2392    vassert(rN < 16);
   2393    vassert(bU < 2);
   2394    vassert(rM < 16);
   2395    vassert(sh2 < 4);
   2396    vassert(imm5 < 32);
   2397    HChar   opChar = bU == 1 ? '+' : '-';
   2398    IRExpr* index  = NULL;
   2399    switch (sh2) {
   2400       case 0: /* LSL */
   2401          /* imm5 can be in the range 0 .. 31 inclusive. */
   2402          index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
   2403          DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
   2404          break;
   2405       case 1: /* LSR */
   2406          if (imm5 == 0) {
   2407             index = mkU32(0);
   2408             vassert(0); // ATC
   2409          } else {
   2410             index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
   2411          }
   2412          DIS(buf, "[r%u, %cr%u, LSR #%u]",
   2413                   rN, opChar, rM, imm5 == 0 ? 32 : imm5);
   2414          break;
   2415       case 2: /* ASR */
   2416          /* Doesn't this just mean that the behaviour with imm5 == 0
   2417             is the same as if it had been 31 ? */
   2418          if (imm5 == 0) {
   2419             index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
   2420             vassert(0); // ATC
   2421          } else {
   2422             index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
   2423          }
   2424          DIS(buf, "[r%u, %cr%u, ASR #%u]",
   2425                   rN, opChar, rM, imm5 == 0 ? 32 : imm5);
   2426          break;
   2427       case 3: /* ROR or RRX */
   2428          if (imm5 == 0) {
   2429             IRTemp rmT    = newTemp(Ity_I32);
   2430             IRTemp cflagT = newTemp(Ity_I32);
   2431             assign(rmT, getIRegA(rM));
   2432             assign(cflagT, mk_armg_calculate_flag_c());
   2433             index = binop(Iop_Or32,
   2434                           binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
   2435                           binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
   2436             DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
   2437          } else {
   2438             IRTemp rmT = newTemp(Ity_I32);
   2439             assign(rmT, getIRegA(rM));
   2440             vassert(imm5 >= 1 && imm5 <= 31);
   2441             index = binop(Iop_Or32,
   2442                           binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
   2443                           binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
   2444             DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
   2445          }
   2446          break;
   2447       default:
   2448          vassert(0);
   2449    }
   2450    vassert(index);
   2451    return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   2452                 getIRegA(rN), index);
   2453 }
   2454 
   2455 
   2456 /* ARM only */
   2457 static
   2458 IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
   2459                                    /*OUT*/HChar* buf )
   2460 {
   2461    vassert(rN < 16);
   2462    vassert(bU < 2);
   2463    vassert(imm8 < 0x100);
   2464    HChar opChar = bU == 1 ? '+' : '-';
   2465    DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
   2466    return
   2467       binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
   2468              getIRegA(rN),
   2469              mkU32(imm8) );
   2470 }
   2471 
   2472 
   2473 /* ARM only */
   2474 static
   2475 IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
   2476                                   /*OUT*/HChar* buf )
   2477 {
   2478    vassert(rN < 16);
   2479    vassert(bU < 2);
   2480    vassert(rM < 16);
   2481    HChar   opChar = bU == 1 ? '+' : '-';
   2482    IRExpr* index  = getIRegA(rM);
   2483    DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
   2484    return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   2485                 getIRegA(rN), index);
   2486 }
   2487 
   2488 
   2489 /* irRes :: Ity_I32 holds a floating point comparison result encoded
   2490    as an IRCmpF64Result.  Generate code to convert it to an
   2491    ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
   2492    Assign a new temp to hold that value, and return the temp. */
   2493 static
   2494 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
   2495 {
   2496    IRTemp ix       = newTemp(Ity_I32);
   2497    IRTemp termL    = newTemp(Ity_I32);
   2498    IRTemp termR    = newTemp(Ity_I32);
   2499    IRTemp nzcv     = newTemp(Ity_I32);
   2500 
   2501    /* This is where the fun starts.  We have to convert 'irRes' from
   2502       an IR-convention return result (IRCmpF64Result) to an
   2503       ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
   2504       4 bits of 'nzcv'. */
   2505    /* Map compare result from IR to ARM(nzcv) */
   2506    /*
   2507       FP cmp result | IR   | ARM(nzcv)
   2508       --------------------------------
   2509       UN              0x45   0011
   2510       LT              0x01   1000
   2511       GT              0x00   0010
   2512       EQ              0x40   0110
   2513    */
   2514    /* Now since you're probably wondering WTF ..
   2515 
   2516       ix fishes the useful bits out of the IR value, bits 6 and 0, and
   2517       places them side by side, giving a number which is 0, 1, 2 or 3.
   2518 
   2519       termL is a sequence cooked up by GNU superopt.  It converts ix
   2520          into an almost correct value NZCV value (incredibly), except
   2521          for the case of UN, where it produces 0100 instead of the
   2522          required 0011.
   2523 
   2524       termR is therefore a correction term, also computed from ix.  It
   2525          is 1 in the UN case and 0 for LT, GT and UN.  Hence, to get
   2526          the final correct value, we subtract termR from termL.
   2527 
   2528       Don't take my word for it.  There's a test program at the bottom
   2529       of this file, to try this out with.
   2530    */
   2531    assign(
   2532       ix,
   2533       binop(Iop_Or32,
   2534             binop(Iop_And32,
   2535                   binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
   2536                   mkU32(3)),
   2537             binop(Iop_And32, mkexpr(irRes), mkU32(1))));
   2538 
   2539    assign(
   2540       termL,
   2541       binop(Iop_Add32,
   2542             binop(Iop_Shr32,
   2543                   binop(Iop_Sub32,
   2544                         binop(Iop_Shl32,
   2545                               binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
   2546                               mkU8(30)),
   2547                         mkU32(1)),
   2548                   mkU8(29)),
   2549             mkU32(1)));
   2550 
   2551    assign(
   2552       termR,
   2553       binop(Iop_And32,
   2554             binop(Iop_And32,
   2555                   mkexpr(ix),
   2556                   binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
   2557             mkU32(1)));
   2558 
   2559    assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
   2560    return nzcv;
   2561 }
   2562 
   2563 
   2564 /* Thumb32 only.  This is "ThumbExpandImm" in the ARM ARM.  If
   2565    updatesC is non-NULL, a boolean is written to it indicating whether
   2566    or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
   2567 */
   2568 static UInt thumbExpandImm ( Bool* updatesC,
   2569                              UInt imm1, UInt imm3, UInt imm8 )
   2570 {
   2571    vassert(imm1 < (1<<1));
   2572    vassert(imm3 < (1<<3));
   2573    vassert(imm8 < (1<<8));
   2574    UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
   2575    UInt abcdefgh = imm8;
   2576    UInt lbcdefgh = imm8 | 0x80;
   2577    if (updatesC) {
   2578       *updatesC = i_imm3_a >= 8;
   2579    }
   2580    switch (i_imm3_a) {
   2581       case 0: case 1:
   2582          return abcdefgh;
   2583       case 2: case 3:
   2584          return (abcdefgh << 16) | abcdefgh;
   2585       case 4: case 5:
   2586          return (abcdefgh << 24) | (abcdefgh << 8);
   2587       case 6: case 7:
   2588          return (abcdefgh << 24) | (abcdefgh << 16)
   2589                 | (abcdefgh << 8) | abcdefgh;
   2590       case 8 ... 31:
   2591          return lbcdefgh << (32 - i_imm3_a);
   2592       default:
   2593          break;
   2594    }
   2595    /*NOTREACHED*/vassert(0);
   2596 }
   2597 
   2598 
   2599 /* Version of thumbExpandImm where we simply feed it the
   2600    instruction halfwords (the lowest addressed one is I0). */
   2601 static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
   2602                                         UShort i0s, UShort i1s )
   2603 {
   2604    UInt i0    = (UInt)i0s;
   2605    UInt i1    = (UInt)i1s;
   2606    UInt imm1  = SLICE_UInt(i0,10,10);
   2607    UInt imm3  = SLICE_UInt(i1,14,12);
   2608    UInt imm8  = SLICE_UInt(i1,7,0);
   2609    return thumbExpandImm(updatesC, imm1, imm3, imm8);
   2610 }
   2611 
   2612 
   2613 /* Thumb16 only.  Given the firstcond and mask fields from an IT
   2614    instruction, compute the 32-bit ITSTATE value implied, as described
   2615    in libvex_guest_arm.h.  This is not the ARM ARM representation.
   2616    Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
   2617    disassembly printing.  Returns False if firstcond or mask
   2618    denote something invalid.
   2619 
   2620    The number and conditions for the instructions to be
   2621    conditionalised depend on firstcond and mask:
   2622 
   2623    mask      cond 1    cond 2      cond 3      cond 4
   2624 
   2625    1000      fc[3:0]
   2626    x100      fc[3:0]   fc[3:1]:x
   2627    xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
   2628    xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z
   2629 
   2630    The condition fields are assembled in *itstate backwards (cond 4 at
   2631    the top, cond 1 at the bottom).  Conditions are << 4'd and then
   2632    ^0xE'd, and those fields that correspond to instructions in the IT
   2633    block are tagged with a 1 bit.
   2634 */
   2635 static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
   2636                               /*OUT*/HChar* ch1,
   2637                               /*OUT*/HChar* ch2,
   2638                               /*OUT*/HChar* ch3,
   2639                               UInt firstcond, UInt mask )
   2640 {
   2641    vassert(firstcond <= 0xF);
   2642    vassert(mask <= 0xF);
   2643    *itstate = 0;
   2644    *ch1 = *ch2 = *ch3 = '.';
   2645    if (mask == 0)
   2646       return False; /* the logic below actually ensures this anyway,
   2647                        but clearer to make it explicit. */
   2648    if (firstcond == 0xF)
   2649       return False; /* NV is not allowed */
   2650    if (firstcond == 0xE && popcount32(mask) != 1)
   2651       return False; /* if firstcond is AL then all the rest must be too */
   2652 
   2653    UInt m3 = (mask >> 3) & 1;
   2654    UInt m2 = (mask >> 2) & 1;
   2655    UInt m1 = (mask >> 1) & 1;
   2656    UInt m0 = (mask >> 0) & 1;
   2657 
   2658    UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
   2659    UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;
   2660 
   2661    if (m3 == 1 && (m2|m1|m0) == 0) {
   2662       *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
   2663       *itstate ^= 0xE0E0E0E0;
   2664       return True;
   2665    }
   2666 
   2667    if (m2 == 1 && (m1|m0) == 0) {
   2668       *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
   2669       *itstate ^= 0xE0E0E0E0;
   2670       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
   2671       return True;
   2672    }
   2673 
   2674    if (m1 == 1 && m0 == 0) {
   2675       *itstate = (ni << 24)
   2676                  | (setbit32(fc, 4, m2) << 16)
   2677                  | (setbit32(fc, 4, m3) << 8) | fc;
   2678       *itstate ^= 0xE0E0E0E0;
   2679       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
   2680       *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
   2681       return True;
   2682    }
   2683 
   2684    if (m0 == 1) {
   2685       *itstate = (setbit32(fc, 4, m1) << 24)
   2686                  | (setbit32(fc, 4, m2) << 16)
   2687                  | (setbit32(fc, 4, m3) << 8) | fc;
   2688       *itstate ^= 0xE0E0E0E0;
   2689       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
   2690       *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
   2691       *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
   2692       return True;
   2693    }
   2694 
   2695    return False;
   2696 }
   2697 
   2698 
   2699 /* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
   2700    Chapter 7 Section 1. */
   2701 static IRTemp gen_BITREV ( IRTemp x0 )
   2702 {
   2703    IRTemp x1 = newTemp(Ity_I32);
   2704    IRTemp x2 = newTemp(Ity_I32);
   2705    IRTemp x3 = newTemp(Ity_I32);
   2706    IRTemp x4 = newTemp(Ity_I32);
   2707    IRTemp x5 = newTemp(Ity_I32);
   2708    UInt   c1 = 0x55555555;
   2709    UInt   c2 = 0x33333333;
   2710    UInt   c3 = 0x0F0F0F0F;
   2711    UInt   c4 = 0x00FF00FF;
   2712    UInt   c5 = 0x0000FFFF;
   2713    assign(x1,
   2714           binop(Iop_Or32,
   2715                 binop(Iop_Shl32,
   2716                       binop(Iop_And32, mkexpr(x0), mkU32(c1)),
   2717                       mkU8(1)),
   2718                 binop(Iop_Shr32,
   2719                       binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
   2720                       mkU8(1))
   2721    ));
   2722    assign(x2,
   2723           binop(Iop_Or32,
   2724                 binop(Iop_Shl32,
   2725                       binop(Iop_And32, mkexpr(x1), mkU32(c2)),
   2726                       mkU8(2)),
   2727                 binop(Iop_Shr32,
   2728                       binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
   2729                       mkU8(2))
   2730    ));
   2731    assign(x3,
   2732           binop(Iop_Or32,
   2733                 binop(Iop_Shl32,
   2734                       binop(Iop_And32, mkexpr(x2), mkU32(c3)),
   2735                       mkU8(4)),
   2736                 binop(Iop_Shr32,
   2737                       binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
   2738                       mkU8(4))
   2739    ));
   2740    assign(x4,
   2741           binop(Iop_Or32,
   2742                 binop(Iop_Shl32,
   2743                       binop(Iop_And32, mkexpr(x3), mkU32(c4)),
   2744                       mkU8(8)),
   2745                 binop(Iop_Shr32,
   2746                       binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
   2747                       mkU8(8))
   2748    ));
   2749    assign(x5,
   2750           binop(Iop_Or32,
   2751                 binop(Iop_Shl32,
   2752                       binop(Iop_And32, mkexpr(x4), mkU32(c5)),
   2753                       mkU8(16)),
   2754                 binop(Iop_Shr32,
   2755                       binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
   2756                       mkU8(16))
   2757    ));
   2758    return x5;
   2759 }
   2760 
   2761 
   2762 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   2763    0:1:2:3 (aka byte-swap). */
   2764 static IRTemp gen_REV ( IRTemp arg )
   2765 {
   2766    IRTemp res = newTemp(Ity_I32);
   2767    assign(res,
   2768           binop(Iop_Or32,
   2769                 binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
   2770           binop(Iop_Or32,
   2771                 binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
   2772                                  mkU32(0x00FF0000)),
   2773           binop(Iop_Or32,
   2774                 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
   2775                                        mkU32(0x0000FF00)),
   2776                 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
   2777                                        mkU32(0x000000FF) )
   2778    ))));
   2779    return res;
   2780 }
   2781 
   2782 
   2783 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   2784    2:3:0:1 (swap within lo and hi halves). */
   2785 static IRTemp gen_REV16 ( IRTemp arg )
   2786 {
   2787    IRTemp res = newTemp(Ity_I32);
   2788    assign(res,
   2789           binop(Iop_Or32,
   2790                 binop(Iop_And32,
   2791                       binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
   2792                       mkU32(0xFF00FF00)),
   2793                 binop(Iop_And32,
   2794                       binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
   2795                       mkU32(0x00FF00FF))));
   2796    return res;
   2797 }
   2798 
   2799 
   2800 /*------------------------------------------------------------*/
   2801 /*--- Advanced SIMD (NEON) instructions                    ---*/
   2802 /*------------------------------------------------------------*/
   2803 
   2804 /*------------------------------------------------------------*/
   2805 /*--- NEON data processing                                 ---*/
   2806 /*------------------------------------------------------------*/
   2807 
   2808 /* For all NEON DP ops, we use the normal scheme to handle conditional
   2809    writes to registers -- pass in condT and hand that on to the
   2810    put*Reg functions.  In ARM mode condT is always IRTemp_INVALID
   2811    since NEON is unconditional for ARM.  In Thumb mode condT is
   2812    derived from the ITSTATE shift register in the normal way. */
   2813 
   2814 static
   2815 UInt get_neon_d_regno(UInt theInstr)
   2816 {
   2817    UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   2818    if (theInstr & 0x40) {
   2819       if (x & 1) {
   2820          x = x + 0x100;
   2821       } else {
   2822          x = x >> 1;
   2823       }
   2824    }
   2825    return x;
   2826 }
   2827 
   2828 static
   2829 UInt get_neon_n_regno(UInt theInstr)
   2830 {
   2831    UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
   2832    if (theInstr & 0x40) {
   2833       if (x & 1) {
   2834          x = x + 0x100;
   2835       } else {
   2836          x = x >> 1;
   2837       }
   2838    }
   2839    return x;
   2840 }
   2841 
   2842 static
   2843 UInt get_neon_m_regno(UInt theInstr)
   2844 {
   2845    UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   2846    if (theInstr & 0x40) {
   2847       if (x & 1) {
   2848          x = x + 0x100;
   2849       } else {
   2850          x = x >> 1;
   2851       }
   2852    }
   2853    return x;
   2854 }
   2855 
   2856 static
   2857 Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
   2858 {
   2859    UInt dreg = get_neon_d_regno(theInstr);
   2860    UInt mreg = get_neon_m_regno(theInstr);
   2861    UInt nreg = get_neon_n_regno(theInstr);
   2862    UInt imm4 = (theInstr >> 8) & 0xf;
   2863    UInt Q = (theInstr >> 6) & 1;
   2864    HChar reg_t = Q ? 'q' : 'd';
   2865 
   2866    if (Q) {
   2867       putQReg(dreg, triop(Iop_SliceV128, /*hiV128*/getQReg(mreg),
   2868                           /*loV128*/getQReg(nreg), mkU8(imm4)), condT);
   2869    } else {
   2870       putDRegI64(dreg, triop(Iop_Slice64, /*hiI64*/getDRegI64(mreg),
   2871                              /*loI64*/getDRegI64(nreg), mkU8(imm4)), condT);
   2872    }
   2873    DIP("vext.8 %c%d, %c%d, %c%d, #%d\n", reg_t, dreg, reg_t, nreg,
   2874                                          reg_t, mreg, imm4);
   2875    return True;
   2876 }
   2877 
   2878 /* Generate specific vector FP binary ops, possibly with a fake
   2879    rounding mode as required by the primop. */
   2880 static
   2881 IRExpr* binop_w_fake_RM ( IROp op, IRExpr* argL, IRExpr* argR )
   2882 {
   2883    switch (op) {
   2884       case Iop_Add32Fx4:
   2885       case Iop_Sub32Fx4:
   2886       case Iop_Mul32Fx4:
   2887          return triop(op, get_FAKE_roundingmode(), argL, argR );
   2888       case Iop_Add32x4: case Iop_Add16x8:
   2889       case Iop_Sub32x4: case Iop_Sub16x8:
   2890       case Iop_Mul32x4: case Iop_Mul16x8:
   2891       case Iop_Mul32x2: case Iop_Mul16x4:
   2892       case Iop_Add32Fx2:
   2893       case Iop_Sub32Fx2:
   2894       case Iop_Mul32Fx2:
   2895       case Iop_PwAdd32Fx2:
   2896          return binop(op, argL, argR);
   2897       default:
   2898         ppIROp(op);
   2899         vassert(0);
   2900    }
   2901 }
   2902 
   2903 /* VTBL, VTBX */
   2904 static
   2905 Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
   2906 {
   2907    UInt op = (theInstr >> 6) & 1;
   2908    UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
   2909    UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
   2910    UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
   2911    UInt len = (theInstr >> 8) & 3;
   2912    Int i;
   2913    IROp cmp;
   2914    ULong imm;
   2915    IRTemp arg_l;
   2916    IRTemp old_mask, new_mask, cur_mask;
   2917    IRTemp old_res, new_res;
   2918    IRTemp old_arg, new_arg;
   2919 
   2920    if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
   2921       return False;
   2922    if (nreg + len > 31)
   2923       return False;
   2924 
   2925    cmp = Iop_CmpGT8Ux8;
   2926 
   2927    old_mask = newTemp(Ity_I64);
   2928    old_res = newTemp(Ity_I64);
   2929    old_arg = newTemp(Ity_I64);
   2930    assign(old_mask, mkU64(0));
   2931    assign(old_res, mkU64(0));
   2932    assign(old_arg, getDRegI64(mreg));
   2933    imm = 8;
   2934    imm = (imm <<  8) | imm;
   2935    imm = (imm << 16) | imm;
   2936    imm = (imm << 32) | imm;
   2937 
   2938    for (i = 0; i <= len; i++) {
   2939       arg_l = newTemp(Ity_I64);
   2940       new_mask = newTemp(Ity_I64);
   2941       cur_mask = newTemp(Ity_I64);
   2942       new_res = newTemp(Ity_I64);
   2943       new_arg = newTemp(Ity_I64);
   2944       assign(arg_l, getDRegI64(nreg+i));
   2945       assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
   2946       assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
   2947       assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
   2948       assign(new_res, binop(Iop_Or64,
   2949                             mkexpr(old_res),
   2950                             binop(Iop_And64,
   2951                                   binop(Iop_Perm8x8,
   2952                                         mkexpr(arg_l),
   2953                                         binop(Iop_And64,
   2954                                               mkexpr(old_arg),
   2955                                               mkexpr(cur_mask))),
   2956                                   mkexpr(cur_mask))));
   2957 
   2958       old_arg = new_arg;
   2959       old_mask = new_mask;
   2960       old_res = new_res;
   2961    }
   2962    if (op) {
   2963       new_res = newTemp(Ity_I64);
   2964       assign(new_res, binop(Iop_Or64,
   2965                             binop(Iop_And64,
   2966                                   getDRegI64(dreg),
   2967                                   unop(Iop_Not64, mkexpr(old_mask))),
   2968                             mkexpr(old_res)));
   2969       old_res = new_res;
   2970    }
   2971 
   2972    putDRegI64(dreg, mkexpr(old_res), condT);
   2973    DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
   2974    if (len > 0) {
   2975       DIP("d%u-d%u", nreg, nreg + len);
   2976    } else {
   2977       DIP("d%u", nreg);
   2978    }
   2979    DIP("}, d%u\n", mreg);
   2980    return True;
   2981 }
   2982 
   2983 /* VDUP (scalar)  */
   2984 static
   2985 Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
   2986 {
   2987    UInt Q = (theInstr >> 6) & 1;
   2988    UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   2989    UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   2990    UInt imm4 = (theInstr >> 16) & 0xF;
   2991    UInt index;
   2992    UInt size;
   2993    IRTemp arg_m;
   2994    IRTemp res;
   2995    IROp op, op2;
   2996 
   2997    if ((imm4 == 0) || (imm4 == 8))
   2998       return False;
   2999    if ((Q == 1) && ((dreg & 1) == 1))
   3000       return False;
   3001    if (Q)
   3002       dreg >>= 1;
   3003    arg_m = newTemp(Ity_I64);
   3004    assign(arg_m, getDRegI64(mreg));
   3005    if (Q)
   3006       res = newTemp(Ity_V128);
   3007    else
   3008       res = newTemp(Ity_I64);
   3009    if ((imm4 & 1) == 1) {
   3010       op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
   3011       op2 = Iop_GetElem8x8;
   3012       index = imm4 >> 1;
   3013       size = 8;
   3014    } else if ((imm4 & 3) == 2) {
   3015       op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
   3016       op2 = Iop_GetElem16x4;
   3017       index = imm4 >> 2;
   3018       size = 16;
   3019    } else if ((imm4 & 7) == 4) {
   3020       op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
   3021       op2 = Iop_GetElem32x2;
   3022       index = imm4 >> 3;
   3023       size = 32;
   3024    } else {
   3025       return False; // can this ever happen?
   3026    }
   3027    assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
   3028    if (Q) {
   3029       putQReg(dreg, mkexpr(res), condT);
   3030    } else {
   3031       putDRegI64(dreg, mkexpr(res), condT);
   3032    }
   3033    DIP("vdup.%d %c%d, d%d[%d]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
   3034    return True;
   3035 }
   3036 
   3037 /* A7.4.1 Three registers of the same length */
   3038 static
   3039 Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
   3040 {
   3041    UInt Q = (theInstr >> 6) & 1;
   3042    UInt dreg = get_neon_d_regno(theInstr);
   3043    UInt nreg = get_neon_n_regno(theInstr);
   3044    UInt mreg = get_neon_m_regno(theInstr);
   3045    UInt A = (theInstr >> 8) & 0xF;
   3046    UInt B = (theInstr >> 4) & 1;
   3047    UInt C = (theInstr >> 20) & 0x3;
   3048    UInt U = (theInstr >> 24) & 1;
   3049    UInt size = C;
   3050 
   3051    IRTemp arg_n;
   3052    IRTemp arg_m;
   3053    IRTemp res;
   3054 
   3055    if (Q) {
   3056       arg_n = newTemp(Ity_V128);
   3057       arg_m = newTemp(Ity_V128);
   3058       res = newTemp(Ity_V128);
   3059       assign(arg_n, getQReg(nreg));
   3060       assign(arg_m, getQReg(mreg));
   3061    } else {
   3062       arg_n = newTemp(Ity_I64);
   3063       arg_m = newTemp(Ity_I64);
   3064       res = newTemp(Ity_I64);
   3065       assign(arg_n, getDRegI64(nreg));
   3066       assign(arg_m, getDRegI64(mreg));
   3067    }
   3068 
   3069    switch(A) {
   3070       case 0:
   3071          if (B == 0) {
   3072             /* VHADD */
   3073             ULong imm = 0;
   3074             IRExpr *imm_val;
   3075             IROp addOp;
   3076             IROp andOp;
   3077             IROp shOp;
   3078             HChar regType = Q ? 'q' : 'd';
   3079 
   3080             if (size == 3)
   3081                return False;
   3082             switch(size) {
   3083                case 0: imm = 0x101010101010101LL; break;
   3084                case 1: imm = 0x1000100010001LL; break;
   3085                case 2: imm = 0x100000001LL; break;
   3086                default: vassert(0);
   3087             }
   3088             if (Q) {
   3089                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   3090                andOp = Iop_AndV128;
   3091             } else {
   3092                imm_val = mkU64(imm);
   3093                andOp = Iop_And64;
   3094             }
   3095             if (U) {
   3096                switch(size) {
   3097                   case 0:
   3098                      addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
   3099                      shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3100                      break;
   3101                   case 1:
   3102                      addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
   3103                      shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3104                      break;
   3105                   case 2:
   3106                      addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
   3107                      shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3108                      break;
   3109                   default:
   3110                      vassert(0);
   3111                }
   3112             } else {
   3113                switch(size) {
   3114                   case 0:
   3115                      addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
   3116                      shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
   3117                      break;
   3118                   case 1:
   3119                      addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
   3120                      shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
   3121                      break;
   3122                   case 2:
   3123                      addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
   3124                      shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
   3125                      break;
   3126                   default:
   3127                      vassert(0);
   3128                }
   3129             }
   3130             assign(res,
   3131                    binop(addOp,
   3132                          binop(addOp,
   3133                                binop(shOp, mkexpr(arg_m), mkU8(1)),
   3134                                binop(shOp, mkexpr(arg_n), mkU8(1))),
   3135                          binop(shOp,
   3136                                binop(addOp,
   3137                                      binop(andOp, mkexpr(arg_m), imm_val),
   3138                                      binop(andOp, mkexpr(arg_n), imm_val)),
   3139                                mkU8(1))));
   3140             DIP("vhadd.%c%d %c%d, %c%d, %c%d\n",
   3141                 U ? 'u' : 's', 8 << size, regType,
   3142                 dreg, regType, nreg, regType, mreg);
   3143          } else {
   3144             /* VQADD */
   3145             IROp op, op2;
   3146             IRTemp tmp;
   3147             HChar reg_t = Q ? 'q' : 'd';
   3148             if (Q) {
   3149                switch (size) {
   3150                   case 0:
   3151                      op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
   3152                      op2 = Iop_Add8x16;
   3153                      break;
   3154                   case 1:
   3155                      op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
   3156                      op2 = Iop_Add16x8;
   3157                      break;
   3158                   case 2:
   3159                      op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
   3160                      op2 = Iop_Add32x4;
   3161                      break;
   3162                   case 3:
   3163                      op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
   3164                      op2 = Iop_Add64x2;
   3165                      break;
   3166                   default:
   3167                      vassert(0);
   3168                }
   3169             } else {
   3170                switch (size) {
   3171                   case 0:
   3172                      op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
   3173                      op2 = Iop_Add8x8;
   3174                      break;
   3175                   case 1:
   3176                      op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
   3177                      op2 = Iop_Add16x4;
   3178                      break;
   3179                   case 2:
   3180                      op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
   3181                      op2 = Iop_Add32x2;
   3182                      break;
   3183                   case 3:
   3184                      op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
   3185                      op2 = Iop_Add64;
   3186                      break;
   3187                   default:
   3188                      vassert(0);
   3189                }
   3190             }
   3191             if (Q) {
   3192                tmp = newTemp(Ity_V128);
   3193             } else {
   3194                tmp = newTemp(Ity_I64);
   3195             }
   3196             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   3197             assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
   3198             setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
   3199             DIP("vqadd.%c%d %c%d, %c%d, %c%d\n",
   3200                 U ? 'u' : 's',
   3201                 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3202          }
   3203          break;
   3204       case 1:
   3205          if (B == 0) {
   3206             /* VRHADD */
   3207             /* VRHADD C, A, B ::=
   3208                  C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
   3209             IROp shift_op, add_op;
   3210             IRTemp cc;
   3211             ULong one = 1;
   3212             HChar reg_t = Q ? 'q' : 'd';
   3213             switch (size) {
   3214                case 0: one = (one <<  8) | one; /* fall through */
   3215                case 1: one = (one << 16) | one; /* fall through */
   3216                case 2: one = (one << 32) | one; break;
   3217                case 3: return False;
   3218                default: vassert(0);
   3219             }
   3220             if (Q) {
   3221                switch (size) {
   3222                   case 0:
   3223                      shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
   3224                      add_op = Iop_Add8x16;
   3225                      break;
   3226                   case 1:
   3227                      shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   3228                      add_op = Iop_Add16x8;
   3229                      break;
   3230                   case 2:
   3231                      shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   3232                      add_op = Iop_Add32x4;
   3233                      break;
   3234                   case 3:
   3235                      return False;
   3236                   default:
   3237                      vassert(0);
   3238                }
   3239             } else {
   3240                switch (size) {
   3241                   case 0:
   3242                      shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
   3243                      add_op = Iop_Add8x8;
   3244                      break;
   3245                   case 1:
   3246                      shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
   3247                      add_op = Iop_Add16x4;
   3248                      break;
   3249                   case 2:
   3250                      shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
   3251                      add_op = Iop_Add32x2;
   3252                      break;
   3253                   case 3:
   3254                      return False;
   3255                   default:
   3256                      vassert(0);
   3257                }
   3258             }
   3259             if (Q) {
   3260                cc = newTemp(Ity_V128);
   3261                assign(cc, binop(shift_op,
   3262                                 binop(add_op,
   3263                                       binop(add_op,
   3264                                             binop(Iop_AndV128,
   3265                                                   mkexpr(arg_n),
   3266                                                   binop(Iop_64HLtoV128,
   3267                                                         mkU64(one),
   3268                                                         mkU64(one))),
   3269                                             binop(Iop_AndV128,
   3270                                                   mkexpr(arg_m),
   3271                                                   binop(Iop_64HLtoV128,
   3272                                                         mkU64(one),
   3273                                                         mkU64(one)))),
   3274                                       binop(Iop_64HLtoV128,
   3275                                             mkU64(one),
   3276                                             mkU64(one))),
   3277                                 mkU8(1)));
   3278                assign(res, binop(add_op,
   3279                                  binop(add_op,
   3280                                        binop(shift_op,
   3281                                              mkexpr(arg_n),
   3282                                              mkU8(1)),
   3283                                        binop(shift_op,
   3284                                              mkexpr(arg_m),
   3285                                              mkU8(1))),
   3286                                  mkexpr(cc)));
   3287             } else {
   3288                cc = newTemp(Ity_I64);
   3289                assign(cc, binop(shift_op,
   3290                                 binop(add_op,
   3291                                       binop(add_op,
   3292                                             binop(Iop_And64,
   3293                                                   mkexpr(arg_n),
   3294                                                   mkU64(one)),
   3295                                             binop(Iop_And64,
   3296                                                   mkexpr(arg_m),
   3297                                                   mkU64(one))),
   3298                                       mkU64(one)),
   3299                                 mkU8(1)));
   3300                assign(res, binop(add_op,
   3301                                  binop(add_op,
   3302                                        binop(shift_op,
   3303                                              mkexpr(arg_n),
   3304                                              mkU8(1)),
   3305                                        binop(shift_op,
   3306                                              mkexpr(arg_m),
   3307                                              mkU8(1))),
   3308                                  mkexpr(cc)));
   3309             }
   3310             DIP("vrhadd.%c%d %c%d, %c%d, %c%d\n",
   3311                 U ? 'u' : 's',
   3312                 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3313          } else {
   3314             if (U == 0)  {
   3315                switch(C) {
   3316                   case 0: {
   3317                      /* VAND  */
   3318                      HChar reg_t = Q ? 'q' : 'd';
   3319                      if (Q) {
   3320                         assign(res, binop(Iop_AndV128, mkexpr(arg_n),
   3321                                                        mkexpr(arg_m)));
   3322                      } else {
   3323                         assign(res, binop(Iop_And64, mkexpr(arg_n),
   3324                                                      mkexpr(arg_m)));
   3325                      }
   3326                      DIP("vand %c%d, %c%d, %c%d\n",
   3327                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3328                      break;
   3329                   }
   3330                   case 1: {
   3331                      /* VBIC  */
   3332                      HChar reg_t = Q ? 'q' : 'd';
   3333                      if (Q) {
   3334                         assign(res, binop(Iop_AndV128,mkexpr(arg_n),
   3335                                unop(Iop_NotV128, mkexpr(arg_m))));
   3336                      } else {
   3337                         assign(res, binop(Iop_And64, mkexpr(arg_n),
   3338                                unop(Iop_Not64, mkexpr(arg_m))));
   3339                      }
   3340                      DIP("vbic %c%d, %c%d, %c%d\n",
   3341                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3342                      break;
   3343                   }
   3344                   case 2:
   3345                      if ( nreg != mreg) {
   3346                         /* VORR  */
   3347                         HChar reg_t = Q ? 'q' : 'd';
   3348                         if (Q) {
   3349                            assign(res, binop(Iop_OrV128, mkexpr(arg_n),
   3350                                                          mkexpr(arg_m)));
   3351                         } else {
   3352                            assign(res, binop(Iop_Or64, mkexpr(arg_n),
   3353                                                        mkexpr(arg_m)));
   3354                         }
   3355                         DIP("vorr %c%d, %c%d, %c%d\n",
   3356                             reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3357                      } else {
   3358                         /* VMOV  */
   3359                         HChar reg_t = Q ? 'q' : 'd';
   3360                         assign(res, mkexpr(arg_m));
   3361                         DIP("vmov %c%d, %c%d\n", reg_t, dreg, reg_t, mreg);
   3362                      }
   3363                      break;
   3364                   case 3:{
   3365                      /* VORN  */
   3366                      HChar reg_t = Q ? 'q' : 'd';
   3367                      if (Q) {
   3368                         assign(res, binop(Iop_OrV128,mkexpr(arg_n),
   3369                                unop(Iop_NotV128, mkexpr(arg_m))));
   3370                      } else {
   3371                         assign(res, binop(Iop_Or64, mkexpr(arg_n),
   3372                                unop(Iop_Not64, mkexpr(arg_m))));
   3373                      }
   3374                      DIP("vorn %c%d, %c%d, %c%d\n",
   3375                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3376                      break;
   3377                   }
   3378                }
   3379             } else {
   3380                switch(C) {
   3381                   case 0:
   3382                      /* VEOR (XOR)  */
   3383                      if (Q) {
   3384                         assign(res, binop(Iop_XorV128, mkexpr(arg_n),
   3385                                                        mkexpr(arg_m)));
   3386                      } else {
   3387                         assign(res, binop(Iop_Xor64, mkexpr(arg_n),
   3388                                                      mkexpr(arg_m)));
   3389                      }
   3390                      DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   3391                            Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3392                      break;
   3393                   case 1:
   3394                      /* VBSL  */
   3395                      if (Q) {
   3396                         IRTemp reg_d = newTemp(Ity_V128);
   3397                         assign(reg_d, getQReg(dreg));
   3398                         assign(res,
   3399                                binop(Iop_OrV128,
   3400                                      binop(Iop_AndV128, mkexpr(arg_n),
   3401                                                         mkexpr(reg_d)),
   3402                                      binop(Iop_AndV128,
   3403                                            mkexpr(arg_m),
   3404                                            unop(Iop_NotV128,
   3405                                                  mkexpr(reg_d)) ) ) );
   3406                      } else {
   3407                         IRTemp reg_d = newTemp(Ity_I64);
   3408                         assign(reg_d, getDRegI64(dreg));
   3409                         assign(res,
   3410                                binop(Iop_Or64,
   3411                                      binop(Iop_And64, mkexpr(arg_n),
   3412                                                       mkexpr(reg_d)),
   3413                                      binop(Iop_And64,
   3414                                            mkexpr(arg_m),
   3415                                            unop(Iop_Not64, mkexpr(reg_d)))));
   3416                      }
   3417                      DIP("vbsl %c%u, %c%u, %c%u\n",
   3418                          Q ? 'q' : 'd', dreg,
   3419                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3420                      break;
   3421                   case 2:
   3422                      /* VBIT  */
   3423                      if (Q) {
   3424                         IRTemp reg_d = newTemp(Ity_V128);
   3425                         assign(reg_d, getQReg(dreg));
   3426                         assign(res,
   3427                                binop(Iop_OrV128,
   3428                                      binop(Iop_AndV128, mkexpr(arg_n),
   3429                                                         mkexpr(arg_m)),
   3430                                      binop(Iop_AndV128,
   3431                                            mkexpr(reg_d),
   3432                                            unop(Iop_NotV128, mkexpr(arg_m)))));
   3433                      } else {
   3434                         IRTemp reg_d = newTemp(Ity_I64);
   3435                         assign(reg_d, getDRegI64(dreg));
   3436                         assign(res,
   3437                                binop(Iop_Or64,
   3438                                      binop(Iop_And64, mkexpr(arg_n),
   3439                                                       mkexpr(arg_m)),
   3440                                      binop(Iop_And64,
   3441                                            mkexpr(reg_d),
   3442                                            unop(Iop_Not64, mkexpr(arg_m)))));
   3443                      }
   3444                      DIP("vbit %c%u, %c%u, %c%u\n",
   3445                          Q ? 'q' : 'd', dreg,
   3446                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3447                      break;
   3448                   case 3:
   3449                      /* VBIF  */
   3450                      if (Q) {
   3451                         IRTemp reg_d = newTemp(Ity_V128);
   3452                         assign(reg_d, getQReg(dreg));
   3453                         assign(res,
   3454                                binop(Iop_OrV128,
   3455                                      binop(Iop_AndV128, mkexpr(reg_d),
   3456                                                         mkexpr(arg_m)),
   3457                                      binop(Iop_AndV128,
   3458                                            mkexpr(arg_n),
   3459                                            unop(Iop_NotV128, mkexpr(arg_m)))));
   3460                      } else {
   3461                         IRTemp reg_d = newTemp(Ity_I64);
   3462                         assign(reg_d, getDRegI64(dreg));
   3463                         assign(res,
   3464                                binop(Iop_Or64,
   3465                                      binop(Iop_And64, mkexpr(reg_d),
   3466                                                       mkexpr(arg_m)),
   3467                                      binop(Iop_And64,
   3468                                            mkexpr(arg_n),
   3469                                            unop(Iop_Not64, mkexpr(arg_m)))));
   3470                      }
   3471                      DIP("vbif %c%u, %c%u, %c%u\n",
   3472                          Q ? 'q' : 'd', dreg,
   3473                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3474                      break;
   3475                }
   3476             }
   3477          }
   3478          break;
   3479       case 2:
   3480          if (B == 0) {
   3481             /* VHSUB */
   3482             /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1)   */
   3483             ULong imm = 0;
   3484             IRExpr *imm_val;
   3485             IROp subOp;
   3486             IROp notOp;
   3487             IROp andOp;
   3488             IROp shOp;
   3489             if (size == 3)
   3490                return False;
   3491             switch(size) {
   3492                case 0: imm = 0x101010101010101LL; break;
   3493                case 1: imm = 0x1000100010001LL; break;
   3494                case 2: imm = 0x100000001LL; break;
   3495                default: vassert(0);
   3496             }
   3497             if (Q) {
   3498                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   3499                andOp = Iop_AndV128;
   3500                notOp = Iop_NotV128;
   3501             } else {
   3502                imm_val = mkU64(imm);
   3503                andOp = Iop_And64;
   3504                notOp = Iop_Not64;
   3505             }
   3506             if (U) {
   3507                switch(size) {
   3508                   case 0:
   3509                      subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3510                      shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3511                      break;
   3512                   case 1:
   3513                      subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3514                      shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3515                      break;
   3516                   case 2:
   3517                      subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3518                      shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3519                      break;
   3520                   default:
   3521                      vassert(0);
   3522                }
   3523             } else {
   3524                switch(size) {
   3525                   case 0:
   3526                      subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3527                      shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
   3528                      break;
   3529                   case 1:
   3530                      subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3531                      shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
   3532                      break;
   3533                   case 2:
   3534                      subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3535                      shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
   3536                      break;
   3537                   default:
   3538                      vassert(0);
   3539                }
   3540             }
   3541             assign(res,
   3542                    binop(subOp,
   3543                          binop(subOp,
   3544                                binop(shOp, mkexpr(arg_n), mkU8(1)),
   3545                                binop(shOp, mkexpr(arg_m), mkU8(1))),
   3546                          binop(andOp,
   3547                                binop(andOp,
   3548                                      unop(notOp, mkexpr(arg_n)),
   3549                                      mkexpr(arg_m)),
   3550                                imm_val)));
   3551             DIP("vhsub.%c%u %c%u, %c%u, %c%u\n",
   3552                 U ? 'u' : 's', 8 << size,
   3553                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3554                 mreg);
   3555          } else {
   3556             /* VQSUB */
   3557             IROp op, op2;
   3558             IRTemp tmp;
   3559             if (Q) {
   3560                switch (size) {
   3561                   case 0:
   3562                      op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
   3563                      op2 = Iop_Sub8x16;
   3564                      break;
   3565                   case 1:
   3566                      op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
   3567                      op2 = Iop_Sub16x8;
   3568                      break;
   3569                   case 2:
   3570                      op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
   3571                      op2 = Iop_Sub32x4;
   3572                      break;
   3573                   case 3:
   3574                      op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
   3575                      op2 = Iop_Sub64x2;
   3576                      break;
   3577                   default:
   3578                      vassert(0);
   3579                }
   3580             } else {
   3581                switch (size) {
   3582                   case 0:
   3583                      op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
   3584                      op2 = Iop_Sub8x8;
   3585                      break;
   3586                   case 1:
   3587                      op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
   3588                      op2 = Iop_Sub16x4;
   3589                      break;
   3590                   case 2:
   3591                      op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
   3592                      op2 = Iop_Sub32x2;
   3593                      break;
   3594                   case 3:
   3595                      op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
   3596                      op2 = Iop_Sub64;
   3597                      break;
   3598                   default:
   3599                      vassert(0);
   3600                }
   3601             }
   3602             if (Q)
   3603                tmp = newTemp(Ity_V128);
   3604             else
   3605                tmp = newTemp(Ity_I64);
   3606             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   3607             assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
   3608             setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
   3609             DIP("vqsub.%c%u %c%u, %c%u, %c%u\n",
   3610                 U ? 'u' : 's', 8 << size,
   3611                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3612                 mreg);
   3613          }
   3614          break;
   3615       case 3: {
   3616             IROp op;
   3617             if (Q) {
   3618                switch (size) {
   3619                   case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
   3620                   case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
   3621                   case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
   3622                   case 3: return False;
   3623                   default: vassert(0);
   3624                }
   3625             } else {
   3626                switch (size) {
   3627                   case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
   3628                   case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
   3629                   case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
   3630                   case 3: return False;
   3631                   default: vassert(0);
   3632                }
   3633             }
   3634             if (B == 0) {
   3635                /* VCGT  */
   3636                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   3637                DIP("vcgt.%c%u %c%u, %c%u, %c%u\n",
   3638                    U ? 'u' : 's', 8 << size,
   3639                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3640                    mreg);
   3641             } else {
   3642                /* VCGE  */
   3643                /* VCGE res, argn, argm
   3644                     is equal to
   3645                   VCGT tmp, argm, argn
   3646                   VNOT res, tmp */
   3647                assign(res,
   3648                       unop(Q ? Iop_NotV128 : Iop_Not64,
   3649                            binop(op, mkexpr(arg_m), mkexpr(arg_n))));
   3650                DIP("vcge.%c%u %c%u, %c%u, %c%u\n",
   3651                    U ? 'u' : 's', 8 << size,
   3652                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3653                    mreg);
   3654             }
   3655          }
   3656          break;
   3657       case 4:
   3658          if (B == 0) {
   3659             /* VSHL */
   3660             IROp op = Iop_INVALID, sub_op = Iop_INVALID;
   3661             IRTemp tmp = IRTemp_INVALID;
   3662             if (U) {
   3663                switch (size) {
   3664                   case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
   3665                   case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
   3666                   case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
   3667                   case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
   3668                   default: vassert(0);
   3669                }
   3670             } else {
   3671                tmp = newTemp(Q ? Ity_V128 : Ity_I64);
   3672                switch (size) {
   3673                   case 0:
   3674                      op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   3675                      sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3676                      break;
   3677                   case 1:
   3678                      op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   3679                      sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3680                      break;
   3681                   case 2:
   3682                      op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   3683                      sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3684                      break;
   3685                   case 3:
   3686                      op = Q ? Iop_Sar64x2 : Iop_Sar64;
   3687                      sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
   3688                      break;
   3689                   default:
   3690                      vassert(0);
   3691                }
   3692             }
   3693             if (U) {
   3694                if (!Q && (size == 3))
   3695                   assign(res, binop(op, mkexpr(arg_m),
   3696                                         unop(Iop_64to8, mkexpr(arg_n))));
   3697                else
   3698                   assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
   3699             } else {
   3700                if (Q)
   3701                   assign(tmp, binop(sub_op,
   3702                                     binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
   3703                                     mkexpr(arg_n)));
   3704                else
   3705                   assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
   3706                if (!Q && (size == 3))
   3707                   assign(res, binop(op, mkexpr(arg_m),
   3708                                         unop(Iop_64to8, mkexpr(tmp))));
   3709                else
   3710                   assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
   3711             }
   3712             DIP("vshl.%c%u %c%u, %c%u, %c%u\n",
   3713                 U ? 'u' : 's', 8 << size,
   3714                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3715                 nreg);
   3716          } else {
   3717             /* VQSHL */
   3718             IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
   3719             IRTemp tmp, shval, mask, old_shval;
   3720             UInt i;
   3721             ULong esize;
   3722             cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
   3723             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3724             if (U) {
   3725                switch (size) {
   3726                   case 0:
   3727                      op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
   3728                      op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
   3729                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3730                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3731                      break;
   3732                   case 1:
   3733                      op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
   3734                      op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
   3735                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3736                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3737                      break;
   3738                   case 2:
   3739                      op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
   3740                      op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
   3741                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3742                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3743                      break;
   3744                   case 3:
   3745                      op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
   3746                      op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
   3747                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3748                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3749                      break;
   3750                   default:
   3751                      vassert(0);
   3752                }
   3753             } else {
   3754                switch (size) {
   3755                   case 0:
   3756                      op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
   3757                      op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   3758                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3759                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3760                      break;
   3761                   case 1:
   3762                      op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
   3763                      op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   3764                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3765                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3766                      break;
   3767                   case 2:
   3768                      op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
   3769                      op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   3770                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3771                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3772                      break;
   3773                   case 3:
   3774                      op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
   3775                      op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
   3776                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3777                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3778                      break;
   3779                   default:
   3780                      vassert(0);
   3781                }
   3782             }
   3783             if (Q) {
   3784                tmp = newTemp(Ity_V128);
   3785                shval = newTemp(Ity_V128);
   3786                mask = newTemp(Ity_V128);
   3787             } else {
   3788                tmp = newTemp(Ity_I64);
   3789                shval = newTemp(Ity_I64);
   3790                mask = newTemp(Ity_I64);
   3791             }
   3792             assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
   3793             /* Only least significant byte from second argument is used.
   3794                Copy this byte to the whole vector element. */
   3795             assign(shval, binop(op_shrn,
   3796                                 binop(op_shln,
   3797                                        mkexpr(arg_n),
   3798                                        mkU8((8 << size) - 8)),
   3799                                 mkU8((8 << size) - 8)));
   3800             for(i = 0; i < size; i++) {
   3801                old_shval = shval;
   3802                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   3803                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   3804                                    mkexpr(old_shval),
   3805                                    binop(op_shln,
   3806                                          mkexpr(old_shval),
   3807                                          mkU8(8 << i))));
   3808             }
   3809             /* If shift is greater or equal to the element size and
   3810                element is non-zero, then QC flag should be set. */
   3811             esize = (8 << size) - 1;
   3812             esize = (esize <<  8) | esize;
   3813             esize = (esize << 16) | esize;
   3814             esize = (esize << 32) | esize;
   3815             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   3816                              binop(cmp_gt, mkexpr(shval),
   3817                                            Q ? mkU128(esize) : mkU64(esize)),
   3818                              unop(cmp_neq, mkexpr(arg_m))),
   3819                        Q ? mkU128(0) : mkU64(0),
   3820                        Q, condT);
   3821             /* Otherwise QC flag should be set if shift value is positive and
   3822                the result, right-shifted by the same value, is not equal to
   3823                the left argument. */
   3824             assign(mask, binop(cmp_gt, mkexpr(shval),
   3825                                        Q ? mkU128(0) : mkU64(0)));
   3826             if (!Q && size == 3)
   3827                assign(tmp, binop(op_rev, mkexpr(res),
   3828                                          unop(Iop_64to8, mkexpr(arg_n))));
   3829             else
   3830                assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
   3831             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   3832                              mkexpr(tmp), mkexpr(mask)),
   3833                        binop(Q ? Iop_AndV128 : Iop_And64,
   3834                              mkexpr(arg_m), mkexpr(mask)),
   3835                        Q, condT);
   3836             DIP("vqshl.%c%u %c%u, %c%u, %c%u\n",
   3837                 U ? 'u' : 's', 8 << size,
   3838                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3839                 nreg);
   3840          }
   3841          break;
   3842       case 5:
   3843          if (B == 0) {
   3844             /* VRSHL */
   3845             IROp op, op_shrn, op_shln, cmp_gt, op_add;
   3846             IRTemp shval, old_shval, imm_val, round;
   3847             UInt i;
   3848             ULong imm;
   3849             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3850             imm = 1L;
   3851             switch (size) {
   3852                case 0: imm = (imm <<  8) | imm; /* fall through */
   3853                case 1: imm = (imm << 16) | imm; /* fall through */
   3854                case 2: imm = (imm << 32) | imm; /* fall through */
   3855                case 3: break;
   3856                default: vassert(0);
   3857             }
   3858             imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
   3859             round = newTemp(Q ? Ity_V128 : Ity_I64);
   3860             assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
   3861             if (U) {
   3862                switch (size) {
   3863                   case 0:
   3864                      op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
   3865                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   3866                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3867                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3868                      break;
   3869                   case 1:
   3870                      op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
   3871                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   3872                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3873                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3874                      break;
   3875                   case 2:
   3876                      op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
   3877                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   3878                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3879                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3880                      break;
   3881                   case 3:
   3882                      op = Q ? Iop_Shl64x2 : Iop_Shl64;
   3883                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   3884                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3885                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3886                      break;
   3887                   default:
   3888                      vassert(0);
   3889                }
   3890             } else {
   3891                switch (size) {
   3892                   case 0:
   3893                      op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
   3894                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   3895                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3896                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3897                      break;
   3898                   case 1:
   3899                      op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
   3900                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   3901                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3902                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3903                      break;
   3904                   case 2:
   3905                      op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
   3906                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   3907                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3908                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3909                      break;
   3910                   case 3:
   3911                      op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
   3912                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   3913                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3914                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3915                      break;
   3916                   default:
   3917                      vassert(0);
   3918                }
   3919             }
   3920             if (Q) {
   3921                shval = newTemp(Ity_V128);
   3922             } else {
   3923                shval = newTemp(Ity_I64);
   3924             }
   3925             /* Only least significant byte from second argument is used.
   3926                Copy this byte to the whole vector element. */
   3927             assign(shval, binop(op_shrn,
   3928                                 binop(op_shln,
   3929                                        mkexpr(arg_n),
   3930                                        mkU8((8 << size) - 8)),
   3931                                 mkU8((8 << size) - 8)));
   3932             for (i = 0; i < size; i++) {
   3933                old_shval = shval;
   3934                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   3935                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   3936                                    mkexpr(old_shval),
   3937                                    binop(op_shln,
   3938                                          mkexpr(old_shval),
   3939                                          mkU8(8 << i))));
   3940             }
   3941             /* Compute the result */
   3942             if (!Q && size == 3 && U) {
   3943                assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   3944                                    binop(op,
   3945                                          mkexpr(arg_m),
   3946                                          unop(Iop_64to8,
   3947                                               binop(op_add,
   3948                                                     mkexpr(arg_n),
   3949                                                     mkexpr(imm_val)))),
   3950                                    binop(Q ? Iop_AndV128 : Iop_And64,
   3951                                          mkexpr(imm_val),
   3952                                          binop(cmp_gt,
   3953                                                Q ? mkU128(0) : mkU64(0),
   3954                                                mkexpr(arg_n)))));
   3955                assign(res, binop(op_add,
   3956                                  binop(op,
   3957                                        mkexpr(arg_m),
   3958                                        unop(Iop_64to8, mkexpr(arg_n))),
   3959                                  mkexpr(round)));
   3960             } else {
   3961                assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   3962                                    binop(op,
   3963                                          mkexpr(arg_m),
   3964                                          binop(op_add,
   3965                                                mkexpr(arg_n),
   3966                                                mkexpr(imm_val))),
   3967                                    binop(Q ? Iop_AndV128 : Iop_And64,
   3968                                          mkexpr(imm_val),
   3969                                          binop(cmp_gt,
   3970                                                Q ? mkU128(0) : mkU64(0),
   3971                                                mkexpr(arg_n)))));
   3972                assign(res, binop(op_add,
   3973                                  binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   3974                                  mkexpr(round)));
   3975             }
   3976             DIP("vrshl.%c%u %c%u, %c%u, %c%u\n",
   3977                 U ? 'u' : 's', 8 << size,
   3978                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3979                 nreg);
   3980          } else {
   3981             /* VQRSHL */
   3982             IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
   3983             IRTemp tmp, shval, mask, old_shval, imm_val, round;
   3984             UInt i;
   3985             ULong esize, imm;
   3986             cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
   3987             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3988             imm = 1L;
   3989             switch (size) {
   3990                case 0: imm = (imm <<  8) | imm; /* fall through */
   3991                case 1: imm = (imm << 16) | imm; /* fall through */
   3992                case 2: imm = (imm << 32) | imm; /* fall through */
   3993                case 3: break;
   3994                default: vassert(0);
   3995             }
   3996             imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
   3997             round = newTemp(Q ? Ity_V128 : Ity_I64);
   3998             assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
   3999             if (U) {
   4000                switch (size) {
   4001                   case 0:
   4002                      op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
   4003                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   4004                      op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
   4005                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   4006                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   4007                      break;
   4008                   case 1:
   4009                      op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
   4010                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   4011                      op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
   4012                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   4013                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   4014                      break;
   4015                   case 2:
   4016                      op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
   4017                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   4018                      op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
   4019                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   4020                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   4021                      break;
   4022                   case 3:
   4023                      op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
   4024                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   4025                      op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
   4026                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   4027                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   4028                      break;
   4029                   default:
   4030                      vassert(0);
   4031                }
   4032             } else {
   4033                switch (size) {
   4034                   case 0:
   4035                      op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
   4036                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   4037                      op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   4038                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   4039                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   4040                      break;
   4041                   case 1:
   4042                      op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
   4043                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   4044                      op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   4045                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   4046                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   4047                      break;
   4048                   case 2:
   4049                      op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
   4050                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   4051                      op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   4052                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   4053                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   4054                      break;
   4055                   case 3:
   4056                      op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
   4057                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   4058                      op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
   4059                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   4060                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   4061                      break;
   4062                   default:
   4063                      vassert(0);
   4064                }
   4065             }
   4066             if (Q) {
   4067                tmp = newTemp(Ity_V128);
   4068                shval = newTemp(Ity_V128);
   4069                mask = newTemp(Ity_V128);
   4070             } else {
   4071                tmp = newTemp(Ity_I64);
   4072                shval = newTemp(Ity_I64);
   4073                mask = newTemp(Ity_I64);
   4074             }
   4075             /* Only least significant byte from second argument is used.
   4076                Copy this byte to the whole vector element. */
   4077             assign(shval, binop(op_shrn,
   4078                                 binop(op_shln,
   4079                                        mkexpr(arg_n),
   4080                                        mkU8((8 << size) - 8)),
   4081                                 mkU8((8 << size) - 8)));
   4082             for (i = 0; i < size; i++) {
   4083                old_shval = shval;
   4084                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   4085                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   4086                                    mkexpr(old_shval),
   4087                                    binop(op_shln,
   4088                                          mkexpr(old_shval),
   4089                                          mkU8(8 << i))));
   4090             }
   4091             /* Compute the result */
   4092             assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   4093                                 binop(op,
   4094                                       mkexpr(arg_m),
   4095                                       binop(op_add,
   4096                                             mkexpr(arg_n),
   4097                                             mkexpr(imm_val))),
   4098                                 binop(Q ? Iop_AndV128 : Iop_And64,
   4099                                       mkexpr(imm_val),
   4100                                       binop(cmp_gt,
   4101                                             Q ? mkU128(0) : mkU64(0),
   4102                                             mkexpr(arg_n)))));
   4103             assign(res, binop(op_add,
   4104                               binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   4105                               mkexpr(round)));
   4106             /* If shift is greater or equal to the element size and element is
   4107                non-zero, then QC flag should be set. */
   4108             esize = (8 << size) - 1;
   4109             esize = (esize <<  8) | esize;
   4110             esize = (esize << 16) | esize;
   4111             esize = (esize << 32) | esize;
   4112             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4113                              binop(cmp_gt, mkexpr(shval),
   4114                                            Q ? mkU128(esize) : mkU64(esize)),
   4115                              unop(cmp_neq, mkexpr(arg_m))),
   4116                        Q ? mkU128(0) : mkU64(0),
   4117                        Q, condT);
   4118             /* Otherwise QC flag should be set if shift value is positive and
   4119                the result, right-shifted by the same value, is not equal to
   4120                the left argument. */
   4121             assign(mask, binop(cmp_gt, mkexpr(shval),
   4122                                Q ? mkU128(0) : mkU64(0)));
   4123             if (!Q && size == 3)
   4124                assign(tmp, binop(op_rev, mkexpr(res),
   4125                                          unop(Iop_64to8, mkexpr(arg_n))));
   4126             else
   4127                assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
   4128             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4129                              mkexpr(tmp), mkexpr(mask)),
   4130                        binop(Q ? Iop_AndV128 : Iop_And64,
   4131                              mkexpr(arg_m), mkexpr(mask)),
   4132                        Q, condT);
   4133             DIP("vqrshl.%c%u %c%u, %c%u, %c%u\n",
   4134                 U ? 'u' : 's', 8 << size,
   4135                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   4136                 nreg);
   4137          }
   4138          break;
   4139       case 6:
   4140          /* VMAX, VMIN  */
   4141          if (B == 0) {
   4142             /* VMAX */
   4143             IROp op;
   4144             if (U == 0) {
   4145                switch (size) {
   4146                   case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
   4147                   case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
   4148                   case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
   4149                   case 3: return False;
   4150                   default: vassert(0);
   4151                }
   4152             } else {
   4153                switch (size) {
   4154                   case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
   4155                   case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
   4156                   case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
   4157                   case 3: return False;
   4158                   default: vassert(0);
   4159                }
   4160             }
   4161             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4162             DIP("vmax.%c%u %c%u, %c%u, %c%u\n",
   4163                 U ? 'u' : 's', 8 << size,
   4164                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4165                 mreg);
   4166          } else {
   4167             /* VMIN */
   4168             IROp op;
   4169             if (U == 0) {
   4170                switch (size) {
   4171                   case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
   4172                   case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
   4173                   case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
   4174                   case 3: return False;
   4175                   default: vassert(0);
   4176                }
   4177             } else {
   4178                switch (size) {
   4179                   case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
   4180                   case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
   4181                   case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
   4182                   case 3: return False;
   4183                   default: vassert(0);
   4184                }
   4185             }
   4186             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4187             DIP("vmin.%c%u %c%u, %c%u, %c%u\n",
   4188                 U ? 'u' : 's', 8 << size,
   4189                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4190                 mreg);
   4191          }
   4192          break;
   4193       case 7:
   4194          if (B == 0) {
   4195             /* VABD */
   4196             IROp op_cmp, op_sub;
   4197             IRTemp cond;
   4198             if ((theInstr >> 23) & 1) {
   4199                vpanic("VABDL should not be in dis_neon_data_3same\n");
   4200             }
   4201             if (Q) {
   4202                switch (size) {
   4203                   case 0:
   4204                      op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
   4205                      op_sub = Iop_Sub8x16;
   4206                      break;
   4207                   case 1:
   4208                      op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
   4209                      op_sub = Iop_Sub16x8;
   4210                      break;
   4211                   case 2:
   4212                      op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
   4213                      op_sub = Iop_Sub32x4;
   4214                      break;
   4215                   case 3:
   4216                      return False;
   4217                   default:
   4218                      vassert(0);
   4219                }
   4220             } else {
   4221                switch (size) {
   4222                   case 0:
   4223                      op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4224                      op_sub = Iop_Sub8x8;
   4225                      break;
   4226                   case 1:
   4227                      op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   4228                      op_sub = Iop_Sub16x4;
   4229                      break;
   4230                   case 2:
   4231                      op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   4232                      op_sub = Iop_Sub32x2;
   4233                      break;
   4234                   case 3:
   4235                      return False;
   4236                   default:
   4237                      vassert(0);
   4238                }
   4239             }
   4240             if (Q) {
   4241                cond = newTemp(Ity_V128);
   4242             } else {
   4243                cond = newTemp(Ity_I64);
   4244             }
   4245             assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
   4246             assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
   4247                               binop(Q ? Iop_AndV128 : Iop_And64,
   4248                                     binop(op_sub, mkexpr(arg_n),
   4249                                                   mkexpr(arg_m)),
   4250                                     mkexpr(cond)),
   4251                               binop(Q ? Iop_AndV128 : Iop_And64,
   4252                                     binop(op_sub, mkexpr(arg_m),
   4253                                                   mkexpr(arg_n)),
   4254                                     unop(Q ? Iop_NotV128 : Iop_Not64,
   4255                                          mkexpr(cond)))));
   4256             DIP("vabd.%c%u %c%u, %c%u, %c%u\n",
   4257                 U ? 'u' : 's', 8 << size,
   4258                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4259                 mreg);
   4260          } else {
   4261             /* VABA */
   4262             IROp op_cmp, op_sub, op_add;
   4263             IRTemp cond, acc, tmp;
   4264             if ((theInstr >> 23) & 1) {
   4265                vpanic("VABAL should not be in dis_neon_data_3same");
   4266             }
   4267             if (Q) {
   4268                switch (size) {
   4269                   case 0:
   4270                      op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
   4271                      op_sub = Iop_Sub8x16;
   4272                      op_add = Iop_Add8x16;
   4273                      break;
   4274                   case 1:
   4275                      op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
   4276                      op_sub = Iop_Sub16x8;
   4277                      op_add = Iop_Add16x8;
   4278                      break;
   4279                   case 2:
   4280                      op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
   4281                      op_sub = Iop_Sub32x4;
   4282                      op_add = Iop_Add32x4;
   4283                      break;
   4284                   case 3:
   4285                      return False;
   4286                   default:
   4287                      vassert(0);
   4288                }
   4289             } else {
   4290                switch (size) {
   4291                   case 0:
   4292                      op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4293                      op_sub = Iop_Sub8x8;
   4294                      op_add = Iop_Add8x8;
   4295                      break;
   4296                   case 1:
   4297                      op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   4298                      op_sub = Iop_Sub16x4;
   4299                      op_add = Iop_Add16x4;
   4300                      break;
   4301                   case 2:
   4302                      op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   4303                      op_sub = Iop_Sub32x2;
   4304                      op_add = Iop_Add32x2;
   4305                      break;
   4306                   case 3:
   4307                      return False;
   4308                   default:
   4309                      vassert(0);
   4310                }
   4311             }
   4312             if (Q) {
   4313                cond = newTemp(Ity_V128);
   4314                acc = newTemp(Ity_V128);
   4315                tmp = newTemp(Ity_V128);
   4316                assign(acc, getQReg(dreg));
   4317             } else {
   4318                cond = newTemp(Ity_I64);
   4319                acc = newTemp(Ity_I64);
   4320                tmp = newTemp(Ity_I64);
   4321                assign(acc, getDRegI64(dreg));
   4322             }
   4323             assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
   4324             assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
   4325                               binop(Q ? Iop_AndV128 : Iop_And64,
   4326                                     binop(op_sub, mkexpr(arg_n),
   4327                                                   mkexpr(arg_m)),
   4328                                     mkexpr(cond)),
   4329                               binop(Q ? Iop_AndV128 : Iop_And64,
   4330                                     binop(op_sub, mkexpr(arg_m),
   4331                                                   mkexpr(arg_n)),
   4332                                     unop(Q ? Iop_NotV128 : Iop_Not64,
   4333                                          mkexpr(cond)))));
   4334             assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
   4335             DIP("vaba.%c%u %c%u, %c%u, %c%u\n",
   4336                 U ? 'u' : 's', 8 << size,
   4337                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4338                 mreg);
   4339          }
   4340          break;
   4341       case 8:
   4342          if (B == 0) {
   4343             IROp op;
   4344             if (U == 0) {
   4345                /* VADD  */
   4346                switch (size) {
   4347                   case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
   4348                   case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
   4349                   case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
   4350                   case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
   4351                   default: vassert(0);
   4352                }
   4353                DIP("vadd.i%u %c%u, %c%u, %c%u\n",
   4354                    8 << size, Q ? 'q' : 'd',
   4355                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4356             } else {
   4357                /* VSUB  */
   4358                switch (size) {
   4359                   case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
   4360                   case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
   4361                   case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
   4362                   case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
   4363                   default: vassert(0);
   4364                }
   4365                DIP("vsub.i%u %c%u, %c%u, %c%u\n",
   4366                    8 << size, Q ? 'q' : 'd',
   4367                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4368             }
   4369             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4370          } else {
   4371             IROp op;
   4372             switch (size) {
   4373                case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
   4374                case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
   4375                case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
   4376                case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
   4377                default: vassert(0);
   4378             }
   4379             if (U == 0) {
   4380                /* VTST  */
   4381                assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
   4382                                           mkexpr(arg_n),
   4383                                           mkexpr(arg_m))));
   4384                DIP("vtst.%u %c%u, %c%u, %c%u\n",
   4385                    8 << size, Q ? 'q' : 'd',
   4386                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4387             } else {
   4388                /* VCEQ  */
   4389                assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   4390                                 unop(op,
   4391                                      binop(Q ? Iop_XorV128 : Iop_Xor64,
   4392                                            mkexpr(arg_n),
   4393                                            mkexpr(arg_m)))));
   4394                DIP("vceq.i%u %c%u, %c%u, %c%u\n",
   4395                    8 << size, Q ? 'q' : 'd',
   4396                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4397             }
   4398          }
   4399          break;
   4400       case 9:
   4401          if (B == 0) {
   4402             /* VMLA, VMLS (integer) */
   4403             IROp op, op2;
   4404             UInt P = (theInstr >> 24) & 1;
   4405             if (P) {
   4406                switch (size) {
   4407                   case 0:
   4408                      op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
   4409                      op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   4410                      break;
   4411                   case 1:
   4412                      op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   4413                      op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   4414                      break;
   4415                   case 2:
   4416                      op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   4417                      op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   4418                      break;
   4419                   case 3:
   4420                      return False;
   4421                   default:
   4422                      vassert(0);
   4423                }
   4424             } else {
   4425                switch (size) {
   4426                   case 0:
   4427                      op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
   4428                      op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
   4429                      break;
   4430                   case 1:
   4431                      op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   4432                      op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
   4433                      break;
   4434                   case 2:
   4435                      op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   4436                      op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
   4437                      break;
   4438                   case 3:
   4439                      return False;
   4440                   default:
   4441                      vassert(0);
   4442                }
   4443             }
   4444             assign(res, binop(op2,
   4445                               Q ? getQReg(dreg) : getDRegI64(dreg),
   4446                               binop(op, mkexpr(arg_n), mkexpr(arg_m))));
   4447             DIP("vml%c.i%u %c%u, %c%u, %c%u\n",
   4448                 P ? 's' : 'a', 8 << size,
   4449                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4450                 mreg);
   4451          } else {
   4452             /* VMUL */
   4453             IROp op;
   4454             UInt P = (theInstr >> 24) & 1;
   4455             if (P) {
   4456                switch (size) {
   4457                   case 0:
   4458                      op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
   4459                      break;
   4460                   case 1: case 2: case 3: return False;
   4461                   default: vassert(0);
   4462                }
   4463             } else {
   4464                switch (size) {
   4465                   case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
   4466                   case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
   4467                   case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
   4468                   case 3: return False;
   4469                   default: vassert(0);
   4470                }
   4471             }
   4472             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4473             DIP("vmul.%c%u %c%u, %c%u, %c%u\n",
   4474                 P ? 'p' : 'i', 8 << size,
   4475                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4476                 mreg);
   4477          }
   4478          break;
   4479       case 10: {
   4480          /* VPMAX, VPMIN  */
   4481          UInt P = (theInstr >> 4) & 1;
   4482          IROp op;
   4483          if (Q)
   4484             return False;
   4485          if (P) {
   4486             switch (size) {
   4487                case 0: op = U ? Iop_PwMin8Ux8  : Iop_PwMin8Sx8; break;
   4488                case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
   4489                case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
   4490                case 3: return False;
   4491                default: vassert(0);
   4492             }
   4493          } else {
   4494             switch (size) {
   4495                case 0: op = U ? Iop_PwMax8Ux8  : Iop_PwMax8Sx8; break;
   4496                case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
   4497                case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
   4498                case 3: return False;
   4499                default: vassert(0);
   4500             }
   4501          }
   4502          assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4503          DIP("vp%s.%c%u %c%u, %c%u, %c%u\n",
   4504              P ? "min" : "max", U ? 'u' : 's',
   4505              8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
   4506              Q ? 'q' : 'd', mreg);
   4507          break;
   4508       }
   4509       case 11:
   4510          if (B == 0) {
   4511             if (U == 0) {
   4512                /* VQDMULH  */
   4513                IROp op ,op2;
   4514                ULong imm;
   4515                switch (size) {
   4516                   case 0: case 3:
   4517                      return False;
   4518                   case 1:
   4519                      op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
   4520                      op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   4521                      imm = 1LL << 15;
   4522                      imm = (imm << 16) | imm;
   4523                      imm = (imm << 32) | imm;
   4524                      break;
   4525                   case 2:
   4526                      op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
   4527                      op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   4528                      imm = 1LL << 31;
   4529                      imm = (imm << 32) | imm;
   4530                      break;
   4531                   default:
   4532                      vassert(0);
   4533                }
   4534                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4535                setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4536                                 binop(op2, mkexpr(arg_n),
   4537                                            Q ? mkU128(imm) : mkU64(imm)),
   4538                                 binop(op2, mkexpr(arg_m),
   4539                                            Q ? mkU128(imm) : mkU64(imm))),
   4540                           Q ? mkU128(0) : mkU64(0),
   4541                           Q, condT);
   4542                DIP("vqdmulh.s%u %c%u, %c%u, %c%u\n",
   4543                    8 << size, Q ? 'q' : 'd',
   4544                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4545             } else {
   4546                /* VQRDMULH */
   4547                IROp op ,op2;
   4548                ULong imm;
   4549                switch(size) {
   4550                   case 0: case 3:
   4551                      return False;
   4552                   case 1:
   4553                      imm = 1LL << 15;
   4554                      imm = (imm << 16) | imm;
   4555                      imm = (imm << 32) | imm;
   4556                      op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
   4557                      op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   4558                      break;
   4559                   case 2:
   4560                      imm = 1LL << 31;
   4561                      imm = (imm << 32) | imm;
   4562                      op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
   4563                      op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   4564                      break;
   4565                   default:
   4566                      vassert(0);
   4567                }
   4568                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4569                setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4570                                 binop(op2, mkexpr(arg_n),
   4571                                            Q ? mkU128(imm) : mkU64(imm)),
   4572                                 binop(op2, mkexpr(arg_m),
   4573                                            Q ? mkU128(imm) : mkU64(imm))),
   4574                           Q ? mkU128(0) : mkU64(0),
   4575                           Q, condT);
   4576                DIP("vqrdmulh.s%u %c%u, %c%u, %c%u\n",
   4577                    8 << size, Q ? 'q' : 'd',
   4578                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4579             }
   4580          } else {
   4581             if (U == 0) {
   4582                /* VPADD */
   4583                IROp op;
   4584                if (Q)
   4585                   return False;
   4586                switch (size) {
   4587                   case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8;  break;
   4588                   case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
   4589                   case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
   4590                   case 3: return False;
   4591                   default: vassert(0);
   4592                }
   4593                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4594                DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
   4595                    8 << size, Q ? 'q' : 'd',
   4596                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4597             }
   4598          }
   4599          break;
   4600       /* Starting from here these are FP SIMD cases */
   4601       case 13:
   4602          if (B == 0) {
   4603             IROp op;
   4604             if (U == 0) {
   4605                if ((C >> 1) == 0) {
   4606                   /* VADD  */
   4607                   op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2 ;
   4608                   DIP("vadd.f32 %c%u, %c%u, %c%u\n",
   4609                       Q ? 'q' : 'd', dreg,
   4610                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4611                } else {
   4612                   /* VSUB  */
   4613                   op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2 ;
   4614                   DIP("vsub.f32 %c%u, %c%u, %c%u\n",
   4615                       Q ? 'q' : 'd', dreg,
   4616                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4617                }
   4618             } else {
   4619                if ((C >> 1) == 0) {
   4620                   /* VPADD */
   4621                   if (Q)
   4622                      return False;
   4623                   op = Iop_PwAdd32Fx2;
   4624                   DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
   4625                } else {
   4626                   /* VABD  */
   4627                   if (Q) {
   4628                      assign(res, unop(Iop_Abs32Fx4,
   4629                                       triop(Iop_Sub32Fx4,
   4630                                             get_FAKE_roundingmode(),
   4631                                             mkexpr(arg_n),
   4632                                             mkexpr(arg_m))));
   4633                   } else {
   4634                      assign(res, unop(Iop_Abs32Fx2,
   4635                                       binop(Iop_Sub32Fx2,
   4636                                             mkexpr(arg_n),
   4637                                             mkexpr(arg_m))));
   4638                   }
   4639                   DIP("vabd.f32 %c%u, %c%u, %c%u\n",
   4640                       Q ? 'q' : 'd', dreg,
   4641                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4642                   break;
   4643                }
   4644             }
   4645             assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   4646          } else {
   4647             if (U == 0) {
   4648                /* VMLA, VMLS  */
   4649                IROp op, op2;
   4650                UInt P = (theInstr >> 21) & 1;
   4651                if (P) {
   4652                   switch (size & 1) {
   4653                      case 0:
   4654                         op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   4655                         op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
   4656                         break;
   4657                      case 1: return False;
   4658                      default: vassert(0);
   4659                   }
   4660                } else {
   4661                   switch (size & 1) {
   4662                      case 0:
   4663                         op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   4664                         op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
   4665                         break;
   4666                      case 1: return False;
   4667                      default: vassert(0);
   4668                   }
   4669                }
   4670                assign(res, binop_w_fake_RM(
   4671                               op2,
   4672                               Q ? getQReg(dreg) : getDRegI64(dreg),
   4673                               binop_w_fake_RM(op, mkexpr(arg_n),
   4674                                                   mkexpr(arg_m))));
   4675 
   4676                DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
   4677                    P ? 's' : 'a', Q ? 'q' : 'd',
   4678                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4679             } else {
   4680                /* VMUL  */
   4681                IROp op;
   4682                if ((C >> 1) != 0)
   4683                   return False;
   4684                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ;
   4685                assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   4686                DIP("vmul.f32 %c%u, %c%u, %c%u\n",
   4687                    Q ? 'q' : 'd', dreg,
   4688                    Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4689             }
   4690          }
   4691          break;
   4692       case 14:
   4693          if (B == 0) {
   4694             if (U == 0) {
   4695                if ((C >> 1) == 0) {
   4696                   /* VCEQ  */
   4697                   IROp op;
   4698                   if ((theInstr >> 20) & 1)
   4699                      return False;
   4700                   op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2;
   4701                   assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4702                   DIP("vceq.f32 %c%u, %c%u, %c%u\n",
   4703                       Q ? 'q' : 'd', dreg,
   4704                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4705                } else {
   4706                   return False;
   4707                }
   4708             } else {
   4709                if ((C >> 1) == 0) {
   4710                   /* VCGE  */
   4711                   IROp op;
   4712                   if ((theInstr >> 20) & 1)
   4713                      return False;
   4714                   op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
   4715                   assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4716                   DIP("vcge.f32 %c%u, %c%u, %c%u\n",
   4717                       Q ? 'q' : 'd', dreg,
   4718                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4719                } else {
   4720                   /* VCGT  */
   4721                   IROp op;
   4722                   if ((theInstr >> 20) & 1)
   4723                      return False;
   4724                   op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
   4725                   assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4726                   DIP("vcgt.f32 %c%u, %c%u, %c%u\n",
   4727                       Q ? 'q' : 'd', dreg,
   4728                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4729                }
   4730             }
   4731          } else {
   4732             if (U == 1) {
   4733                /* VACGE, VACGT */
   4734                UInt op_bit = (theInstr >> 21) & 1;
   4735                IROp op, op2;
   4736                op2 = Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2;
   4737                if (op_bit) {
   4738                   op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
   4739                   assign(res, binop(op,
   4740                                     unop(op2, mkexpr(arg_n)),
   4741                                     unop(op2, mkexpr(arg_m))));
   4742                } else {
   4743                   op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
   4744                   assign(res, binop(op,
   4745                                     unop(op2, mkexpr(arg_n)),
   4746                                     unop(op2, mkexpr(arg_m))));
   4747                }
   4748                DIP("vacg%c.f32 %c%u, %c%u, %c%u\n", op_bit ? 't' : 'e',
   4749                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
   4750                    Q ? 'q' : 'd', mreg);
   4751             }
   4752          }
   4753          break;
   4754       case 15:
   4755          if (B == 0) {
   4756             if (U == 0) {
   4757                /* VMAX, VMIN  */
   4758                IROp op;
   4759                if ((theInstr >> 20) & 1)
   4760                   return False;
   4761                if ((theInstr >> 21) & 1) {
   4762                   op = Q ? Iop_Min32Fx4 : Iop_Min32Fx2;
   4763                   DIP("vmin.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4764                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4765                } else {
   4766                   op = Q ? Iop_Max32Fx4 : Iop_Max32Fx2;
   4767                   DIP("vmax.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4768                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4769                }
   4770                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4771             } else {
   4772                /* VPMAX, VPMIN   */
   4773                IROp op;
   4774                if (Q)
   4775                   return False;
   4776                if ((theInstr >> 20) & 1)
   4777                   return False;
   4778                if ((theInstr >> 21) & 1) {
   4779                   op = Iop_PwMin32Fx2;
   4780                   DIP("vpmin.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
   4781                } else {
   4782                   op = Iop_PwMax32Fx2;
   4783                   DIP("vpmax.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
   4784                }
   4785                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4786             }
   4787          } else {
   4788             if (U == 0) {
   4789                if ((C >> 1) == 0) {
   4790                   /* VRECPS */
   4791                   if ((theInstr >> 20) & 1)
   4792                      return False;
   4793                   assign(res, binop(Q ? Iop_RecipStep32Fx4
   4794                                       : Iop_RecipStep32Fx2,
   4795                                     mkexpr(arg_n),
   4796                                     mkexpr(arg_m)));
   4797                   DIP("vrecps.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4798                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4799                } else {
   4800                   /* VRSQRTS  */
   4801                   if ((theInstr >> 20) & 1)
   4802                      return False;
   4803                   assign(res, binop(Q ? Iop_RSqrtStep32Fx4
   4804                                       : Iop_RSqrtStep32Fx2,
   4805                                     mkexpr(arg_n),
   4806                                     mkexpr(arg_m)));
   4807                   DIP("vrsqrts.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4808                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4809                }
   4810             }
   4811          }
   4812          break;
   4813    }
   4814 
   4815    if (Q) {
   4816       putQReg(dreg, mkexpr(res), condT);
   4817    } else {
   4818       putDRegI64(dreg, mkexpr(res), condT);
   4819    }
   4820 
   4821    return True;
   4822 }
   4823 
   4824 /* A7.4.2 Three registers of different lengths */
   4825 static
   4826 Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
   4827 {
   4828    UInt A = (theInstr >> 8) & 0xf;
   4829    UInt B = (theInstr >> 20) & 3;
   4830    UInt U = (theInstr >> 24) & 1;
   4831    UInt P = (theInstr >> 9) & 1;
   4832    UInt mreg = get_neon_m_regno(theInstr);
   4833    UInt nreg = get_neon_n_regno(theInstr);
   4834    UInt dreg = get_neon_d_regno(theInstr);
   4835    UInt size = B;
   4836    ULong imm;
   4837    IRTemp res, arg_m, arg_n, cond, tmp;
   4838    IROp cvt, cvt2, cmp, op, op2, sh, add;
   4839    switch (A) {
   4840       case 0: case 1: case 2: case 3:
   4841          /* VADDL, VADDW, VSUBL, VSUBW */
   4842          if (dreg & 1)
   4843             return False;
   4844          dreg >>= 1;
   4845          size = B;
   4846          switch (size) {
   4847             case 0:
   4848                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   4849                op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
   4850                break;
   4851             case 1:
   4852                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   4853                op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
   4854                break;
   4855             case 2:
   4856                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   4857                op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
   4858                break;
   4859             case 3:
   4860                return False;
   4861             default:
   4862                vassert(0);
   4863          }
   4864          arg_n = newTemp(Ity_V128);
   4865          arg_m = newTemp(Ity_V128);
   4866          if (A & 1) {
   4867             if (nreg & 1)
   4868                return False;
   4869             nreg >>= 1;
   4870             assign(arg_n, getQReg(nreg));
   4871          } else {
   4872             assign(arg_n, unop(cvt, getDRegI64(nreg)));
   4873          }
   4874          assign(arg_m, unop(cvt, getDRegI64(mreg)));
   4875          putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   4876                        condT);
   4877          DIP("v%s%c.%c%u q%u, %c%u, d%u\n", (A & 2) ? "sub" : "add",
   4878              (A & 1) ? 'w' : 'l', U ? 'u' : 's', 8 << size, dreg,
   4879              (A & 1) ? 'q' : 'd', nreg, mreg);
   4880          return True;
   4881       case 4:
   4882          /* VADDHN, VRADDHN */
   4883          if (mreg & 1)
   4884             return False;
   4885          mreg >>= 1;
   4886          if (nreg & 1)
   4887             return False;
   4888          nreg >>= 1;
   4889          size = B;
   4890          switch (size) {
   4891             case 0:
   4892                op = Iop_Add16x8;
   4893                cvt = Iop_NarrowUn16to8x8;
   4894                sh = Iop_ShrN16x8;
   4895                imm = 1U << 7;
   4896                imm = (imm << 16) | imm;
   4897                imm = (imm << 32) | imm;
   4898                break;
   4899             case 1:
   4900                op = Iop_Add32x4;
   4901                cvt = Iop_NarrowUn32to16x4;
   4902                sh = Iop_ShrN32x4;
   4903                imm = 1U << 15;
   4904                imm = (imm << 32) | imm;
   4905                break;
   4906             case 2:
   4907                op = Iop_Add64x2;
   4908                cvt = Iop_NarrowUn64to32x2;
   4909                sh = Iop_ShrN64x2;
   4910                imm = 1U << 31;
   4911                break;
   4912             case 3:
   4913                return False;
   4914             default:
   4915                vassert(0);
   4916          }
   4917          tmp = newTemp(Ity_V128);
   4918          res = newTemp(Ity_V128);
   4919          assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
   4920          if (U) {
   4921             /* VRADDHN */
   4922             assign(res, binop(op, mkexpr(tmp),
   4923                      binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
   4924          } else {
   4925             assign(res, mkexpr(tmp));
   4926          }
   4927          putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
   4928                     condT);
   4929          DIP("v%saddhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
   4930              nreg, mreg);
   4931          return True;
   4932       case 5:
   4933          /* VABAL */
   4934          if (!((theInstr >> 23) & 1)) {
   4935             vpanic("VABA should not be in dis_neon_data_3diff\n");
   4936          }
   4937          if (dreg & 1)
   4938             return False;
   4939          dreg >>= 1;
   4940          switch (size) {
   4941             case 0:
   4942                cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4943                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   4944                cvt2 = Iop_Widen8Sto16x8;
   4945                op = Iop_Sub16x8;
   4946                op2 = Iop_Add16x8;
   4947                break;
   4948             case 1:
   4949                cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   4950                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   4951                cvt2 = Iop_Widen16Sto32x4;
   4952                op = Iop_Sub32x4;
   4953                op2 = Iop_Add32x4;
   4954                break;
   4955             case 2:
   4956                cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   4957                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   4958                cvt2 = Iop_Widen32Sto64x2;
   4959                op = Iop_Sub64x2;
   4960                op2 = Iop_Add64x2;
   4961                break;
   4962             case 3:
   4963                return False;
   4964             default:
   4965                vassert(0);
   4966          }
   4967          arg_n = newTemp(Ity_V128);
   4968          arg_m = newTemp(Ity_V128);
   4969          cond = newTemp(Ity_V128);
   4970          res = newTemp(Ity_V128);
   4971          assign(arg_n, unop(cvt, getDRegI64(nreg)));
   4972          assign(arg_m, unop(cvt, getDRegI64(mreg)));
   4973          assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
   4974                                             getDRegI64(mreg))));
   4975          assign(res, binop(op2,
   4976                            binop(Iop_OrV128,
   4977                                  binop(Iop_AndV128,
   4978                                        binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   4979                                        mkexpr(cond)),
   4980                                  binop(Iop_AndV128,
   4981                                        binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   4982                                        unop(Iop_NotV128, mkexpr(cond)))),
   4983                            getQReg(dreg)));
   4984          putQReg(dreg, mkexpr(res), condT);
   4985          DIP("vabal.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
   4986              nreg, mreg);
   4987          return True;
   4988       case 6:
   4989          /* VSUBHN, VRSUBHN */
   4990          if (mreg & 1)
   4991             return False;
   4992          mreg >>= 1;
   4993          if (nreg & 1)
   4994             return False;
   4995          nreg >>= 1;
   4996          size = B;
   4997          switch (size) {
   4998             case 0:
   4999                op = Iop_Sub16x8;
   5000                op2 = Iop_Add16x8;
   5001                cvt = Iop_NarrowUn16to8x8;
   5002                sh = Iop_ShrN16x8;
   5003                imm = 1U << 7;
   5004                imm = (imm << 16) | imm;
   5005                imm = (imm << 32) | imm;
   5006                break;
   5007             case 1:
   5008                op = Iop_Sub32x4;
   5009                op2 = Iop_Add32x4;
   5010                cvt = Iop_NarrowUn32to16x4;
   5011                sh = Iop_ShrN32x4;
   5012                imm = 1U << 15;
   5013                imm = (imm << 32) | imm;
   5014                break;
   5015             case 2:
   5016                op = Iop_Sub64x2;
   5017                op2 = Iop_Add64x2;
   5018                cvt = Iop_NarrowUn64to32x2;
   5019                sh = Iop_ShrN64x2;
   5020                imm = 1U << 31;
   5021                break;
   5022             case 3:
   5023                return False;
   5024             default:
   5025                vassert(0);
   5026          }
   5027          tmp = newTemp(Ity_V128);
   5028          res = newTemp(Ity_V128);
   5029          assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
   5030          if (U) {
   5031             /* VRSUBHN */
   5032             assign(res, binop(op2, mkexpr(tmp),
   5033                      binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
   5034          } else {
   5035             assign(res, mkexpr(tmp));
   5036          }
   5037          putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
   5038                     condT);
   5039          DIP("v%ssubhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
   5040              nreg, mreg);
   5041          return True;
   5042       case 7:
   5043          /* VABDL */
   5044          if (!((theInstr >> 23) & 1)) {
   5045             vpanic("VABL should not be in dis_neon_data_3diff\n");
   5046          }
   5047          if (dreg & 1)
   5048             return False;
   5049          dreg >>= 1;
   5050          switch (size) {
   5051             case 0:
   5052                cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   5053                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   5054                cvt2 = Iop_Widen8Sto16x8;
   5055                op = Iop_Sub16x8;
   5056                break;
   5057             case 1:
   5058                cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   5059                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   5060                cvt2 = Iop_Widen16Sto32x4;
   5061                op = Iop_Sub32x4;
   5062                break;
   5063             case 2:
   5064                cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   5065                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   5066                cvt2 = Iop_Widen32Sto64x2;
   5067                op = Iop_Sub64x2;
   5068                break;
   5069             case 3:
   5070                return False;
   5071             default:
   5072                vassert(0);
   5073          }
   5074          arg_n = newTemp(Ity_V128);
   5075          arg_m = newTemp(Ity_V128);
   5076          cond = newTemp(Ity_V128);
   5077          res = newTemp(Ity_V128);
   5078          assign(arg_n, unop(cvt, getDRegI64(nreg)));
   5079          assign(arg_m, unop(cvt, getDRegI64(mreg)));
   5080          assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
   5081                                             getDRegI64(mreg))));
   5082          assign(res, binop(Iop_OrV128,
   5083                            binop(Iop_AndV128,
   5084                                  binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   5085                                  mkexpr(cond)),
   5086                            binop(Iop_AndV128,
   5087                                  binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   5088                                  unop(Iop_NotV128, mkexpr(cond)))));
   5089          putQReg(dreg, mkexpr(res), condT);
   5090          DIP("vabdl.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
   5091              nreg, mreg);
   5092          return True;
   5093       case 8:
   5094       case 10:
   5095          /* VMLAL, VMLSL (integer) */
   5096          if (dreg & 1)
   5097             return False;
   5098          dreg >>= 1;
   5099          size = B;
   5100          switch (size) {
   5101             case 0:
   5102                op = U ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
   5103                op2 = P ? Iop_Sub16x8 : Iop_Add16x8;
   5104                break;
   5105             case 1:
   5106                op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
   5107                op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
   5108                break;
   5109             case 2:
   5110                op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
   5111                op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
   5112                break;
   5113             case 3:
   5114                return False;
   5115             default:
   5116                vassert(0);
   5117          }
   5118          res = newTemp(Ity_V128);
   5119          assign(res, binop(op, getDRegI64(nreg),getDRegI64(mreg)));
   5120          putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
   5121          DIP("vml%cl.%c%u q%u, d%u, d%u\n", P ? 's' : 'a', U ? 'u' : 's',
   5122              8 << size, dreg, nreg, mreg);
   5123          return True;
   5124       case 9:
   5125       case 11:
   5126          /* VQDMLAL, VQDMLSL */
   5127          if (U)
   5128             return False;
   5129          if (dreg & 1)
   5130             return False;
   5131          dreg >>= 1;
   5132          size = B;
   5133          switch (size) {
   5134             case 0: case 3:
   5135                return False;
   5136             case 1:
   5137                op = Iop_QDMull16Sx4;
   5138                cmp = Iop_CmpEQ16x4;
   5139                add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
   5140                op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
   5141                imm = 1LL << 15;
   5142                imm = (imm << 16) | imm;
   5143                imm = (imm << 32) | imm;
   5144                break;
   5145             case 2:
   5146                op = Iop_QDMull32Sx2;
   5147                cmp = Iop_CmpEQ32x2;
   5148                add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
   5149                op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
   5150                imm = 1LL << 31;
   5151                imm = (imm << 32) | imm;
   5152                break;
   5153             default:
   5154                vassert(0);
   5155          }
   5156          res = newTemp(Ity_V128);
   5157          tmp = newTemp(Ity_V128);
   5158          assign(res, binop(op, getDRegI64(nreg), getDRegI64(mreg)));
   5159          assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
   5160          setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
   5161                     True, condT);
   5162          setFlag_QC(binop(Iop_And64,
   5163                           binop(cmp, getDRegI64(nreg), mkU64(imm)),
   5164                           binop(cmp, getDRegI64(mreg), mkU64(imm))),
   5165                     mkU64(0),
   5166                     False, condT);
   5167          putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
   5168          DIP("vqdml%cl.s%u q%u, d%u, d%u\n", P ? 's' : 'a', 8 << size, dreg,
   5169              nreg, mreg);
   5170          return True;
   5171       case 12:
   5172       case 14:
   5173          /* VMULL (integer or polynomial) */
   5174          if (dreg & 1)
   5175             return False;
   5176          dreg >>= 1;
   5177          size = B;
   5178          switch (size) {
   5179             case 0:
   5180                op = (U) ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
   5181                if (P)
   5182                   op = Iop_PolynomialMull8x8;
   5183                break;
   5184             case 1:
   5185                op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
   5186                break;
   5187             case 2:
   5188                op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
   5189                break;
   5190             default:
   5191                vassert(0);
   5192          }
   5193          putQReg(dreg, binop(op, getDRegI64(nreg),
   5194                                  getDRegI64(mreg)), condT);
   5195          DIP("vmull.%c%u q%u, d%u, d%u\n", P ? 'p' : (U ? 'u' : 's'),
   5196                8 << size, dreg, nreg, mreg);
   5197          return True;
   5198       case 13:
   5199          /* VQDMULL */
   5200          if (U)
   5201             return False;
   5202          if (dreg & 1)
   5203             return False;
   5204          dreg >>= 1;
   5205          size = B;
   5206          switch (size) {
   5207             case 0:
   5208             case 3:
   5209                return False;
   5210             case 1:
   5211                op = Iop_QDMull16Sx4;
   5212                op2 = Iop_CmpEQ16x4;
   5213                imm = 1LL << 15;
   5214                imm = (imm << 16) | imm;
   5215                imm = (imm << 32) | imm;
   5216                break;
   5217             case 2:
   5218                op = Iop_QDMull32Sx2;
   5219                op2 = Iop_CmpEQ32x2;
   5220                imm = 1LL << 31;
   5221                imm = (imm << 32) | imm;
   5222                break;
   5223             default:
   5224                vassert(0);
   5225          }
   5226          putQReg(dreg, binop(op, getDRegI64(nreg), getDRegI64(mreg)),
   5227                condT);
   5228          setFlag_QC(binop(Iop_And64,
   5229                           binop(op2, getDRegI64(nreg), mkU64(imm)),
   5230                           binop(op2, getDRegI64(mreg), mkU64(imm))),
   5231                     mkU64(0),
   5232                     False, condT);
   5233          DIP("vqdmull.s%u q%u, d%u, d%u\n", 8 << size, dreg, nreg, mreg);
   5234          return True;
   5235       default:
   5236          return False;
   5237    }
   5238    return False;
   5239 }
   5240 
   5241 /* A7.4.3 Two registers and a scalar */
   5242 static
   5243 Bool dis_neon_data_2reg_and_scalar ( UInt theInstr, IRTemp condT )
   5244 {
   5245 #  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
   5246    UInt U = INSN(24,24);
   5247    UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
   5248    UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
   5249    UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
   5250    UInt size = INSN(21,20);
   5251    UInt index;
   5252    UInt Q = INSN(24,24);
   5253 
   5254    if (INSN(27,25) != 1 || INSN(23,23) != 1
   5255        || INSN(6,6) != 1 || INSN(4,4) != 0)
   5256       return False;
   5257 
   5258    /* VMLA, VMLS (scalar)  */
   5259    if ((INSN(11,8) & BITS4(1,0,1,0)) == BITS4(0,0,0,0)) {
   5260       IRTemp res, arg_m, arg_n;
   5261       IROp dup, get, op, op2, add, sub;
   5262       if (Q) {
   5263          if ((dreg & 1) || (nreg & 1))
   5264             return False;
   5265          dreg >>= 1;
   5266          nreg >>= 1;
   5267          res = newTemp(Ity_V128);
   5268          arg_m = newTemp(Ity_V128);
   5269          arg_n = newTemp(Ity_V128);
   5270          assign(arg_n, getQReg(nreg));
   5271          switch(size) {
   5272             case 1:
   5273                dup = Iop_Dup16x8;
   5274                get = Iop_GetElem16x4;
   5275                index = mreg >> 3;
   5276                mreg &= 7;
   5277                break;
   5278             case 2:
   5279                dup = Iop_Dup32x4;
   5280                get = Iop_GetElem32x2;
   5281                index = mreg >> 4;
   5282                mreg &= 0xf;
   5283                break;
   5284             case 0:
   5285             case 3:
   5286                return False;
   5287             default:
   5288                vassert(0);
   5289          }
   5290          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5291       } else {
   5292          res = newTemp(Ity_I64);
   5293          arg_m = newTemp(Ity_I64);
   5294          arg_n = newTemp(Ity_I64);
   5295          assign(arg_n, getDRegI64(nreg));
   5296          switch(size) {
   5297             case 1:
   5298                dup = Iop_Dup16x4;
   5299                get = Iop_GetElem16x4;
   5300                index = mreg >> 3;
   5301                mreg &= 7;
   5302                break;
   5303             case 2:
   5304                dup = Iop_Dup32x2;
   5305                get = Iop_GetElem32x2;
   5306                index = mreg >> 4;
   5307                mreg &= 0xf;
   5308                break;
   5309             case 0:
   5310             case 3:
   5311                return False;
   5312             default:
   5313                vassert(0);
   5314          }
   5315          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5316       }
   5317       if (INSN(8,8)) {
   5318          switch (size) {
   5319             case 2:
   5320                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   5321                add = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
   5322                sub = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
   5323                break;
   5324             case 0:
   5325             case 1:
   5326             case 3:
   5327                return False;
   5328             default:
   5329                vassert(0);
   5330          }
   5331       } else {
   5332          switch (size) {
   5333             case 1:
   5334                op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   5335                add = Q ? Iop_Add16x8 : Iop_Add16x4;
   5336                sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   5337                break;
   5338             case 2:
   5339                op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   5340                add = Q ? Iop_Add32x4 : Iop_Add32x2;
   5341                sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   5342                break;
   5343             case 0:
   5344             case 3:
   5345                return False;
   5346             default:
   5347                vassert(0);
   5348          }
   5349       }
   5350       op2 = INSN(10,10) ? sub : add;
   5351       assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   5352       if (Q)
   5353          putQReg(dreg, binop_w_fake_RM(op2, getQReg(dreg), mkexpr(res)),
   5354                  condT);
   5355       else
   5356          putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)),
   5357                     condT);
   5358       DIP("vml%c.%c%u %c%u, %c%u, d%u[%u]\n", INSN(10,10) ? 's' : 'a',
   5359             INSN(8,8) ? 'f' : 'i', 8 << size,
   5360             Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, mreg, index);
   5361       return True;
   5362    }
   5363 
   5364    /* VMLAL, VMLSL (scalar)   */
   5365    if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,0)) {
   5366       IRTemp res, arg_m, arg_n;
   5367       IROp dup, get, op, op2, add, sub;
   5368       if (dreg & 1)
   5369          return False;
   5370       dreg >>= 1;
   5371       res = newTemp(Ity_V128);
   5372       arg_m = newTemp(Ity_I64);
   5373       arg_n = newTemp(Ity_I64);
   5374       assign(arg_n, getDRegI64(nreg));
   5375       switch(size) {
   5376          case 1:
   5377             dup = Iop_Dup16x4;
   5378             get = Iop_GetElem16x4;
   5379             index = mreg >> 3;
   5380             mreg &= 7;
   5381             break;
   5382          case 2:
   5383             dup = Iop_Dup32x2;
   5384             get = Iop_GetElem32x2;
   5385             index = mreg >> 4;
   5386             mreg &= 0xf;
   5387             break;
   5388          case 0:
   5389          case 3:
   5390             return False;
   5391          default:
   5392             vassert(0);
   5393       }
   5394       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5395       switch (size) {
   5396          case 1:
   5397             op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
   5398             add = Iop_Add32x4;
   5399             sub = Iop_Sub32x4;
   5400             break;
   5401          case 2:
   5402             op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
   5403             add = Iop_Add64x2;
   5404             sub = Iop_Sub64x2;
   5405             break;
   5406          case 0:
   5407          case 3:
   5408             return False;
   5409          default:
   5410             vassert(0);
   5411       }
   5412       op2 = INSN(10,10) ? sub : add;
   5413       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5414       putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
   5415       DIP("vml%cl.%c%u q%u, d%u, d%u[%u]\n",
   5416           INSN(10,10) ? 's' : 'a', U ? 'u' : 's',
   5417           8 << size, dreg, nreg, mreg, index);
   5418       return True;
   5419    }
   5420 
   5421    /* VQDMLAL, VQDMLSL (scalar)  */
   5422    if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,1) && !U) {
   5423       IRTemp res, arg_m, arg_n, tmp;
   5424       IROp dup, get, op, op2, add, cmp;
   5425       UInt P = INSN(10,10);
   5426       ULong imm;
   5427       if (dreg & 1)
   5428          return False;
   5429       dreg >>= 1;
   5430       res = newTemp(Ity_V128);
   5431       arg_m = newTemp(Ity_I64);
   5432       arg_n = newTemp(Ity_I64);
   5433       assign(arg_n, getDRegI64(nreg));
   5434       switch(size) {
   5435          case 1:
   5436             dup = Iop_Dup16x4;
   5437             get = Iop_GetElem16x4;
   5438             index = mreg >> 3;
   5439             mreg &= 7;
   5440             break;
   5441          case 2:
   5442             dup = Iop_Dup32x2;
   5443             get = Iop_GetElem32x2;
   5444             index = mreg >> 4;
   5445             mreg &= 0xf;
   5446             break;
   5447          case 0:
   5448          case 3:
   5449             return False;
   5450          default:
   5451             vassert(0);
   5452       }
   5453       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5454       switch (size) {
   5455          case 0:
   5456          case 3:
   5457             return False;
   5458          case 1:
   5459             op = Iop_QDMull16Sx4;
   5460             cmp = Iop_CmpEQ16x4;
   5461             add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
   5462             op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
   5463             imm = 1LL << 15;
   5464             imm = (imm << 16) | imm;
   5465             imm = (imm << 32) | imm;
   5466             break;
   5467          case 2:
   5468             op = Iop_QDMull32Sx2;
   5469             cmp = Iop_CmpEQ32x2;
   5470             add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
   5471             op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
   5472             imm = 1LL << 31;
   5473             imm = (imm << 32) | imm;
   5474             break;
   5475          default:
   5476             vassert(0);
   5477       }
   5478       res = newTemp(Ity_V128);
   5479       tmp = newTemp(Ity_V128);
   5480       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5481       assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
   5482       setFlag_QC(binop(Iop_And64,
   5483                        binop(cmp, mkexpr(arg_n), mkU64(imm)),
   5484                        binop(cmp, mkexpr(arg_m), mkU64(imm))),
   5485                  mkU64(0),
   5486                  False, condT);
   5487       setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
   5488                  True, condT);
   5489       putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
   5490       DIP("vqdml%cl.s%u q%u, d%u, d%u[%u]\n", P ? 's' : 'a', 8 << size,
   5491           dreg, nreg, mreg, index);
   5492       return True;
   5493    }
   5494 
   5495    /* VMUL (by scalar)  */
   5496    if ((INSN(11,8) & BITS4(1,1,1,0)) == BITS4(1,0,0,0)) {
   5497       IRTemp res, arg_m, arg_n;
   5498       IROp dup, get, op;
   5499       if (Q) {
   5500          if ((dreg & 1) || (nreg & 1))
   5501             return False;
   5502          dreg >>= 1;
   5503          nreg >>= 1;
   5504          res = newTemp(Ity_V128);
   5505          arg_m = newTemp(Ity_V128);
   5506          arg_n = newTemp(Ity_V128);
   5507          assign(arg_n, getQReg(nreg));
   5508          switch(size) {
   5509             case 1:
   5510                dup = Iop_Dup16x8;
   5511                get = Iop_GetElem16x4;
   5512                index = mreg >> 3;
   5513                mreg &= 7;
   5514                break;
   5515             case 2:
   5516                dup = Iop_Dup32x4;
   5517                get = Iop_GetElem32x2;
   5518                index = mreg >> 4;
   5519                mreg &= 0xf;
   5520                break;
   5521             case 0:
   5522             case 3:
   5523                return False;
   5524             default:
   5525                vassert(0);
   5526          }
   5527          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5528       } else {
   5529          res = newTemp(Ity_I64);
   5530          arg_m = newTemp(Ity_I64);
   5531          arg_n = newTemp(Ity_I64);
   5532          assign(arg_n, getDRegI64(nreg));
   5533          switch(size) {
   5534             case 1:
   5535                dup = Iop_Dup16x4;
   5536                get = Iop_GetElem16x4;
   5537                index = mreg >> 3;
   5538                mreg &= 7;
   5539                break;
   5540             case 2:
   5541                dup = Iop_Dup32x2;
   5542                get = Iop_GetElem32x2;
   5543                index = mreg >> 4;
   5544                mreg &= 0xf;
   5545                break;
   5546             case 0:
   5547             case 3:
   5548                return False;
   5549             default:
   5550                vassert(0);
   5551          }
   5552          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5553       }
   5554       if (INSN(8,8)) {
   5555          switch (size) {
   5556             case 2:
   5557                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   5558                break;
   5559             case 0:
   5560             case 1:
   5561             case 3:
   5562                return False;
   5563             default:
   5564                vassert(0);
   5565          }
   5566       } else {
   5567          switch (size) {
   5568             case 1:
   5569                op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   5570                break;
   5571             case 2:
   5572                op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   5573                break;
   5574             case 0:
   5575             case 3:
   5576                return False;
   5577             default:
   5578                vassert(0);
   5579          }
   5580       }
   5581       assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   5582       if (Q)
   5583          putQReg(dreg, mkexpr(res), condT);
   5584       else
   5585          putDRegI64(dreg, mkexpr(res), condT);
   5586       DIP("vmul.%c%u %c%u, %c%u, d%u[%u]\n", INSN(8,8) ? 'f' : 'i',
   5587           8 << size, Q ? 'q' : 'd', dreg,
   5588           Q ? 'q' : 'd', nreg, mreg, index);
   5589       return True;
   5590    }
   5591 
   5592    /* VMULL (scalar) */
   5593    if (INSN(11,8) == BITS4(1,0,1,0)) {
   5594       IRTemp res, arg_m, arg_n;
   5595       IROp dup, get, op;
   5596       if (dreg & 1)
   5597          return False;
   5598       dreg >>= 1;
   5599       res = newTemp(Ity_V128);
   5600       arg_m = newTemp(Ity_I64);
   5601       arg_n = newTemp(Ity_I64);
   5602       assign(arg_n, getDRegI64(nreg));
   5603       switch(size) {
   5604          case 1:
   5605             dup = Iop_Dup16x4;
   5606             get = Iop_GetElem16x4;
   5607             index = mreg >> 3;
   5608             mreg &= 7;
   5609             break;
   5610          case 2:
   5611             dup = Iop_Dup32x2;
   5612             get = Iop_GetElem32x2;
   5613             index = mreg >> 4;
   5614             mreg &= 0xf;
   5615             break;
   5616          case 0:
   5617          case 3:
   5618             return False;
   5619          default:
   5620             vassert(0);
   5621       }
   5622       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5623       switch (size) {
   5624          case 1: op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4; break;
   5625          case 2: op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2; break;
   5626          case 0: case 3: return False;
   5627          default: vassert(0);
   5628       }
   5629       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5630       putQReg(dreg, mkexpr(res), condT);
   5631       DIP("vmull.%c%u q%u, d%u, d%u[%u]\n", U ? 'u' : 's', 8 << size, dreg,
   5632           nreg, mreg, index);
   5633       return True;
   5634    }
   5635 
   5636    /* VQDMULL */
   5637    if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
   5638       IROp op ,op2, dup, get;
   5639       ULong imm;
   5640       IRTemp arg_m, arg_n;
   5641       if (dreg & 1)
   5642          return False;
   5643       dreg >>= 1;
   5644       arg_m = newTemp(Ity_I64);
   5645       arg_n = newTemp(Ity_I64);
   5646       assign(arg_n, getDRegI64(nreg));
   5647       switch(size) {
   5648          case 1:
   5649             dup = Iop_Dup16x4;
   5650             get = Iop_GetElem16x4;
   5651             index = mreg >> 3;
   5652             mreg &= 7;
   5653             break;
   5654          case 2:
   5655             dup = Iop_Dup32x2;
   5656             get = Iop_GetElem32x2;
   5657             index = mreg >> 4;
   5658             mreg &= 0xf;
   5659             break;
   5660          case 0:
   5661          case 3:
   5662             return False;
   5663          default:
   5664             vassert(0);
   5665       }
   5666       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5667       switch (size) {
   5668          case 0:
   5669          case 3:
   5670             return False;
   5671          case 1:
   5672             op = Iop_QDMull16Sx4;
   5673             op2 = Iop_CmpEQ16x4;
   5674             imm = 1LL << 15;
   5675             imm = (imm << 16) | imm;
   5676             imm = (imm << 32) | imm;
   5677             break;
   5678          case 2:
   5679             op = Iop_QDMull32Sx2;
   5680             op2 = Iop_CmpEQ32x2;
   5681             imm = 1LL << 31;
   5682             imm = (imm << 32) | imm;
   5683             break;
   5684          default:
   5685             vassert(0);
   5686       }
   5687       putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   5688             condT);
   5689       setFlag_QC(binop(Iop_And64,
   5690                        binop(op2, mkexpr(arg_n), mkU64(imm)),
   5691                        binop(op2, mkexpr(arg_m), mkU64(imm))),
   5692                  mkU64(0),
   5693                  False, condT);
   5694       DIP("vqdmull.s%u q%u, d%u, d%u[%u]\n", 8 << size, dreg, nreg, mreg,
   5695           index);
   5696       return True;
   5697    }
   5698 
   5699    /* VQDMULH */
   5700    if (INSN(11,8) == BITS4(1,1,0,0)) {
   5701       IROp op ,op2, dup, get;
   5702       ULong imm;
   5703       IRTemp res, arg_m, arg_n;
   5704       if (Q) {
   5705          if ((dreg & 1) || (nreg & 1))
   5706             return False;
   5707          dreg >>= 1;
   5708          nreg >>= 1;
   5709          res = newTemp(Ity_V128);
   5710          arg_m = newTemp(Ity_V128);
   5711          arg_n = newTemp(Ity_V128);
   5712          assign(arg_n, getQReg(nreg));
   5713          switch(size) {
   5714             case 1:
   5715                dup = Iop_Dup16x8;
   5716                get = Iop_GetElem16x4;
   5717                index = mreg >> 3;
   5718                mreg &= 7;
   5719                break;
   5720             case 2:
   5721                dup = Iop_Dup32x4;
   5722                get = Iop_GetElem32x2;
   5723                index = mreg >> 4;
   5724                mreg &= 0xf;
   5725                break;
   5726             case 0:
   5727             case 3:
   5728                return False;
   5729             default:
   5730                vassert(0);
   5731          }
   5732          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5733       } else {
   5734          res = newTemp(Ity_I64);
   5735          arg_m = newTemp(Ity_I64);
   5736          arg_n = newTemp(Ity_I64);
   5737          assign(arg_n, getDRegI64(nreg));
   5738          switch(size) {
   5739             case 1:
   5740                dup = Iop_Dup16x4;
   5741                get = Iop_GetElem16x4;
   5742                index = mreg >> 3;
   5743                mreg &= 7;
   5744                break;
   5745             case 2:
   5746                dup = Iop_Dup32x2;
   5747                get = Iop_GetElem32x2;
   5748                index = mreg >> 4;
   5749                mreg &= 0xf;
   5750                break;
   5751             case 0:
   5752             case 3:
   5753                return False;
   5754             default:
   5755                vassert(0);
   5756          }
   5757          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5758       }
   5759       switch (size) {
   5760          case 0:
   5761          case 3:
   5762             return False;
   5763          case 1:
   5764             op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
   5765             op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   5766             imm = 1LL << 15;
   5767             imm = (imm << 16) | imm;
   5768             imm = (imm << 32) | imm;
   5769             break;
   5770          case 2:
   5771             op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
   5772             op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   5773             imm = 1LL << 31;
   5774             imm = (imm << 32) | imm;
   5775             break;
   5776          default:
   5777             vassert(0);
   5778       }
   5779       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5780       setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   5781                        binop(op2, mkexpr(arg_n),
   5782                                   Q ? mkU128(imm) : mkU64(imm)),
   5783                        binop(op2, mkexpr(arg_m),
   5784                              Q ? mkU128(imm) : mkU64(imm))),
   5785                  Q ? mkU128(0) : mkU64(0),
   5786                  Q, condT);
   5787       if (Q)
   5788          putQReg(dreg, mkexpr(res), condT);
   5789       else
   5790          putDRegI64(dreg, mkexpr(res), condT);
   5791       DIP("vqdmulh.s%u %c%u, %c%u, d%u[%u]\n",
   5792           8 << size, Q ? 'q' : 'd', dreg,
   5793           Q ? 'q' : 'd', nreg, mreg, index);
   5794       return True;
   5795    }
   5796 
   5797    /* VQRDMULH (scalar) */
   5798    if (INSN(11,8) == BITS4(1,1,0,1)) {
   5799       IROp op ,op2, dup, get;
   5800       ULong imm;
   5801       IRTemp res, arg_m, arg_n;
   5802       if (Q) {
   5803          if ((dreg & 1) || (nreg & 1))
   5804             return False;
   5805          dreg >>= 1;
   5806          nreg >>= 1;
   5807          res = newTemp(Ity_V128);
   5808          arg_m = newTemp(Ity_V128);
   5809          arg_n = newTemp(Ity_V128);
   5810          assign(arg_n, getQReg(nreg));
   5811          switch(size) {
   5812             case 1:
   5813                dup = Iop_Dup16x8;
   5814                get = Iop_GetElem16x4;
   5815                index = mreg >> 3;
   5816                mreg &= 7;
   5817                break;
   5818             case 2:
   5819                dup = Iop_Dup32x4;
   5820                get = Iop_GetElem32x2;
   5821                index = mreg >> 4;
   5822                mreg &= 0xf;
   5823                break;
   5824             case 0:
   5825             case 3:
   5826                return False;
   5827             default:
   5828                vassert(0);
   5829          }
   5830          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5831       } else {
   5832          res = newTemp(Ity_I64);
   5833          arg_m = newTemp(Ity_I64);
   5834          arg_n = newTemp(Ity_I64);
   5835          assign(arg_n, getDRegI64(nreg));
   5836          switch(size) {
   5837             case 1:
   5838                dup = Iop_Dup16x4;
   5839                get = Iop_GetElem16x4;
   5840                index = mreg >> 3;
   5841                mreg &= 7;
   5842                break;
   5843             case 2:
   5844                dup = Iop_Dup32x2;
   5845                get = Iop_GetElem32x2;
   5846                index = mreg >> 4;
   5847                mreg &= 0xf;
   5848                break;
   5849             case 0:
   5850             case 3:
   5851                return False;
   5852             default:
   5853                vassert(0);
   5854          }
   5855          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5856       }
   5857       switch (size) {
   5858          case 0:
   5859          case 3:
   5860             return False;
   5861          case 1:
   5862             op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
   5863             op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   5864             imm = 1LL << 15;
   5865             imm = (imm << 16) | imm;
   5866             imm = (imm << 32) | imm;
   5867             break;
   5868          case 2:
   5869             op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
   5870             op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   5871             imm = 1LL << 31;
   5872             imm = (imm << 32) | imm;
   5873             break;
   5874          default:
   5875             vassert(0);
   5876       }
   5877       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5878       setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   5879                        binop(op2, mkexpr(arg_n),
   5880                                   Q ? mkU128(imm) : mkU64(imm)),
   5881                        binop(op2, mkexpr(arg_m),
   5882                                   Q ? mkU128(imm) : mkU64(imm))),
   5883                  Q ? mkU128(0) : mkU64(0),
   5884                  Q, condT);
   5885       if (Q)
   5886          putQReg(dreg, mkexpr(res), condT);
   5887       else
   5888          putDRegI64(dreg, mkexpr(res), condT);
   5889       DIP("vqrdmulh.s%u %c%u, %c%u, d%u[%u]\n",
   5890           8 << size, Q ? 'q' : 'd', dreg,
   5891           Q ? 'q' : 'd', nreg, mreg, index);
   5892       return True;
   5893    }
   5894 
   5895    return False;
   5896 #  undef INSN
   5897 }
   5898 
   5899 /* A7.4.4 Two registers and a shift amount */
   5900 static
   5901 Bool dis_neon_data_2reg_and_shift ( UInt theInstr, IRTemp condT )
   5902 {
   5903    UInt A = (theInstr >> 8) & 0xf;
   5904    UInt B = (theInstr >> 6) & 1;
   5905    UInt L = (theInstr >> 7) & 1;
   5906    UInt U = (theInstr >> 24) & 1;
   5907    UInt Q = B;
   5908    UInt imm6 = (theInstr >> 16) & 0x3f;
   5909    UInt shift_imm;
   5910    UInt size = 4;
   5911    UInt tmp;
   5912    UInt mreg = get_neon_m_regno(theInstr);
   5913    UInt dreg = get_neon_d_regno(theInstr);
   5914    ULong imm = 0;
   5915    IROp op, cvt, add = Iop_INVALID, cvt2, op_rev;
   5916    IRTemp reg_m, res, mask;
   5917 
   5918    if (L == 0 && ((theInstr >> 19) & 7) == 0)
   5919       /* It is one reg and immediate */
   5920       return False;
   5921 
   5922    tmp = (L << 6) | imm6;
   5923    if (tmp & 0x40) {
   5924       size = 3;
   5925       shift_imm = 64 - imm6;
   5926    } else if (tmp & 0x20) {
   5927       size = 2;
   5928       shift_imm = 64 - imm6;
   5929    } else if (tmp & 0x10) {
   5930       size = 1;
   5931       shift_imm = 32 - imm6;
   5932    } else if (tmp & 0x8) {
   5933       size = 0;
   5934       shift_imm = 16 - imm6;
   5935    } else {
   5936       return False;
   5937    }
   5938 
   5939    switch (A) {
   5940       case 3:
   5941       case 2:
   5942          /* VRSHR, VRSRA */
   5943          if (shift_imm > 0) {
   5944             IRExpr *imm_val;
   5945             imm = 1L;
   5946             switch (size) {
   5947                case 0:
   5948                   imm = (imm << 8) | imm;
   5949                   /* fall through */
   5950                case 1:
   5951                   imm = (imm << 16) | imm;
   5952                   /* fall through */
   5953                case 2:
   5954                   imm = (imm << 32) | imm;
   5955                   /* fall through */
   5956                case 3:
   5957                   break;
   5958                default:
   5959                   vassert(0);
   5960             }
   5961             if (Q) {
   5962                reg_m = newTemp(Ity_V128);
   5963                res = newTemp(Ity_V128);
   5964                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   5965                assign(reg_m, getQReg(mreg));
   5966                switch (size) {
   5967                   case 0:
   5968                      add = Iop_Add8x16;
   5969                      op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
   5970                      break;
   5971                   case 1:
   5972                      add = Iop_Add16x8;
   5973                      op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   5974                      break;
   5975                   case 2:
   5976                      add = Iop_Add32x4;
   5977                      op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   5978                      break;
   5979                   case 3:
   5980                      add = Iop_Add64x2;
   5981                      op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
   5982                      break;
   5983                   default:
   5984                      vassert(0);
   5985                }
   5986             } else {
   5987                reg_m = newTemp(Ity_I64);
   5988                res = newTemp(Ity_I64);
   5989                imm_val = mkU64(imm);
   5990                assign(reg_m, getDRegI64(mreg));
   5991                switch (size) {
   5992                   case 0:
   5993                      add = Iop_Add8x8;
   5994                      op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
   5995                      break;
   5996                   case 1:
   5997                      add = Iop_Add16x4;
   5998                      op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
   5999                      break;
   6000                   case 2:
   6001                      add = Iop_Add32x2;
   6002                      op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
   6003                      break;
   6004                   case 3:
   6005                      add = Iop_Add64;
   6006                      op = U ? Iop_Shr64 : Iop_Sar64;
   6007                      break;
   6008                   default:
   6009                      vassert(0);
   6010                }
   6011             }
   6012             assign(res,
   6013                    binop(add,
   6014                          binop(op,
   6015                                mkexpr(reg_m),
   6016                                mkU8(shift_imm)),
   6017                          binop(Q ? Iop_AndV128 : Iop_And64,
   6018                                binop(op,
   6019                                      mkexpr(reg_m),
   6020                                      mkU8(shift_imm - 1)),
   6021                                imm_val)));
   6022          } else {
   6023             if (Q) {
   6024                res = newTemp(Ity_V128);
   6025                assign(res, getQReg(mreg));
   6026             } else {
   6027                res = newTemp(Ity_I64);
   6028                assign(res, getDRegI64(mreg));
   6029             }
   6030          }
   6031          if (A == 3) {
   6032             if (Q) {
   6033                putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
   6034                              condT);
   6035             } else {
   6036                putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
   6037                                 condT);
   6038             }
   6039             DIP("vrsra.%c%u %c%u, %c%u, #%u\n",
   6040                 U ? 'u' : 's', 8 << size,
   6041                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6042          } else {
   6043             if (Q) {
   6044                putQReg(dreg, mkexpr(res), condT);
   6045             } else {
   6046                putDRegI64(dreg, mkexpr(res), condT);
   6047             }
   6048             DIP("vrshr.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
   6049                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6050          }
   6051          return True;
   6052       case 1:
   6053       case 0:
   6054          /* VSHR, VSRA */
   6055          if (Q) {
   6056             reg_m = newTemp(Ity_V128);
   6057             assign(reg_m, getQReg(mreg));
   6058             res = newTemp(Ity_V128);
   6059          } else {
   6060             reg_m = newTemp(Ity_I64);
   6061             assign(reg_m, getDRegI64(mreg));
   6062             res = newTemp(Ity_I64);
   6063          }
   6064          if (Q) {
   6065             switch (size) {
   6066                case 0:
   6067                   op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
   6068                   add = Iop_Add8x16;
   6069                   break;
   6070                case 1:
   6071                   op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   6072                   add = Iop_Add16x8;
   6073                   break;
   6074                case 2:
   6075                   op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   6076                   add = Iop_Add32x4;
   6077                   break;
   6078                case 3:
   6079                   op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
   6080                   add = Iop_Add64x2;
   6081                   break;
   6082                default:
   6083                   vassert(0);
   6084             }
   6085          } else {
   6086             switch (size) {
   6087                case 0:
   6088                   op =  U ? Iop_ShrN8x8 : Iop_SarN8x8;
   6089                   add = Iop_Add8x8;
   6090                   break;
   6091                case 1:
   6092                   op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
   6093                   add = Iop_Add16x4;
   6094                   break;
   6095                case 2:
   6096                   op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
   6097                   add = Iop_Add32x2;
   6098                   break;
   6099                case 3:
   6100                   op = U ? Iop_Shr64 : Iop_Sar64;
   6101                   add = Iop_Add64;
   6102                   break;
   6103                default:
   6104                   vassert(0);
   6105             }
   6106          }
   6107          assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
   6108          if (A == 1) {
   6109             if (Q) {
   6110                putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
   6111                              condT);
   6112             } else {
   6113                putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
   6114                                 condT);
   6115             }
   6116             DIP("vsra.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
   6117                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6118          } else {
   6119             if (Q) {
   6120                putQReg(dreg, mkexpr(res), condT);
   6121             } else {
   6122                putDRegI64(dreg, mkexpr(res), condT);
   6123             }
   6124             DIP("vshr.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
   6125                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6126          }
   6127          return True;
   6128       case 4:
   6129          /* VSRI */
   6130          if (!U)
   6131             return False;
   6132          if (Q) {
   6133             res = newTemp(Ity_V128);
   6134             mask = newTemp(Ity_V128);
   6135          } else {
   6136             res = newTemp(Ity_I64);
   6137             mask = newTemp(Ity_I64);
   6138          }
   6139          switch (size) {
   6140             case 0: op = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; break;
   6141             case 1: op = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; break;
   6142             case 2: op = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; break;
   6143             case 3: op = Q ? Iop_ShrN64x2 : Iop_Shr64; break;
   6144             default: vassert(0);
   6145          }
   6146          if (Q) {
   6147             assign(mask, binop(op, binop(Iop_64HLtoV128,
   6148                                          mkU64(0xFFFFFFFFFFFFFFFFLL),
   6149                                          mkU64(0xFFFFFFFFFFFFFFFFLL)),
   6150                                mkU8(shift_imm)));
   6151             assign(res, binop(Iop_OrV128,
   6152                               binop(Iop_AndV128,
   6153                                     getQReg(dreg),
   6154                                     unop(Iop_NotV128,
   6155                                          mkexpr(mask))),
   6156                               binop(op,
   6157                                     getQReg(mreg),
   6158                                     mkU8(shift_imm))));
   6159             putQReg(dreg, mkexpr(res), condT);
   6160          } else {
   6161             assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
   6162                                mkU8(shift_imm)));
   6163             assign(res, binop(Iop_Or64,
   6164                               binop(Iop_And64,
   6165                                     getDRegI64(dreg),
   6166                                     unop(Iop_Not64,
   6167                                          mkexpr(mask))),
   6168                               binop(op,
   6169                                     getDRegI64(mreg),
   6170                                     mkU8(shift_imm))));
   6171             putDRegI64(dreg, mkexpr(res), condT);
   6172          }
   6173          DIP("vsri.%u %c%u, %c%u, #%u\n",
   6174              8 << size, Q ? 'q' : 'd', dreg,
   6175              Q ? 'q' : 'd', mreg, shift_imm);
   6176          return True;
   6177       case 5:
   6178          if (U) {
   6179             /* VSLI */
   6180             shift_imm = 8 * (1 << size) - shift_imm;
   6181             if (Q) {
   6182                res = newTemp(Ity_V128);
   6183                mask = newTemp(Ity_V128);
   6184             } else {
   6185                res = newTemp(Ity_I64);
   6186                mask = newTemp(Ity_I64);
   6187             }
   6188             switch (size) {
   6189                case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
   6190                case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
   6191                case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
   6192                case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
   6193                default: vassert(0);
   6194             }
   6195             if (Q) {
   6196                assign(mask, binop(op, binop(Iop_64HLtoV128,
   6197                                             mkU64(0xFFFFFFFFFFFFFFFFLL),
   6198                                             mkU64(0xFFFFFFFFFFFFFFFFLL)),
   6199                                   mkU8(shift_imm)));
   6200                assign(res, binop(Iop_OrV128,
   6201                                  binop(Iop_AndV128,
   6202                                        getQReg(dreg),
   6203                                        unop(Iop_NotV128,
   6204                                             mkexpr(mask))),
   6205                                  binop(op,
   6206                                        getQReg(mreg),
   6207                                        mkU8(shift_imm))));
   6208                putQReg(dreg, mkexpr(res), condT);
   6209             } else {
   6210                assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
   6211                                   mkU8(shift_imm)));
   6212                assign(res, binop(Iop_Or64,
   6213                                  binop(Iop_And64,
   6214                                        getDRegI64(dreg),
   6215                                        unop(Iop_Not64,
   6216                                             mkexpr(mask))),
   6217                                  binop(op,
   6218                                        getDRegI64(mreg),
   6219                                        mkU8(shift_imm))));
   6220                putDRegI64(dreg, mkexpr(res), condT);
   6221             }
   6222             DIP("vsli.%u %c%u, %c%u, #%u\n",
   6223                 8 << size, Q ? 'q' : 'd', dreg,
   6224                 Q ? 'q' : 'd', mreg, shift_imm);
   6225             return True;
   6226          } else {
   6227             /* VSHL #imm */
   6228             shift_imm = 8 * (1 << size) - shift_imm;
   6229             if (Q) {
   6230                res = newTemp(Ity_V128);
   6231             } else {
   6232                res = newTemp(Ity_I64);
   6233             }
   6234             switch (size) {
   6235                case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
   6236                case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
   6237                case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
   6238                case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
   6239                default: vassert(0);
   6240             }
   6241             assign(res, binop(op, Q ? getQReg(mreg) : getDRegI64(mreg),
   6242                      mkU8(shift_imm)));
   6243             if (Q) {
   6244                putQReg(dreg, mkexpr(res), condT);
   6245             } else {
   6246                putDRegI64(dreg, mkexpr(res), condT);
   6247             }
   6248             DIP("vshl.i%u %c%u, %c%u, #%u\n",
   6249                 8 << size, Q ? 'q' : 'd', dreg,
   6250                 Q ? 'q' : 'd', mreg, shift_imm);
   6251             return True;
   6252          }
   6253          break;
   6254       case 6:
   6255       case 7:
   6256          /* VQSHL, VQSHLU */
   6257          shift_imm = 8 * (1 << size) - shift_imm;
   6258          if (U) {
   6259             if (A & 1) {
   6260                switch (size) {
   6261                   case 0:
   6262                      op = Q ? Iop_QShlNsatUU8x16 : Iop_QShlNsatUU8x8;
   6263                      op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   6264                      break;
   6265                   case 1:
   6266                      op = Q ? Iop_QShlNsatUU16x8 : Iop_QShlNsatUU16x4;
   6267                      op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   6268                      break;
   6269                   case 2:
   6270                      op = Q ? Iop_QShlNsatUU32x4 : Iop_QShlNsatUU32x2;
   6271                      op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   6272                      break;
   6273                   case 3:
   6274                      op = Q ? Iop_QShlNsatUU64x2 : Iop_QShlNsatUU64x1;
   6275                      op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
   6276                      break;
   6277                   default:
   6278                      vassert(0);
   6279                }
   6280                DIP("vqshl.u%u %c%u, %c%u, #%u\n",
   6281                    8 << size,
   6282                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6283             } else {
   6284                switch (size) {
   6285                   case 0:
   6286                      op = Q ? Iop_QShlNsatSU8x16 : Iop_QShlNsatSU8x8;
   6287                      op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   6288                      break;
   6289                   case 1:
   6290                      op = Q ? Iop_QShlNsatSU16x8 : Iop_QShlNsatSU16x4;
   6291                      op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   6292                      break;
   6293                   case 2:
   6294                      op = Q ? Iop_QShlNsatSU32x4 : Iop_QShlNsatSU32x2;
   6295                      op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   6296                      break;
   6297                   case 3:
   6298                      op = Q ? Iop_QShlNsatSU64x2 : Iop_QShlNsatSU64x1;
   6299                      op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
   6300                      break;
   6301                   default:
   6302                      vassert(0);
   6303                }
   6304                DIP("vqshlu.s%u %c%u, %c%u, #%u\n",
   6305                    8 << size,
   6306                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6307             }
   6308          } else {
   6309             if (!(A & 1))
   6310                return False;
   6311             switch (size) {
   6312                case 0:
   6313                   op = Q ? Iop_QShlNsatSS8x16 : Iop_QShlNsatSS8x8;
   6314                   op_rev = Q ? Iop_SarN8x16 : Iop_SarN8x8;
   6315                   break;
   6316                case 1:
   6317                   op = Q ? Iop_QShlNsatSS16x8 : Iop_QShlNsatSS16x4;
   6318                   op_rev = Q ? Iop_SarN16x8 : Iop_SarN16x4;
   6319                   break;
   6320                case 2:
   6321                   op = Q ? Iop_QShlNsatSS32x4 : Iop_QShlNsatSS32x2;
   6322                   op_rev = Q ? Iop_SarN32x4 : Iop_SarN32x2;
   6323                   break;
   6324                case 3:
   6325                   op = Q ? Iop_QShlNsatSS64x2 : Iop_QShlNsatSS64x1;
   6326                   op_rev = Q ? Iop_SarN64x2 : Iop_Sar64;
   6327                   break;
   6328                default:
   6329                   vassert(0);
   6330             }
   6331             DIP("vqshl.s%u %c%u, %c%u, #%u\n",
   6332                 8 << size,
   6333                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6334          }
   6335          if (Q) {
   6336             tmp = newTemp(Ity_V128);
   6337             res = newTemp(Ity_V128);
   6338             reg_m = newTemp(Ity_V128);
   6339             assign(reg_m, getQReg(mreg));
   6340          } else {
   6341             tmp = newTemp(Ity_I64);
   6342             res = newTemp(Ity_I64);
   6343             reg_m = newTemp(Ity_I64);
   6344             assign(reg_m, getDRegI64(mreg));
   6345          }
   6346          assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
   6347          assign(tmp, binop(op_rev, mkexpr(res), mkU8(shift_imm)));
   6348          setFlag_QC(mkexpr(tmp), mkexpr(reg_m), Q, condT);
   6349          if (Q)
   6350             putQReg(dreg, mkexpr(res), condT);
   6351          else
   6352             putDRegI64(dreg, mkexpr(res), condT);
   6353          return True;
   6354       case 8:
   6355          if (!U) {
   6356             if (L == 1)
   6357                return False;
   6358             size++;
   6359             dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   6360             mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   6361             if (mreg & 1)
   6362                return False;
   6363             mreg >>= 1;
   6364             if (!B) {
   6365                /* VSHRN*/
   6366                IROp narOp;
   6367                reg_m = newTemp(Ity_V128);
   6368                assign(reg_m, getQReg(mreg));
   6369                res = newTemp(Ity_I64);
   6370                switch (size) {
   6371                   case 1:
   6372                      op = Iop_ShrN16x8;
   6373                      narOp = Iop_NarrowUn16to8x8;
   6374                      break;
   6375                   case 2:
   6376                      op = Iop_ShrN32x4;
   6377                      narOp = Iop_NarrowUn32to16x4;
   6378                      break;
   6379                   case 3:
   6380                      op = Iop_ShrN64x2;
   6381                      narOp = Iop_NarrowUn64to32x2;
   6382                      break;
   6383                   default:
   6384                      vassert(0);
   6385                }
   6386                assign(res, unop(narOp,
   6387                                 binop(op,
   6388                                       mkexpr(reg_m),
   6389                                       mkU8(shift_imm))));
   6390                putDRegI64(dreg, mkexpr(res), condT);
   6391                DIP("vshrn.i%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
   6392                    shift_imm);
   6393                return True;
   6394             } else {
   6395                /* VRSHRN   */
   6396                IROp addOp, shOp, narOp;
   6397                IRExpr *imm_val;
   6398                reg_m = newTemp(Ity_V128);
   6399                assign(reg_m, getQReg(mreg));
   6400                res = newTemp(Ity_I64);
   6401                imm = 1L;
   6402                switch (size) {
   6403                   case 0: imm = (imm <<  8) | imm; /* fall through */
   6404                   case 1: imm = (imm << 16) | imm; /* fall through */
   6405                   case 2: imm = (imm << 32) | imm; /* fall through */
   6406                   case 3: break;
   6407                   default: vassert(0);
   6408                }
   6409                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   6410                switch (size) {
   6411                   case 1:
   6412                      addOp = Iop_Add16x8;
   6413                      shOp = Iop_ShrN16x8;
   6414                      narOp = Iop_NarrowUn16to8x8;
   6415                      break;
   6416                   case 2:
   6417                      addOp = Iop_Add32x4;
   6418                      shOp = Iop_ShrN32x4;
   6419                      narOp = Iop_NarrowUn32to16x4;
   6420                      break;
   6421                   case 3:
   6422                      addOp = Iop_Add64x2;
   6423                      shOp = Iop_ShrN64x2;
   6424                      narOp = Iop_NarrowUn64to32x2;
   6425                      break;
   6426                   default:
   6427                      vassert(0);
   6428                }
   6429                assign(res, unop(narOp,
   6430                                 binop(addOp,
   6431                                       binop(shOp,
   6432                                             mkexpr(reg_m),
   6433                                             mkU8(shift_imm)),
   6434                                       binop(Iop_AndV128,
   6435                                             binop(shOp,
   6436                                                   mkexpr(reg_m),
   6437                                                   mkU8(shift_imm - 1)),
   6438                                             imm_val))));
   6439                putDRegI64(dreg, mkexpr(res), condT);
   6440                if (shift_imm == 0) {
   6441                   DIP("vmov%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
   6442                       shift_imm);
   6443                } else {
   6444                   DIP("vrshrn.i%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
   6445                       shift_imm);
   6446                }
   6447                return True;
   6448             }
   6449          } else {
   6450             /* fall through */
   6451          }
   6452       case 9:
   6453          dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   6454          mreg = ((theInstr >>  1) & 0x10) | (theInstr & 0xF);
   6455          if (mreg & 1)
   6456             return False;
   6457          mreg >>= 1;
   6458          size++;
   6459          if ((theInstr >> 8) & 1) {
   6460             switch (size) {
   6461                case 1:
   6462                   op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   6463                   cvt = U ? Iop_QNarrowUn16Uto8Ux8 : Iop_QNarrowUn16Sto8Sx8;
   6464                   cvt2 = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   6465                   break;
   6466                case 2:
   6467                   op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   6468                   cvt = U ? Iop_QNarrowUn32Uto16Ux4 : Iop_QNarrowUn32Sto16Sx4;
   6469                   cvt2 = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   6470                   break;
   6471                case 3:
   6472                   op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
   6473                   cvt = U ? Iop_QNarrowUn64Uto32Ux2 : Iop_QNarrowUn64Sto32Sx2;
   6474                   cvt2 = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   6475                   break;
   6476                default:
   6477                   vassert(0);
   6478             }
   6479             DIP("vq%sshrn.%c%u d%u, q%u, #%u\n", B ? "r" : "",
   6480                 U ? 'u' : 's', 8 << size, dreg, mreg, shift_imm);
   6481          } else {
   6482             vassert(U);
   6483             switch (size) {
   6484                case 1:
   6485                   op = Iop_SarN16x8;
   6486                   cvt = Iop_QNarrowUn16Sto8Ux8;
   6487                   cvt2 = Iop_Widen8Uto16x8;
   6488                   break;
   6489                case 2:
   6490                   op = Iop_SarN32x4;
   6491                   cvt = Iop_QNarrowUn32Sto16Ux4;
   6492                   cvt2 = Iop_Widen16Uto32x4;
   6493                   break;
   6494                case 3:
   6495                   op = Iop_SarN64x2;
   6496                   cvt = Iop_QNarrowUn64Sto32Ux2;
   6497                   cvt2 = Iop_Widen32Uto64x2;
   6498                   break;
   6499                default:
   6500                   vassert(0);
   6501             }
   6502             DIP("vq%sshrun.s%u d%u, q%u, #%u\n", B ? "r" : "",
   6503                 8 << size, dreg, mreg, shift_imm);
   6504          }
   6505          if (B) {
   6506             if (shift_imm > 0) {
   6507                imm = 1;
   6508                switch (size) {
   6509                   case 1: imm = (imm << 16) | imm; /* fall through */
   6510                   case 2: imm = (imm << 32) | imm; /* fall through */
   6511                   case 3: break;
   6512                   case 0: default: vassert(0);
   6513                }
   6514                switch (size) {
   6515                   case 1: add = Iop_Add16x8; break;
   6516                   case 2: add = Iop_Add32x4; break;
   6517                   case 3: add = Iop_Add64x2; break;
   6518                   case 0: default: vassert(0);
   6519                }
   6520             }
   6521          }
   6522          reg_m = newTemp(Ity_V128);
   6523          res = newTemp(Ity_V128);
   6524          assign(reg_m, getQReg(mreg));
   6525          if (B) {
   6526             /* VQRSHRN, VQRSHRUN */
   6527             assign(res, binop(add,
   6528                               binop(op, mkexpr(reg_m), mkU8(shift_imm)),
   6529                               binop(Iop_AndV128,
   6530                                     binop(op,
   6531                                           mkexpr(reg_m),
   6532                                           mkU8(shift_imm - 1)),
   6533                                     mkU128(imm))));
   6534          } else {
   6535             /* VQSHRN, VQSHRUN */
   6536             assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
   6537          }
   6538          setFlag_QC(unop(cvt2, unop(cvt, mkexpr(res))), mkexpr(res),
   6539                     True, condT);
   6540          putDRegI64(dreg, unop(cvt, mkexpr(res)), condT);
   6541          return True;
   6542       case 10:
   6543          /* VSHLL
   6544             VMOVL ::= VSHLL #0 */
   6545          if (B)
   6546             return False;
   6547          if (dreg & 1)
   6548             return False;
   6549          dreg >>= 1;
   6550          shift_imm = (8 << size) - shift_imm;
   6551          res = newTemp(Ity_V128);
   6552          switch (size) {
   6553             case 0:
   6554                op = Iop_ShlN16x8;
   6555                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   6556                break;
   6557             case 1:
   6558                op = Iop_ShlN32x4;
   6559                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   6560                break;
   6561             case 2:
   6562                op = Iop_ShlN64x2;
   6563                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   6564                break;
   6565             case 3:
   6566                return False;
   6567             default:
   6568                vassert(0);
   6569          }
   6570          assign(res, binop(op, unop(cvt, getDRegI64(mreg)), mkU8(shift_imm)));
   6571          putQReg(dreg, mkexpr(res), condT);
   6572          if (shift_imm == 0) {
   6573             DIP("vmovl.%c%u q%u, d%u\n", U ? 'u' : 's', 8 << size,
   6574                 dreg, mreg);
   6575          } else {
   6576             DIP("vshll.%c%u q%u, d%u, #%u\n", U ? 'u' : 's', 8 << size,
   6577                 dreg, mreg, shift_imm);
   6578          }
   6579          return True;
   6580       case 14:
   6581       case 15:
   6582          /* VCVT floating-point <-> fixed-point */
   6583          if ((theInstr >> 8) & 1) {
   6584             if (U) {
   6585                op = Q ? Iop_F32ToFixed32Ux4_RZ : Iop_F32ToFixed32Ux2_RZ;
   6586             } else {
   6587                op = Q ? Iop_F32ToFixed32Sx4_RZ : Iop_F32ToFixed32Sx2_RZ;
   6588             }
   6589             DIP("vcvt.%c32.f32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
   6590                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
   6591                 64 - ((theInstr >> 16) & 0x3f));
   6592          } else {
   6593             if (U) {
   6594                op = Q ? Iop_Fixed32UToF32x4_RN : Iop_Fixed32UToF32x2_RN;
   6595             } else {
   6596                op = Q ? Iop_Fixed32SToF32x4_RN : Iop_Fixed32SToF32x2_RN;
   6597             }
   6598             DIP("vcvt.f32.%c32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
   6599                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
   6600                 64 - ((theInstr >> 16) & 0x3f));
   6601          }
   6602          if (((theInstr >> 21) & 1) == 0)
   6603             return False;
   6604          if (Q) {
   6605             putQReg(dreg, binop(op, getQReg(mreg),
   6606                      mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
   6607          } else {
   6608             putDRegI64(dreg, binop(op, getDRegI64(mreg),
   6609                        mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
   6610          }
   6611          return True;
   6612       default:
   6613          return False;
   6614 
   6615    }
   6616    return False;
   6617 }
   6618 
   6619 /* A7.4.5 Two registers, miscellaneous */
   6620 static
   6621 Bool dis_neon_data_2reg_misc ( UInt theInstr, IRTemp condT )
   6622 {
   6623    UInt A = (theInstr >> 16) & 3;
   6624    UInt B = (theInstr >> 6) & 0x1f;
   6625    UInt Q = (theInstr >> 6) & 1;
   6626    UInt U = (theInstr >> 24) & 1;
   6627    UInt size = (theInstr >> 18) & 3;
   6628    UInt dreg = get_neon_d_regno(theInstr);
   6629    UInt mreg = get_neon_m_regno(theInstr);
   6630    UInt F = (theInstr >> 10) & 1;
   6631    IRTemp arg_d = IRTemp_INVALID;
   6632    IRTemp arg_m = IRTemp_INVALID;
   6633    IRTemp res = IRTemp_INVALID;
   6634    switch (A) {
   6635       case 0:
   6636          if (Q) {
   6637             arg_m = newTemp(Ity_V128);
   6638             res = newTemp(Ity_V128);
   6639             assign(arg_m, getQReg(mreg));
   6640          } else {
   6641             arg_m = newTemp(Ity_I64);
   6642             res = newTemp(Ity_I64);
   6643             assign(arg_m, getDRegI64(mreg));
   6644          }
   6645          switch (B >> 1) {
   6646             case 0: {
   6647                /* VREV64 */
   6648                IROp op;
   6649                switch (size) {
   6650                   case 0:
   6651                      op = Q ? Iop_Reverse8sIn64_x2 : Iop_Reverse8sIn64_x1;
   6652                      break;
   6653                   case 1:
   6654                      op = Q ? Iop_Reverse16sIn64_x2 : Iop_Reverse16sIn64_x1;
   6655                      break;
   6656                   case 2:
   6657                      op = Q ? Iop_Reverse32sIn64_x2 : Iop_Reverse32sIn64_x1;
   6658                      break;
   6659                   case 3:
   6660                      return False;
   6661                   default:
   6662                      vassert(0);
   6663                }
   6664                assign(res, unop(op, mkexpr(arg_m)));
   6665                DIP("vrev64.%u %c%u, %c%u\n", 8 << size,
   6666                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6667                break;
   6668             }
   6669             case 1: {
   6670                /* VREV32 */
   6671                IROp op;
   6672                switch (size) {
   6673                   case 0:
   6674                      op = Q ? Iop_Reverse8sIn32_x4 : Iop_Reverse8sIn32_x2;
   6675                      break;
   6676                   case 1:
   6677                      op = Q ? Iop_Reverse16sIn32_x4 : Iop_Reverse16sIn32_x2;
   6678                      break;
   6679                   case 2:
   6680                   case 3:
   6681                      return False;
   6682                   default:
   6683                      vassert(0);
   6684                }
   6685                assign(res, unop(op, mkexpr(arg_m)));
   6686                DIP("vrev32.%u %c%u, %c%u\n", 8 << size,
   6687                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6688                break;
   6689             }
   6690             case 2: {
   6691                /* VREV16 */
   6692                IROp op;
   6693                switch (size) {
   6694                   case 0:
   6695                      op = Q ? Iop_Reverse8sIn16_x8 : Iop_Reverse8sIn16_x4;
   6696                      break;
   6697                   case 1:
   6698                   case 2:
   6699                   case 3:
   6700                      return False;
   6701                   default:
   6702                      vassert(0);
   6703                }
   6704                assign(res, unop(op, mkexpr(arg_m)));
   6705                DIP("vrev16.%u %c%u, %c%u\n", 8 << size,
   6706                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6707                break;
   6708             }
   6709             case 3:
   6710                return False;
   6711             case 4:
   6712             case 5: {
   6713                /* VPADDL */
   6714                IROp op;
   6715                U = (theInstr >> 7) & 1;
   6716                if (Q) {
   6717                   switch (size) {
   6718                      case 0: op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16; break;
   6719                      case 1: op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8; break;
   6720                      case 2: op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4; break;
   6721                      case 3: return False;
   6722                      default: vassert(0);
   6723                   }
   6724                } else {
   6725                   switch (size) {
   6726                      case 0: op = U ? Iop_PwAddL8Ux8  : Iop_PwAddL8Sx8;  break;
   6727                      case 1: op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4; break;
   6728                      case 2: op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2; break;
   6729                      case 3: return False;
   6730                      default: vassert(0);
   6731                   }
   6732                }
   6733                assign(res, unop(op, mkexpr(arg_m)));
   6734                DIP("vpaddl.%c%u %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
   6735                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6736                break;
   6737             }
   6738             case 6:
   6739             case 7:
   6740                return False;
   6741             case 8: {
   6742                /* VCLS */
   6743                IROp op;
   6744                switch (size) {
   6745                   case 0: op = Q ? Iop_Cls8x16 : Iop_Cls8x8; break;
   6746                   case 1: op = Q ? Iop_Cls16x8 : Iop_Cls16x4; break;
   6747                   case 2: op = Q ? Iop_Cls32x4 : Iop_Cls32x2; break;
   6748                   case 3: return False;
   6749                   default: vassert(0);
   6750                }
   6751                assign(res, unop(op, mkexpr(arg_m)));
   6752                DIP("vcls.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6753                    Q ? 'q' : 'd', mreg);
   6754                break;
   6755             }
   6756             case 9: {
   6757                /* VCLZ */
   6758                IROp op;
   6759                switch (size) {
   6760                   case 0: op = Q ? Iop_Clz8x16 : Iop_Clz8x8; break;
   6761                   case 1: op = Q ? Iop_Clz16x8 : Iop_Clz16x4; break;
   6762                   case 2: op = Q ? Iop_Clz32x4 : Iop_Clz32x2; break;
   6763                   case 3: return False;
   6764                   default: vassert(0);
   6765                }
   6766                assign(res, unop(op, mkexpr(arg_m)));
   6767                DIP("vclz.i%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6768                    Q ? 'q' : 'd', mreg);
   6769                break;
   6770             }
   6771             case 10:
   6772                /* VCNT */
   6773                assign(res, unop(Q ? Iop_Cnt8x16 : Iop_Cnt8x8, mkexpr(arg_m)));
   6774                DIP("vcnt.8 %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
   6775                    mreg);
   6776                break;
   6777             case 11:
   6778                /* VMVN */
   6779                if (Q)
   6780                   assign(res, unop(Iop_NotV128, mkexpr(arg_m)));
   6781                else
   6782                   assign(res, unop(Iop_Not64, mkexpr(arg_m)));
   6783                DIP("vmvn %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
   6784                    mreg);
   6785                break;
   6786             case 12:
   6787             case 13: {
   6788                /* VPADAL */
   6789                IROp op, add_op;
   6790                U = (theInstr >> 7) & 1;
   6791                if (Q) {
   6792                   switch (size) {
   6793                      case 0:
   6794                         op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16;
   6795                         add_op = Iop_Add16x8;
   6796                         break;
   6797                      case 1:
   6798                         op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8;
   6799                         add_op = Iop_Add32x4;
   6800                         break;
   6801                      case 2:
   6802                         op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4;
   6803                         add_op = Iop_Add64x2;
   6804                         break;
   6805                      case 3:
   6806                         return False;
   6807                      default:
   6808                         vassert(0);
   6809                   }
   6810                } else {
   6811                   switch (size) {
   6812                      case 0:
   6813                         op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8;
   6814                         add_op = Iop_Add16x4;
   6815                         break;
   6816                      case 1:
   6817                         op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4;
   6818                         add_op = Iop_Add32x2;
   6819                         break;
   6820                      case 2:
   6821                         op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2;
   6822                         add_op = Iop_Add64;
   6823                         break;
   6824                      case 3:
   6825                         return False;
   6826                      default:
   6827                         vassert(0);
   6828                   }
   6829                }
   6830                if (Q) {
   6831                   arg_d = newTemp(Ity_V128);
   6832                   assign(arg_d, getQReg(dreg));
   6833                } else {
   6834                   arg_d = newTemp(Ity_I64);
   6835                   assign(arg_d, getDRegI64(dreg));
   6836                }
   6837                assign(res, binop(add_op, unop(op, mkexpr(arg_m)),
   6838                                          mkexpr(arg_d)));
   6839                DIP("vpadal.%c%u %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
   6840                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6841                break;
   6842             }
   6843             case 14: {
   6844                /* VQABS */
   6845                IROp op_sub, op_qsub, op_cmp;
   6846                IRTemp mask, tmp;
   6847                IRExpr *zero1, *zero2;
   6848                IRExpr *neg, *neg2;
   6849                if (Q) {
   6850                   zero1 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6851                   zero2 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6852                   mask = newTemp(Ity_V128);
   6853                   tmp = newTemp(Ity_V128);
   6854                } else {
   6855                   zero1 = mkU64(0);
   6856                   zero2 = mkU64(0);
   6857                   mask = newTemp(Ity_I64);
   6858                   tmp = newTemp(Ity_I64);
   6859                }
   6860                switch (size) {
   6861                   case 0:
   6862                      op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   6863                      op_qsub = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
   6864                      op_cmp = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   6865                      break;
   6866                   case 1:
   6867                      op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   6868                      op_qsub = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
   6869                      op_cmp = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4;
   6870                      break;
   6871                   case 2:
   6872                      op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   6873                      op_qsub = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
   6874                      op_cmp = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2;
   6875                      break;
   6876                   case 3:
   6877                      return False;
   6878                   default:
   6879                      vassert(0);
   6880                }
   6881                assign(mask, binop(op_cmp, mkexpr(arg_m), zero1));
   6882                neg = binop(op_qsub, zero2, mkexpr(arg_m));
   6883                neg2 = binop(op_sub, zero2, mkexpr(arg_m));
   6884                assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
   6885                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6886                                        mkexpr(mask),
   6887                                        mkexpr(arg_m)),
   6888                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6889                                        unop(Q ? Iop_NotV128 : Iop_Not64,
   6890                                             mkexpr(mask)),
   6891                                        neg)));
   6892                assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
   6893                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6894                                        mkexpr(mask),
   6895                                        mkexpr(arg_m)),
   6896                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6897                                        unop(Q ? Iop_NotV128 : Iop_Not64,
   6898                                             mkexpr(mask)),
   6899                                        neg2)));
   6900                setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
   6901                DIP("vqabs.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6902                    Q ? 'q' : 'd', mreg);
   6903                break;
   6904             }
   6905             case 15: {
   6906                /* VQNEG */
   6907                IROp op, op2;
   6908                IRExpr *zero;
   6909                if (Q) {
   6910                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6911                } else {
   6912                   zero = mkU64(0);
   6913                }
   6914                switch (size) {
   6915                   case 0:
   6916                      op = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
   6917                      op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   6918                      break;
   6919                   case 1:
   6920                      op = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
   6921                      op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   6922                      break;
   6923                   case 2:
   6924                      op = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
   6925                      op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   6926                      break;
   6927                   case 3:
   6928                      return False;
   6929                   default:
   6930                      vassert(0);
   6931                }
   6932                assign(res, binop(op, zero, mkexpr(arg_m)));
   6933                setFlag_QC(mkexpr(res), binop(op2, zero, mkexpr(arg_m)),
   6934                           Q, condT);
   6935                DIP("vqneg.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6936                    Q ? 'q' : 'd', mreg);
   6937                break;
   6938             }
   6939             default:
   6940                vassert(0);
   6941          }
   6942          if (Q) {
   6943             putQReg(dreg, mkexpr(res), condT);
   6944          } else {
   6945             putDRegI64(dreg, mkexpr(res), condT);
   6946          }
   6947          return True;
   6948       case 1:
   6949          if (Q) {
   6950             arg_m = newTemp(Ity_V128);
   6951             res = newTemp(Ity_V128);
   6952             assign(arg_m, getQReg(mreg));
   6953          } else {
   6954             arg_m = newTemp(Ity_I64);
   6955             res = newTemp(Ity_I64);
   6956             assign(arg_m, getDRegI64(mreg));
   6957          }
   6958          switch ((B >> 1) & 0x7) {
   6959             case 0: {
   6960                /* VCGT #0 */
   6961                IRExpr *zero;
   6962                IROp op;
   6963                if (Q) {
   6964                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6965                } else {
   6966                   zero = mkU64(0);
   6967                }
   6968                if (F) {
   6969                   switch (size) {
   6970                      case 0: case 1: case 3: return False;
   6971                      case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
   6972                      default: vassert(0);
   6973                   }
   6974                } else {
   6975                   switch (size) {
   6976                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   6977                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   6978                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   6979                      case 3: return False;
   6980                      default: vassert(0);
   6981                   }
   6982                }
   6983                assign(res, binop(op, mkexpr(arg_m), zero));
   6984                DIP("vcgt.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   6985                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6986                break;
   6987             }
   6988             case 1: {
   6989                /* VCGE #0 */
   6990                IROp op;
   6991                IRExpr *zero;
   6992                if (Q) {
   6993                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6994                } else {
   6995                   zero = mkU64(0);
   6996                }
   6997                if (F) {
   6998                   switch (size) {
   6999                      case 0: case 1: case 3: return False;
   7000                      case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
   7001                      default: vassert(0);
   7002                   }
   7003                   assign(res, binop(op, mkexpr(arg_m), zero));
   7004                } else {
   7005                   switch (size) {
   7006                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   7007                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   7008                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   7009                      case 3: return False;
   7010                      default: vassert(0);
   7011                   }
   7012                   assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   7013                                    binop(op, zero, mkexpr(arg_m))));
   7014                }
   7015                DIP("vcge.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   7016                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7017                break;
   7018             }
   7019             case 2: {
   7020                /* VCEQ #0 */
   7021                IROp op;
   7022                IRExpr *zero;
   7023                if (F) {
   7024                   if (Q) {
   7025                      zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7026                   } else {
   7027                      zero = mkU64(0);
   7028                   }
   7029                   switch (size) {
   7030                      case 0: case 1: case 3: return False;
   7031                      case 2: op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2; break;
   7032                      default: vassert(0);
   7033                   }
   7034                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7035                } else {
   7036                   switch (size) {
   7037                      case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
   7038                      case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
   7039                      case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
   7040                      case 3: return False;
   7041                      default: vassert(0);
   7042                   }
   7043                   assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   7044                                    unop(op, mkexpr(arg_m))));
   7045                }
   7046                DIP("vceq.%c%u %c%u, %c%u, #0\n", F ? 'f' : 'i', 8 << size,
   7047                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7048                break;
   7049             }
   7050             case 3: {
   7051                /* VCLE #0 */
   7052                IRExpr *zero;
   7053                IROp op;
   7054                if (Q) {
   7055                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7056                } else {
   7057                   zero = mkU64(0);
   7058                }
   7059                if (F) {
   7060                   switch (size) {
   7061                      case 0: case 1: case 3: return False;
   7062                      case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
   7063                      default: vassert(0);
   7064                   }
   7065                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7066                } else {
   7067                   switch (size) {
   7068                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   7069                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   7070                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   7071                      case 3: return False;
   7072                      default: vassert(0);
   7073                   }
   7074                   assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   7075                                    binop(op, mkexpr(arg_m), zero)));
   7076                }
   7077                DIP("vcle.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   7078                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7079                break;
   7080             }
   7081             case 4: {
   7082                /* VCLT #0 */
   7083                IROp op;
   7084                IRExpr *zero;
   7085                if (Q) {
   7086                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7087                } else {
   7088                   zero = mkU64(0);
   7089                }
   7090                if (F) {
   7091                   switch (size) {
   7092                      case 0: case 1: case 3: return False;
   7093                      case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
   7094                      default: vassert(0);
   7095                   }
   7096                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7097                } else {
   7098                   switch (size) {
   7099                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   7100                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   7101                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   7102                      case 3: return False;
   7103                      default: vassert(0);
   7104                   }
   7105                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7106                }
   7107                DIP("vclt.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   7108                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7109                break;
   7110             }
   7111             case 5:
   7112                return False;
   7113             case 6: {
   7114                /* VABS */
   7115                if (!F) {
   7116                   IROp op;
   7117                   switch(size) {
   7118                      case 0: op = Q ? Iop_Abs8x16 : Iop_Abs8x8; break;
   7119                      case 1: op = Q ? Iop_Abs16x8 : Iop_Abs16x4; break;
   7120                      case 2: op = Q ? Iop_Abs32x4 : Iop_Abs32x2; break;
   7121                      case 3: return False;
   7122                      default: vassert(0);
   7123                   }
   7124                   assign(res, unop(op, mkexpr(arg_m)));
   7125                } else {
   7126                   assign(res, unop(Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2,
   7127                                    mkexpr(arg_m)));
   7128                }
   7129                DIP("vabs.%c%u %c%u, %c%u\n",
   7130                    F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
   7131                    Q ? 'q' : 'd', mreg);
   7132                break;
   7133             }
   7134             case 7: {
   7135                /* VNEG */
   7136                IROp op;
   7137                IRExpr *zero;
   7138                if (F) {
   7139                   switch (size) {
   7140                      case 0: case 1: case 3: return False;
   7141                      case 2: op = Q ? Iop_Neg32Fx4 : Iop_Neg32Fx2; break;
   7142                      default: vassert(0);
   7143                   }
   7144                   assign(res, unop(op, mkexpr(arg_m)));
   7145                } else {
   7146                   if (Q) {
   7147                      zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7148                   } else {
   7149                      zero = mkU64(0);
   7150                   }
   7151                   switch (size) {
   7152                      case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
   7153                      case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
   7154                      case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
   7155                      case 3: return False;
   7156                      default: vassert(0);
   7157                   }
   7158                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7159                }
   7160                DIP("vneg.%c%u %c%u, %c%u\n",
   7161                    F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
   7162                    Q ? 'q' : 'd', mreg);
   7163                break;
   7164             }
   7165             default:
   7166                vassert(0);
   7167          }
   7168          if (Q) {
   7169             putQReg(dreg, mkexpr(res), condT);
   7170          } else {
   7171             putDRegI64(dreg, mkexpr(res), condT);
   7172          }
   7173          return True;
   7174       case 2:
   7175          if ((B >> 1) == 0) {
   7176             /* VSWP */
   7177             if (Q) {
   7178                arg_m = newTemp(Ity_V128);
   7179                assign(arg_m, getQReg(mreg));
   7180                putQReg(mreg, getQReg(dreg), condT);
   7181                putQReg(dreg, mkexpr(arg_m), condT);
   7182             } else {
   7183                arg_m = newTemp(Ity_I64);
   7184                assign(arg_m, getDRegI64(mreg));
   7185                putDRegI64(mreg, getDRegI64(dreg), condT);
   7186                putDRegI64(dreg, mkexpr(arg_m), condT);
   7187             }
   7188             DIP("vswp %c%u, %c%u\n",
   7189                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7190             return True;
   7191          } else if ((B >> 1) == 1) {
   7192             /* VTRN */
   7193             IROp op_odd = Iop_INVALID, op_even = Iop_INVALID;
   7194             IRTemp old_m, old_d, new_d, new_m;
   7195             if (Q) {
   7196                old_m = newTemp(Ity_V128);
   7197                old_d = newTemp(Ity_V128);
   7198                new_m = newTemp(Ity_V128);
   7199                new_d = newTemp(Ity_V128);
   7200                assign(old_m, getQReg(mreg));
   7201                assign(old_d, getQReg(dreg));
   7202             } else {
   7203                old_m = newTemp(Ity_I64);
   7204                old_d = newTemp(Ity_I64);
   7205                new_m = newTemp(Ity_I64);
   7206                new_d = newTemp(Ity_I64);
   7207                assign(old_m, getDRegI64(mreg));
   7208                assign(old_d, getDRegI64(dreg));
   7209             }
   7210             if (Q) {
   7211                switch (size) {
   7212                   case 0:
   7213                      op_odd  = Iop_InterleaveOddLanes8x16;
   7214                      op_even = Iop_InterleaveEvenLanes8x16;
   7215                      break;
   7216                   case 1:
   7217                      op_odd  = Iop_InterleaveOddLanes16x8;
   7218                      op_even = Iop_InterleaveEvenLanes16x8;
   7219                      break;
   7220                   case 2:
   7221                      op_odd  = Iop_InterleaveOddLanes32x4;
   7222                      op_even = Iop_InterleaveEvenLanes32x4;
   7223                      break;
   7224                   case 3:
   7225                      return False;
   7226                   default:
   7227                      vassert(0);
   7228                }
   7229             } else {
   7230                switch (size) {
   7231                   case 0:
   7232                      op_odd  = Iop_InterleaveOddLanes8x8;
   7233                      op_even = Iop_InterleaveEvenLanes8x8;
   7234                      break;
   7235                   case 1:
   7236                      op_odd  = Iop_InterleaveOddLanes16x4;
   7237                      op_even = Iop_InterleaveEvenLanes16x4;
   7238                      break;
   7239                   case 2:
   7240                      op_odd  = Iop_InterleaveHI32x2;
   7241                      op_even = Iop_InterleaveLO32x2;
   7242                      break;
   7243                   case 3:
   7244                      return False;
   7245                   default:
   7246                      vassert(0);
   7247                }
   7248             }
   7249             assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
   7250             assign(new_m, binop(op_odd, mkexpr(old_m), mkexpr(old_d)));
   7251             if (Q) {
   7252                putQReg(dreg, mkexpr(new_d), condT);
   7253                putQReg(mreg, mkexpr(new_m), condT);
   7254             } else {
   7255                putDRegI64(dreg, mkexpr(new_d), condT);
   7256                putDRegI64(mreg, mkexpr(new_m), condT);
   7257             }
   7258             DIP("vtrn.%u %c%u, %c%u\n",
   7259                 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7260             return True;
   7261          } else if ((B >> 1) == 2) {
   7262             /* VUZP */
   7263             IROp op_even, op_odd;
   7264             IRTemp old_m, old_d, new_m, new_d;
   7265             if (!Q && size == 2)
   7266                return False;
   7267             if (Q) {
   7268                old_m = newTemp(Ity_V128);
   7269                old_d = newTemp(Ity_V128);
   7270                new_m = newTemp(Ity_V128);
   7271                new_d = newTemp(Ity_V128);
   7272                assign(old_m, getQReg(mreg));
   7273                assign(old_d, getQReg(dreg));
   7274             } else {
   7275                old_m = newTemp(Ity_I64);
   7276                old_d = newTemp(Ity_I64);
   7277                new_m = newTemp(Ity_I64);
   7278                new_d = newTemp(Ity_I64);
   7279                assign(old_m, getDRegI64(mreg));
   7280                assign(old_d, getDRegI64(dreg));
   7281             }
   7282             switch (size) {
   7283                case 0:
   7284                   op_odd  = Q ? Iop_CatOddLanes8x16 : Iop_CatOddLanes8x8;
   7285                   op_even = Q ? Iop_CatEvenLanes8x16 : Iop_CatEvenLanes8x8;
   7286                   break;
   7287                case 1:
   7288                   op_odd  = Q ? Iop_CatOddLanes16x8 : Iop_CatOddLanes16x4;
   7289                   op_even = Q ? Iop_CatEvenLanes16x8 : Iop_CatEvenLanes16x4;
   7290                   break;
   7291                case 2:
   7292                   op_odd  = Iop_CatOddLanes32x4;
   7293                   op_even = Iop_CatEvenLanes32x4;
   7294                   break;
   7295                case 3:
   7296                   return False;
   7297                default:
   7298                   vassert(0);
   7299             }
   7300             assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
   7301             assign(new_m, binop(op_odd,  mkexpr(old_m), mkexpr(old_d)));
   7302             if (Q) {
   7303                putQReg(dreg, mkexpr(new_d), condT);
   7304                putQReg(mreg, mkexpr(new_m), condT);
   7305             } else {
   7306                putDRegI64(dreg, mkexpr(new_d), condT);
   7307                putDRegI64(mreg, mkexpr(new_m), condT);
   7308             }
   7309             DIP("vuzp.%u %c%u, %c%u\n",
   7310                 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7311             return True;
   7312          } else if ((B >> 1) == 3) {
   7313             /* VZIP */
   7314             IROp op_lo, op_hi;
   7315             IRTemp old_m, old_d, new_m, new_d;
   7316             if (!Q && size == 2)
   7317                return False;
   7318             if (Q) {
   7319                old_m = newTemp(Ity_V128);
   7320                old_d = newTemp(Ity_V128);
   7321                new_m = newTemp(Ity_V128);
   7322                new_d = newTemp(Ity_V128);
   7323                assign(old_m, getQReg(mreg));
   7324                assign(old_d, getQReg(dreg));
   7325             } else {
   7326                old_m = newTemp(Ity_I64);
   7327                old_d = newTemp(Ity_I64);
   7328                new_m = newTemp(Ity_I64);
   7329                new_d = newTemp(Ity_I64);
   7330                assign(old_m, getDRegI64(mreg));
   7331                assign(old_d, getDRegI64(dreg));
   7332             }
   7333             switch (size) {
   7334                case 0:
   7335                   op_hi = Q ? Iop_InterleaveHI8x16 : Iop_InterleaveHI8x8;
   7336                   op_lo = Q ? Iop_InterleaveLO8x16 : Iop_InterleaveLO8x8;
   7337                   break;
   7338                case 1:
   7339                   op_hi = Q ? Iop_InterleaveHI16x8 : Iop_InterleaveHI16x4;
   7340                   op_lo = Q ? Iop_InterleaveLO16x8 : Iop_InterleaveLO16x4;
   7341                   break;
   7342                case 2:
   7343                   op_hi = Iop_InterleaveHI32x4;
   7344                   op_lo = Iop_InterleaveLO32x4;
   7345                   break;
   7346                case 3:
   7347                   return False;
   7348                default:
   7349                   vassert(0);
   7350             }
   7351             assign(new_d, binop(op_lo, mkexpr(old_m), mkexpr(old_d)));
   7352             assign(new_m, binop(op_hi, mkexpr(old_m), mkexpr(old_d)));
   7353             if (Q) {
   7354                putQReg(dreg, mkexpr(new_d), condT);
   7355                putQReg(mreg, mkexpr(new_m), condT);
   7356             } else {
   7357                putDRegI64(dreg, mkexpr(new_d), condT);
   7358                putDRegI64(mreg, mkexpr(new_m), condT);
   7359             }
   7360             DIP("vzip.%u %c%u, %c%u\n",
   7361                 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7362             return True;
   7363          } else if (B == 8) {
   7364             /* VMOVN */
   7365             IROp op;
   7366             mreg >>= 1;
   7367             switch (size) {
   7368                case 0: op = Iop_NarrowUn16to8x8;  break;
   7369                case 1: op = Iop_NarrowUn32to16x4; break;
   7370                case 2: op = Iop_NarrowUn64to32x2; break;
   7371                case 3: return False;
   7372                default: vassert(0);
   7373             }
   7374             putDRegI64(dreg, unop(op, getQReg(mreg)), condT);
   7375             DIP("vmovn.i%u d%u, q%u\n", 16 << size, dreg, mreg);
   7376             return True;
   7377          } else if (B == 9 || (B >> 1) == 5) {
   7378             /* VQMOVN, VQMOVUN */
   7379             IROp op, op2;
   7380             IRTemp tmp;
   7381             dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   7382             mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   7383             if (mreg & 1)
   7384                return False;
   7385             mreg >>= 1;
   7386             switch (size) {
   7387                case 0: op2 = Iop_NarrowUn16to8x8;  break;
   7388                case 1: op2 = Iop_NarrowUn32to16x4; break;
   7389                case 2: op2 = Iop_NarrowUn64to32x2; break;
   7390                case 3: return False;
   7391                default: vassert(0);
   7392             }
   7393             switch (B & 3) {
   7394                case 0:
   7395                   vassert(0);
   7396                case 1:
   7397                   switch (size) {
   7398                      case 0: op = Iop_QNarrowUn16Sto8Ux8;  break;
   7399                      case 1: op = Iop_QNarrowUn32Sto16Ux4; break;
   7400                      case 2: op = Iop_QNarrowUn64Sto32Ux2; break;
   7401                      case 3: return False;
   7402                      default: vassert(0);
   7403                   }
   7404                   DIP("vqmovun.s%u d%u, q%u\n", 16 << size, dreg, mreg);
   7405                   break;
   7406                case 2:
   7407                   switch (size) {
   7408                      case 0: op = Iop_QNarrowUn16Sto8Sx8;  break;
   7409                      case 1: op = Iop_QNarrowUn32Sto16Sx4; break;
   7410                      case 2: op = Iop_QNarrowUn64Sto32Sx2; break;
   7411                      case 3: return False;
   7412                      default: vassert(0);
   7413                   }
   7414                   DIP("vqmovn.s%u d%u, q%u\n", 16 << size, dreg, mreg);
   7415                   break;
   7416                case 3:
   7417                   switch (size) {
   7418                      case 0: op = Iop_QNarrowUn16Uto8Ux8;  break;
   7419                      case 1: op = Iop_QNarrowUn32Uto16Ux4; break;
   7420                      case 2: op = Iop_QNarrowUn64Uto32Ux2; break;
   7421                      case 3: return False;
   7422                      default: vassert(0);
   7423                   }
   7424                   DIP("vqmovn.u%u d%u, q%u\n", 16 << size, dreg, mreg);
   7425                   break;
   7426                default:
   7427                   vassert(0);
   7428             }
   7429             res = newTemp(Ity_I64);
   7430             tmp = newTemp(Ity_I64);
   7431             assign(res, unop(op, getQReg(mreg)));
   7432             assign(tmp, unop(op2, getQReg(mreg)));
   7433             setFlag_QC(mkexpr(res), mkexpr(tmp), False, condT);
   7434             putDRegI64(dreg, mkexpr(res), condT);
   7435             return True;
   7436          } else if (B == 12) {
   7437             /* VSHLL (maximum shift) */
   7438             IROp op, cvt;
   7439             UInt shift_imm;
   7440             if (Q)
   7441                return False;
   7442             if (dreg & 1)
   7443                return False;
   7444             dreg >>= 1;
   7445             shift_imm = 8 << size;
   7446             res = newTemp(Ity_V128);
   7447             switch (size) {
   7448                case 0: op = Iop_ShlN16x8; cvt = Iop_Widen8Uto16x8;  break;
   7449                case 1: op = Iop_ShlN32x4; cvt = Iop_Widen16Uto32x4; break;
   7450                case 2: op = Iop_ShlN64x2; cvt = Iop_Widen32Uto64x2; break;
   7451                case 3: return False;
   7452                default: vassert(0);
   7453             }
   7454             assign(res, binop(op, unop(cvt, getDRegI64(mreg)),
   7455                                   mkU8(shift_imm)));
   7456             putQReg(dreg, mkexpr(res), condT);
   7457             DIP("vshll.i%u q%u, d%u, #%u\n", 8 << size, dreg, mreg, 8 << size);
   7458             return True;
   7459          } else if ((B >> 3) == 3 && (B & 3) == 0) {
   7460             /* VCVT (half<->single) */
   7461             /* Half-precision extensions are needed to run this */
   7462             vassert(0); // ATC
   7463             if (((theInstr >> 18) & 3) != 1)
   7464                return False;
   7465             if ((theInstr >> 8) & 1) {
   7466                if (dreg & 1)
   7467                   return False;
   7468                dreg >>= 1;
   7469                putQReg(dreg, unop(Iop_F16toF32x4, getDRegI64(mreg)),
   7470                      condT);
   7471                DIP("vcvt.f32.f16 q%u, d%u\n", dreg, mreg);
   7472             } else {
   7473                if (mreg & 1)
   7474                   return False;
   7475                mreg >>= 1;
   7476                putDRegI64(dreg, unop(Iop_F32toF16x4, getQReg(mreg)),
   7477                                 condT);
   7478                DIP("vcvt.f16.f32 d%u, q%u\n", dreg, mreg);
   7479             }
   7480             return True;
   7481          } else {
   7482             return False;
   7483          }
   7484          vassert(0);
   7485          return True;
   7486       case 3:
   7487          if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,0)) {
   7488             /* VRECPE */
   7489             IROp op;
   7490             F = (theInstr >> 8) & 1;
   7491             if (size != 2)
   7492                return False;
   7493             if (Q) {
   7494                op = F ? Iop_RecipEst32Fx4 : Iop_RecipEst32Ux4;
   7495                putQReg(dreg, unop(op, getQReg(mreg)), condT);
   7496                DIP("vrecpe.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
   7497             } else {
   7498                op = F ? Iop_RecipEst32Fx2 : Iop_RecipEst32Ux2;
   7499                putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
   7500                DIP("vrecpe.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
   7501             }
   7502             return True;
   7503          } else if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,1)) {
   7504             /* VRSQRTE */
   7505             IROp op;
   7506             F = (B >> 2) & 1;
   7507             if (size != 2)
   7508                return False;
   7509             if (F) {
   7510                /* fp */
   7511                op = Q ? Iop_RSqrtEst32Fx4 : Iop_RSqrtEst32Fx2;
   7512             } else {
   7513                /* unsigned int */
   7514                op = Q ? Iop_RSqrtEst32Ux4 : Iop_RSqrtEst32Ux2;
   7515             }
   7516             if (Q) {
   7517                putQReg(dreg, unop(op, getQReg(mreg)), condT);
   7518                DIP("vrsqrte.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
   7519             } else {
   7520                putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
   7521                DIP("vrsqrte.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
   7522             }
   7523             return True;
   7524          } else if ((B >> 3) == 3) {
   7525             /* VCVT (fp<->integer) */
   7526             IROp op;
   7527             if (size != 2)
   7528                return False;
   7529             switch ((B >> 1) & 3) {
   7530                case 0:
   7531                   op = Q ? Iop_I32StoFx4 : Iop_I32StoFx2;
   7532                   DIP("vcvt.f32.s32 %c%u, %c%u\n",
   7533                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7534                   break;
   7535                case 1:
   7536                   op = Q ? Iop_I32UtoFx4 : Iop_I32UtoFx2;
   7537                   DIP("vcvt.f32.u32 %c%u, %c%u\n",
   7538                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7539                   break;
   7540                case 2:
   7541                   op = Q ? Iop_FtoI32Sx4_RZ : Iop_FtoI32Sx2_RZ;
   7542                   DIP("vcvt.s32.f32 %c%u, %c%u\n",
   7543                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7544                   break;
   7545                case 3:
   7546                   op = Q ? Iop_FtoI32Ux4_RZ : Iop_FtoI32Ux2_RZ;
   7547                   DIP("vcvt.u32.f32 %c%u, %c%u\n",
   7548                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7549                   break;
   7550                default:
   7551                   vassert(0);
   7552             }
   7553             if (Q) {
   7554                putQReg(dreg, unop(op, getQReg(mreg)), condT);
   7555             } else {
   7556                putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
   7557             }
   7558             return True;
   7559          } else {
   7560             return False;
   7561          }
   7562          vassert(0);
   7563          return True;
   7564       default:
   7565          vassert(0);
   7566    }
   7567    return False;
   7568 }
   7569 
   7570 /* A7.4.6 One register and a modified immediate value */
   7571 static
   7572 void ppNeonImm(UInt imm, UInt cmode, UInt op)
   7573 {
   7574    int i;
   7575    switch (cmode) {
   7576       case 0: case 1: case 8: case 9:
   7577          vex_printf("0x%x", imm);
   7578          break;
   7579       case 2: case 3: case 10: case 11:
   7580          vex_printf("0x%x00", imm);
   7581          break;
   7582       case 4: case 5:
   7583          vex_printf("0x%x0000", imm);
   7584          break;
   7585       case 6: case 7:
   7586          vex_printf("0x%x000000", imm);
   7587          break;
   7588       case 12:
   7589          vex_printf("0x%xff", imm);
   7590          break;
   7591       case 13:
   7592          vex_printf("0x%xffff", imm);
   7593          break;
   7594       case 14:
   7595          if (op) {
   7596             vex_printf("0x");
   7597             for (i = 7; i >= 0; i--)
   7598                vex_printf("%s", (imm & (1 << i)) ? "ff" : "00");
   7599          } else {
   7600             vex_printf("0x%x", imm);
   7601          }
   7602          break;
   7603       case 15:
   7604          vex_printf("0x%x", imm);
   7605          break;
   7606    }
   7607 }
   7608 
   7609 static
   7610 const char *ppNeonImmType(UInt cmode, UInt op)
   7611 {
   7612    switch (cmode) {
   7613       case 0 ... 7:
   7614       case 12: case 13:
   7615          return "i32";
   7616       case 8 ... 11:
   7617          return "i16";
   7618       case 14:
   7619          if (op)
   7620             return "i64";
   7621          else
   7622             return "i8";
   7623       case 15:
   7624          if (op)
   7625             vassert(0);
   7626          else
   7627             return "f32";
   7628       default:
   7629          vassert(0);
   7630    }
   7631 }
   7632 
   7633 static
   7634 void DIPimm(UInt imm, UInt cmode, UInt op,
   7635             const char *instr, UInt Q, UInt dreg)
   7636 {
   7637    if (vex_traceflags & VEX_TRACE_FE) {
   7638       vex_printf("%s.%s %c%u, #", instr,
   7639                  ppNeonImmType(cmode, op), Q ? 'q' : 'd', dreg);
   7640       ppNeonImm(imm, cmode, op);
   7641       vex_printf("\n");
   7642    }
   7643 }
   7644 
   7645 static
   7646 Bool dis_neon_data_1reg_and_imm ( UInt theInstr, IRTemp condT )
   7647 {
   7648    UInt dreg = get_neon_d_regno(theInstr);
   7649    ULong imm_raw = ((theInstr >> 17) & 0x80) | ((theInstr >> 12) & 0x70) |
   7650                   (theInstr & 0xf);
   7651    ULong imm_raw_pp = imm_raw;
   7652    UInt cmode = (theInstr >> 8) & 0xf;
   7653    UInt op_bit = (theInstr >> 5) & 1;
   7654    ULong imm = 0;
   7655    UInt Q = (theInstr >> 6) & 1;
   7656    int i, j;
   7657    UInt tmp;
   7658    IRExpr *imm_val;
   7659    IRExpr *expr;
   7660    IRTemp tmp_var;
   7661    switch(cmode) {
   7662       case 7: case 6:
   7663          imm_raw = imm_raw << 8;
   7664          /* fallthrough */
   7665       case 5: case 4:
   7666          imm_raw = imm_raw << 8;
   7667          /* fallthrough */
   7668       case 3: case 2:
   7669          imm_raw = imm_raw << 8;
   7670          /* fallthrough */
   7671       case 0: case 1:
   7672          imm = (imm_raw << 32) | imm_raw;
   7673          break;
   7674       case 11: case 10:
   7675          imm_raw = imm_raw << 8;
   7676          /* fallthrough */
   7677       case 9: case 8:
   7678          imm_raw = (imm_raw << 16) | imm_raw;
   7679          imm = (imm_raw << 32) | imm_raw;
   7680          break;
   7681       case 13:
   7682          imm_raw = (imm_raw << 8) | 0xff;
   7683          /* fallthrough */
   7684       case 12:
   7685          imm_raw = (imm_raw << 8) | 0xff;
   7686          imm = (imm_raw << 32) | imm_raw;
   7687          break;
   7688       case 14:
   7689          if (! op_bit) {
   7690             for(i = 0; i < 8; i++) {
   7691                imm = (imm << 8) | imm_raw;
   7692             }
   7693          } else {
   7694             for(i = 7; i >= 0; i--) {
   7695                tmp = 0;
   7696                for(j = 0; j < 8; j++) {
   7697                   tmp = (tmp << 1) | ((imm_raw >> i) & 1);
   7698                }
   7699                imm = (imm << 8) | tmp;
   7700             }
   7701          }
   7702          break;
   7703       case 15:
   7704          imm = (imm_raw & 0x80) << 5;
   7705          imm |= ((~imm_raw & 0x40) << 5);
   7706          for(i = 1; i <= 4; i++)
   7707             imm |= (imm_raw & 0x40) << i;
   7708          imm |= (imm_raw & 0x7f);
   7709          imm = imm << 19;
   7710          imm = (imm << 32) | imm;
   7711          break;
   7712       default:
   7713          return False;
   7714    }
   7715    if (Q) {
   7716       imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   7717    } else {
   7718       imm_val = mkU64(imm);
   7719    }
   7720    if (((op_bit == 0) &&
   7721       (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 12) == 12))) ||
   7722       ((op_bit == 1) && (cmode == 14))) {
   7723       /* VMOV (immediate) */
   7724       if (Q) {
   7725          putQReg(dreg, imm_val, condT);
   7726       } else {
   7727          putDRegI64(dreg, imm_val, condT);
   7728       }
   7729       DIPimm(imm_raw_pp, cmode, op_bit, "vmov", Q, dreg);
   7730       return True;
   7731    }
   7732    if ((op_bit == 1) &&
   7733       (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 14) == 12))) {
   7734       /* VMVN (immediate) */
   7735       if (Q) {
   7736          putQReg(dreg, unop(Iop_NotV128, imm_val), condT);
   7737       } else {
   7738          putDRegI64(dreg, unop(Iop_Not64, imm_val), condT);
   7739       }
   7740       DIPimm(imm_raw_pp, cmode, op_bit, "vmvn", Q, dreg);
   7741       return True;
   7742    }
   7743    if (Q) {
   7744       tmp_var = newTemp(Ity_V128);
   7745       assign(tmp_var, getQReg(dreg));
   7746    } else {
   7747       tmp_var = newTemp(Ity_I64);
   7748       assign(tmp_var, getDRegI64(dreg));
   7749    }
   7750    if ((op_bit == 0) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
   7751       /* VORR (immediate) */
   7752       if (Q)
   7753          expr = binop(Iop_OrV128, mkexpr(tmp_var), imm_val);
   7754       else
   7755          expr = binop(Iop_Or64, mkexpr(tmp_var), imm_val);
   7756       DIPimm(imm_raw_pp, cmode, op_bit, "vorr", Q, dreg);
   7757    } else if ((op_bit == 1) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
   7758       /* VBIC (immediate) */
   7759       if (Q)
   7760          expr = binop(Iop_AndV128, mkexpr(tmp_var),
   7761                                    unop(Iop_NotV128, imm_val));
   7762       else
   7763          expr = binop(Iop_And64, mkexpr(tmp_var), unop(Iop_Not64, imm_val));
   7764       DIPimm(imm_raw_pp, cmode, op_bit, "vbic", Q, dreg);
   7765    } else {
   7766       return False;
   7767    }
   7768    if (Q)
   7769       putQReg(dreg, expr, condT);
   7770    else
   7771       putDRegI64(dreg, expr, condT);
   7772    return True;
   7773 }
   7774 
   7775 /* A7.4 Advanced SIMD data-processing instructions */
   7776 static
   7777 Bool dis_neon_data_processing ( UInt theInstr, IRTemp condT )
   7778 {
   7779    UInt A = (theInstr >> 19) & 0x1F;
   7780    UInt B = (theInstr >>  8) & 0xF;
   7781    UInt C = (theInstr >>  4) & 0xF;
   7782    UInt U = (theInstr >> 24) & 0x1;
   7783 
   7784    if (! (A & 0x10)) {
   7785       return dis_neon_data_3same(theInstr, condT);
   7786    }
   7787    if (((A & 0x17) == 0x10) && ((C & 0x9) == 0x1)) {
   7788       return dis_neon_data_1reg_and_imm(theInstr, condT);
   7789    }
   7790    if ((C & 1) == 1) {
   7791       return dis_neon_data_2reg_and_shift(theInstr, condT);
   7792    }
   7793    if (((C & 5) == 0) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
   7794       return dis_neon_data_3diff(theInstr, condT);
   7795    }
   7796    if (((C & 5) == 4) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
   7797       return dis_neon_data_2reg_and_scalar(theInstr, condT);
   7798    }
   7799    if ((A & 0x16) == 0x16) {
   7800       if ((U == 0) && ((C & 1) == 0)) {
   7801          return dis_neon_vext(theInstr, condT);
   7802       }
   7803       if ((U != 1) || ((C & 1) == 1))
   7804          return False;
   7805       if ((B & 8) == 0) {
   7806          return dis_neon_data_2reg_misc(theInstr, condT);
   7807       }
   7808       if ((B & 12) == 8) {
   7809          return dis_neon_vtb(theInstr, condT);
   7810       }
   7811       if ((B == 12) && ((C & 9) == 0)) {
   7812          return dis_neon_vdup(theInstr, condT);
   7813       }
   7814    }
   7815    return False;
   7816 }
   7817 
   7818 
   7819 /*------------------------------------------------------------*/
   7820 /*--- NEON loads and stores                                ---*/
   7821 /*------------------------------------------------------------*/
   7822 
   7823 /* For NEON memory operations, we use the standard scheme to handle
   7824    conditionalisation: generate a jump around the instruction if the
   7825    condition is false.  That's only necessary in Thumb mode, however,
   7826    since in ARM mode NEON instructions are unconditional. */
   7827 
   7828 /* A helper function for what follows.  It assumes we already went
   7829    uncond as per comments at the top of this section. */
   7830 static
   7831 void mk_neon_elem_load_to_one_lane( UInt rD, UInt inc, UInt index,
   7832                                     UInt N, UInt size, IRTemp addr )
   7833 {
   7834    UInt i;
   7835    switch (size) {
   7836       case 0:
   7837          putDRegI64(rD, triop(Iop_SetElem8x8, getDRegI64(rD), mkU8(index),
   7838                     loadLE(Ity_I8, mkexpr(addr))), IRTemp_INVALID);
   7839          break;
   7840       case 1:
   7841          putDRegI64(rD, triop(Iop_SetElem16x4, getDRegI64(rD), mkU8(index),
   7842                     loadLE(Ity_I16, mkexpr(addr))), IRTemp_INVALID);
   7843          break;
   7844       case 2:
   7845          putDRegI64(rD, triop(Iop_SetElem32x2, getDRegI64(rD), mkU8(index),
   7846                     loadLE(Ity_I32, mkexpr(addr))), IRTemp_INVALID);
   7847          break;
   7848       default:
   7849          vassert(0);
   7850    }
   7851    for (i = 1; i <= N; i++) {
   7852       switch (size) {
   7853          case 0:
   7854             putDRegI64(rD + i * inc,
   7855                        triop(Iop_SetElem8x8,
   7856                              getDRegI64(rD + i * inc),
   7857                              mkU8(index),
   7858                              loadLE(Ity_I8, binop(Iop_Add32,
   7859                                                   mkexpr(addr),
   7860                                                   mkU32(i * 1)))),
   7861                        IRTemp_INVALID);
   7862             break;
   7863          case 1:
   7864             putDRegI64(rD + i * inc,
   7865                        triop(Iop_SetElem16x4,
   7866                              getDRegI64(rD + i * inc),
   7867                              mkU8(index),
   7868                              loadLE(Ity_I16, binop(Iop_Add32,
   7869                                                    mkexpr(addr),
   7870                                                    mkU32(i * 2)))),
   7871                        IRTemp_INVALID);
   7872             break;
   7873          case 2:
   7874             putDRegI64(rD + i * inc,
   7875                        triop(Iop_SetElem32x2,
   7876                              getDRegI64(rD + i * inc),
   7877                              mkU8(index),
   7878                              loadLE(Ity_I32, binop(Iop_Add32,
   7879                                                    mkexpr(addr),
   7880                                                    mkU32(i * 4)))),
   7881                        IRTemp_INVALID);
   7882             break;
   7883          default:
   7884             vassert(0);
   7885       }
   7886    }
   7887 }
   7888 
   7889 /* A(nother) helper function for what follows.  It assumes we already
   7890    went uncond as per comments at the top of this section. */
   7891 static
   7892 void mk_neon_elem_store_from_one_lane( UInt rD, UInt inc, UInt index,
   7893                                        UInt N, UInt size, IRTemp addr )
   7894 {
   7895    UInt i;
   7896    switch (size) {
   7897       case 0:
   7898          storeLE(mkexpr(addr),
   7899                  binop(Iop_GetElem8x8, getDRegI64(rD), mkU8(index)));
   7900          break;
   7901       case 1:
   7902          storeLE(mkexpr(addr),
   7903                  binop(Iop_GetElem16x4, getDRegI64(rD), mkU8(index)));
   7904          break;
   7905       case 2:
   7906          storeLE(mkexpr(addr),
   7907                  binop(Iop_GetElem32x2, getDRegI64(rD), mkU8(index)));
   7908          break;
   7909       default:
   7910          vassert(0);
   7911    }
   7912    for (i = 1; i <= N; i++) {
   7913       switch (size) {
   7914          case 0:
   7915             storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 1)),
   7916                     binop(Iop_GetElem8x8, getDRegI64(rD + i * inc),
   7917                                           mkU8(index)));
   7918             break;
   7919          case 1:
   7920             storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 2)),
   7921                     binop(Iop_GetElem16x4, getDRegI64(rD + i * inc),
   7922                                            mkU8(index)));
   7923             break;
   7924          case 2:
   7925             storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 4)),
   7926                     binop(Iop_GetElem32x2, getDRegI64(rD + i * inc),
   7927                                            mkU8(index)));
   7928             break;
   7929          default:
   7930             vassert(0);
   7931       }
   7932    }
   7933 }
   7934 
   7935 /* Generate 2x64 -> 2x64 deinterleave code, for VLD2.  Caller must
   7936    make *u0 and *u1 be valid IRTemps before the call. */
   7937 static void math_DEINTERLEAVE_2 (/*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
   7938                                  IRTemp i0, IRTemp i1, Int laneszB)
   7939 {
   7940    /* The following assumes that the guest is little endian, and hence
   7941       that the memory-side (interleaved) data is stored
   7942       little-endianly. */
   7943    vassert(u0 && u1);
   7944    /* This is pretty easy, since we have primitives directly to
   7945       hand. */
   7946    if (laneszB == 4) {
   7947       // memLE(128 bits) == A0 B0 A1 B1
   7948       // i0 == B0 A0, i1 == B1 A1
   7949       // u0 == A1 A0, u1 == B1 B0
   7950       assign(*u0, binop(Iop_InterleaveLO32x2, mkexpr(i1), mkexpr(i0)));
   7951       assign(*u1, binop(Iop_InterleaveHI32x2, mkexpr(i1), mkexpr(i0)));
   7952    } else if (laneszB == 2) {
   7953       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
   7954       // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
   7955       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
   7956       assign(*u0, binop(Iop_CatEvenLanes16x4, mkexpr(i1), mkexpr(i0)));
   7957       assign(*u1, binop(Iop_CatOddLanes16x4,  mkexpr(i1), mkexpr(i0)));
   7958    } else if (laneszB == 1) {
   7959       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
   7960       // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
   7961       // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
   7962       assign(*u0, binop(Iop_CatEvenLanes8x8, mkexpr(i1), mkexpr(i0)));
   7963       assign(*u1, binop(Iop_CatOddLanes8x8,  mkexpr(i1), mkexpr(i0)));
   7964    } else {
   7965       // Can never happen, since VLD2 only has valid lane widths of 32,
   7966       // 16 or 8 bits.
   7967       vpanic("math_DEINTERLEAVE_2");
   7968    }
   7969 }
   7970 
   7971 /* Generate 2x64 -> 2x64 interleave code, for VST2.  Caller must make
   7972    *u0 and *u1 be valid IRTemps before the call. */
   7973 static void math_INTERLEAVE_2 (/*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
   7974                                IRTemp u0, IRTemp u1, Int laneszB)
   7975 {
   7976    /* The following assumes that the guest is little endian, and hence
   7977       that the memory-side (interleaved) data is stored
   7978       little-endianly. */
   7979    vassert(i0 && i1);
   7980    /* This is pretty easy, since we have primitives directly to
   7981       hand. */
   7982    if (laneszB == 4) {
   7983       // memLE(128 bits) == A0 B0 A1 B1
   7984       // i0 == B0 A0, i1 == B1 A1
   7985       // u0 == A1 A0, u1 == B1 B0
   7986       assign(*i0, binop(Iop_InterleaveLO32x2, mkexpr(u1), mkexpr(u0)));
   7987       assign(*i1, binop(Iop_InterleaveHI32x2, mkexpr(u1), mkexpr(u0)));
   7988    } else if (laneszB == 2) {
   7989       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
   7990       // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
   7991       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
   7992       assign(*i0, binop(Iop_InterleaveLO16x4, mkexpr(u1), mkexpr(u0)));
   7993       assign(*i1, binop(Iop_InterleaveHI16x4, mkexpr(u1), mkexpr(u0)));
   7994    } else if (laneszB == 1) {
   7995       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
   7996       // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
   7997       // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
   7998       assign(*i0, binop(Iop_InterleaveLO8x8, mkexpr(u1), mkexpr(u0)));
   7999       assign(*i1, binop(Iop_InterleaveHI8x8, mkexpr(u1), mkexpr(u0)));
   8000    } else {
   8001       // Can never happen, since VST2 only has valid lane widths of 32,
   8002       // 16 or 8 bits.
   8003       vpanic("math_INTERLEAVE_2");
   8004    }
   8005 }
   8006 
   8007 // Helper function for generating arbitrary slicing 'n' dicing of
   8008 // 3 8x8 vectors, as needed for VLD3.8 and VST3.8.
   8009 static IRExpr* math_PERM_8x8x3(const UChar* desc,
   8010                                IRTemp s0, IRTemp s1, IRTemp s2)
   8011 {
   8012    // desc is an array of 8 pairs, encoded as 16 bytes,
   8013    // that describe how to assemble the result lanes, starting with
   8014    // lane 7.  Each pair is: first component (0..2) says which of
   8015    // s0/s1/s2 to use.  Second component (0..7) is the lane number
   8016    // in the source to use.
   8017    UInt si;
   8018    for (si = 0; si < 7; si++) {
   8019       vassert(desc[2 * si + 0] <= 2);
   8020       vassert(desc[2 * si + 1] <= 7);
   8021    }
   8022    IRTemp h3 = newTemp(Ity_I64);
   8023    IRTemp h2 = newTemp(Ity_I64);
   8024    IRTemp h1 = newTemp(Ity_I64);
   8025    IRTemp h0 = newTemp(Ity_I64);
   8026    IRTemp srcs[3] = {s0, s1, s2};
   8027 #  define SRC_VEC(_lane)   mkexpr(srcs[desc[2 * (7-(_lane)) + 0]])
   8028 #  define SRC_SHIFT(_lane) mkU8(56-8*(desc[2 * (7-(_lane)) + 1]))
   8029    assign(h3, binop(Iop_InterleaveHI8x8,
   8030                     binop(Iop_Shl64, SRC_VEC(7), SRC_SHIFT(7)),
   8031                     binop(Iop_Shl64, SRC_VEC(6), SRC_SHIFT(6))));
   8032    assign(h2, binop(Iop_InterleaveHI8x8,
   8033                     binop(Iop_Shl64, SRC_VEC(5), SRC_SHIFT(5)),
   8034                     binop(Iop_Shl64, SRC_VEC(4), SRC_SHIFT(4))));
   8035    assign(h1, binop(Iop_InterleaveHI8x8,
   8036                     binop(Iop_Shl64, SRC_VEC(3), SRC_SHIFT(3)),
   8037                     binop(Iop_Shl64, SRC_VEC(2), SRC_SHIFT(2))));
   8038    assign(h0, binop(Iop_InterleaveHI8x8,
   8039                     binop(Iop_Shl64, SRC_VEC(1), SRC_SHIFT(1)),
   8040                     binop(Iop_Shl64, SRC_VEC(0), SRC_SHIFT(0))));
   8041 #  undef SRC_VEC
   8042 #  undef SRC_SHIFT
   8043    // Now h3..h0 are 64 bit vectors with useful information only
   8044    // in the top 16 bits.  We now concatentate those four 16-bit
   8045    // groups so as to produce the final result.
   8046    IRTemp w1 = newTemp(Ity_I64);
   8047    IRTemp w0 = newTemp(Ity_I64);
   8048    assign(w1, binop(Iop_InterleaveHI16x4, mkexpr(h3), mkexpr(h2)));
   8049    assign(w0, binop(Iop_InterleaveHI16x4, mkexpr(h1), mkexpr(h0)));
   8050    return binop(Iop_InterleaveHI32x2, mkexpr(w1), mkexpr(w0));
   8051 }
   8052 
   8053 /* Generate 3x64 -> 3x64 deinterleave code, for VLD3.  Caller must
   8054    make *u0, *u1 and *u2 be valid IRTemps before the call. */
   8055 static void math_DEINTERLEAVE_3 (
   8056                /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1, /*OUT*/IRTemp* u2,
   8057                IRTemp i0, IRTemp i1, IRTemp i2, Int laneszB
   8058             )
   8059 {
   8060 #  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
   8061 #  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
   8062 #  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8063    /* The following assumes that the guest is little endian, and hence
   8064       that the memory-side (interleaved) data is stored
   8065       little-endianly. */
   8066    vassert(u0 && u1 && u2);
   8067    if (laneszB == 4) {
   8068       // memLE(192 bits) == A0 B0 C0 A1 B1 C1
   8069       // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
   8070       // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
   8071       assign(*u0, IHI32x2(SHL64(i1,  0), SHL64(i0, 32)));
   8072       assign(*u1, IHI32x2(SHL64(i2, 32), SHL64(i0,  0)));
   8073       assign(*u2, IHI32x2(SHL64(i2,  0), SHL64(i1, 32)));
   8074    } else if (laneszB == 2) {
   8075       // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
   8076       // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
   8077       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
   8078 #     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
   8079                 IHI32x2(                                      \
   8080                    IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
   8081                            SHL64((_tmp2),48-16*(_la2))),      \
   8082                    IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
   8083                            SHL64((_tmp0),48-16*(_la0))))
   8084       assign(*u0, XXX(i2,1, i1,2, i0,3, i0,0));
   8085       assign(*u1, XXX(i2,2, i1,3, i1,0, i0,1));
   8086       assign(*u2, XXX(i2,3, i2,0, i1,1, i0,2));
   8087 #     undef XXX
   8088    } else if (laneszB == 1) {
   8089       // These describe how the result vectors [7..0] are
   8090       // assembled from the source vectors.  Each pair is
   8091       // (source vector number, lane number).
   8092       static const UChar de0[16] = {2,5, 2,2, 1,7, 1,4, 1,1, 0,6, 0,3, 0,0};
   8093       static const UChar de1[16] = {2,6, 2,3, 2,0, 1,5, 1,2, 0,7, 0,4, 0,1};
   8094       static const UChar de2[16] = {2,7, 2,4, 2,1, 1,6, 1,3, 1,0, 0,5, 0,2};
   8095       assign(*u0, math_PERM_8x8x3(de0, i0, i1, i2));
   8096       assign(*u1, math_PERM_8x8x3(de1, i0, i1, i2));
   8097       assign(*u2, math_PERM_8x8x3(de2, i0, i1, i2));
   8098    } else {
   8099       // Can never happen, since VLD3 only has valid lane widths of 32,
   8100       // 16 or 8 bits.
   8101       vpanic("math_DEINTERLEAVE_3");
   8102    }
   8103 #  undef SHL64
   8104 #  undef IHI16x4
   8105 #  undef IHI32x2
   8106 }
   8107 
   8108 /* Generate 3x64 -> 3x64 interleave code, for VST3.  Caller must
   8109    make *i0, *i1 and *i2 be valid IRTemps before the call. */
   8110 static void math_INTERLEAVE_3 (
   8111                /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1, /*OUT*/IRTemp* i2,
   8112                IRTemp u0, IRTemp u1, IRTemp u2, Int laneszB
   8113             )
   8114 {
   8115 #  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
   8116 #  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
   8117 #  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8118    /* The following assumes that the guest is little endian, and hence
   8119       that the memory-side (interleaved) data is stored
   8120       little-endianly. */
   8121    vassert(i0 && i1 && i2);
   8122    if (laneszB == 4) {
   8123       // memLE(192 bits) == A0 B0 C0 A1 B1 C1
   8124       // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
   8125       // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
   8126       assign(*i0, IHI32x2(SHL64(u1, 32), SHL64(u0, 32)));
   8127       assign(*i1, IHI32x2(SHL64(u0,  0), SHL64(u2, 32)));
   8128       assign(*i2, IHI32x2(SHL64(u2,  0), SHL64(u1,  0)));
   8129    } else if (laneszB == 2) {
   8130       // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
   8131       // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
   8132       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
   8133 #     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
   8134                 IHI32x2(                                      \
   8135                    IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
   8136                            SHL64((_tmp2),48-16*(_la2))),      \
   8137                    IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
   8138                            SHL64((_tmp0),48-16*(_la0))))
   8139       assign(*i0, XXX(u0,1, u2,0, u1,0, u0,0));
   8140       assign(*i1, XXX(u1,2, u0,2, u2,1, u1,1));
   8141       assign(*i2, XXX(u2,3, u1,3, u0,3, u2,2));
   8142 #     undef XXX
   8143    } else if (laneszB == 1) {
   8144       // These describe how the result vectors [7..0] are
   8145       // assembled from the source vectors.  Each pair is
   8146       // (source vector number, lane number).
   8147       static const UChar in0[16] = {1,2, 0,2, 2,1, 1,1, 0,1, 2,0, 1,0, 0,0};
   8148       static const UChar in1[16] = {0,5, 2,4, 1,4, 0,4, 2,3, 1,3, 0,3, 2,2};
   8149       static const UChar in2[16] = {2,7, 1,7, 0,7, 2,6, 1,6, 0,6, 2,5, 1,5};
   8150       assign(*i0, math_PERM_8x8x3(in0, u0, u1, u2));
   8151       assign(*i1, math_PERM_8x8x3(in1, u0, u1, u2));
   8152       assign(*i2, math_PERM_8x8x3(in2, u0, u1, u2));
   8153    } else {
   8154       // Can never happen, since VST3 only has valid lane widths of 32,
   8155       // 16 or 8 bits.
   8156       vpanic("math_INTERLEAVE_3");
   8157    }
   8158 #  undef SHL64
   8159 #  undef IHI16x4
   8160 #  undef IHI32x2
   8161 }
   8162 
   8163 /* Generate 4x64 -> 4x64 deinterleave code, for VLD4.  Caller must
   8164    make *u0, *u1, *u2 and *u3 be valid IRTemps before the call. */
   8165 static void math_DEINTERLEAVE_4 (
   8166                /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
   8167                /*OUT*/IRTemp* u2, /*OUT*/IRTemp* u3,
   8168                IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3, Int laneszB
   8169             )
   8170 {
   8171 #  define IHI32x2(_t1, _t2) \
   8172              binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
   8173 #  define ILO32x2(_t1, _t2) \
   8174              binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
   8175 #  define IHI16x4(_t1, _t2) \
   8176              binop(Iop_InterleaveHI16x4, mkexpr(_t1), mkexpr(_t2))
   8177 #  define ILO16x4(_t1, _t2) \
   8178              binop(Iop_InterleaveLO16x4, mkexpr(_t1), mkexpr(_t2))
   8179 #  define IHI8x8(_t1, _e2) \
   8180              binop(Iop_InterleaveHI8x8, mkexpr(_t1), _e2)
   8181 #  define SHL64(_tmp, _amt) \
   8182              binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8183    /* The following assumes that the guest is little endian, and hence
   8184       that the memory-side (interleaved) data is stored
   8185       little-endianly. */
   8186    vassert(u0 && u1 && u2 && u3);
   8187    if (laneszB == 4) {
   8188       assign(*u0, ILO32x2(i2, i0));
   8189       assign(*u1, IHI32x2(i2, i0));
   8190       assign(*u2, ILO32x2(i3, i1));
   8191       assign(*u3, IHI32x2(i3, i1));
   8192    } else if (laneszB == 2) {
   8193       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8194       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8195       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8196       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8197       assign(b1b0a1a0, ILO16x4(i1, i0));
   8198       assign(b3b2a3a2, ILO16x4(i3, i2));
   8199       assign(d1d0c1c0, IHI16x4(i1, i0));
   8200       assign(d3d2c3c2, IHI16x4(i3, i2));
   8201       // And now do what we did for the 32-bit case.
   8202       assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
   8203       assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
   8204       assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
   8205       assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
   8206    } else if (laneszB == 1) {
   8207       // Deinterleave into 16-bit chunks, then do as the 16-bit case.
   8208       IRTemp i0x = newTemp(Ity_I64);
   8209       IRTemp i1x = newTemp(Ity_I64);
   8210       IRTemp i2x = newTemp(Ity_I64);
   8211       IRTemp i3x = newTemp(Ity_I64);
   8212       assign(i0x, IHI8x8(i0, SHL64(i0, 32)));
   8213       assign(i1x, IHI8x8(i1, SHL64(i1, 32)));
   8214       assign(i2x, IHI8x8(i2, SHL64(i2, 32)));
   8215       assign(i3x, IHI8x8(i3, SHL64(i3, 32)));
   8216       // From here on is like the 16 bit case.
   8217       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8218       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8219       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8220       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8221       assign(b1b0a1a0, ILO16x4(i1x, i0x));
   8222       assign(b3b2a3a2, ILO16x4(i3x, i2x));
   8223       assign(d1d0c1c0, IHI16x4(i1x, i0x));
   8224       assign(d3d2c3c2, IHI16x4(i3x, i2x));
   8225       // And now do what we did for the 32-bit case.
   8226       assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
   8227       assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
   8228       assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
   8229       assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
   8230    } else {
   8231       // Can never happen, since VLD4 only has valid lane widths of 32,
   8232       // 16 or 8 bits.
   8233       vpanic("math_DEINTERLEAVE_4");
   8234    }
   8235 #  undef SHL64
   8236 #  undef IHI8x8
   8237 #  undef ILO16x4
   8238 #  undef IHI16x4
   8239 #  undef ILO32x2
   8240 #  undef IHI32x2
   8241 }
   8242 
   8243 /* Generate 4x64 -> 4x64 interleave code, for VST4.  Caller must
   8244    make *i0, *i1, *i2 and *i3 be valid IRTemps before the call. */
   8245 static void math_INTERLEAVE_4 (
   8246                /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
   8247                /*OUT*/IRTemp* i2, /*OUT*/IRTemp* i3,
   8248                IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3, Int laneszB
   8249             )
   8250 {
   8251 #  define IHI32x2(_t1, _t2) \
   8252              binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
   8253 #  define ILO32x2(_t1, _t2) \
   8254              binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
   8255 #  define CEV16x4(_t1, _t2) \
   8256              binop(Iop_CatEvenLanes16x4, mkexpr(_t1), mkexpr(_t2))
   8257 #  define COD16x4(_t1, _t2) \
   8258              binop(Iop_CatOddLanes16x4, mkexpr(_t1), mkexpr(_t2))
   8259 #  define COD8x8(_t1, _e2) \
   8260              binop(Iop_CatOddLanes8x8, mkexpr(_t1), _e2)
   8261 #  define SHL64(_tmp, _amt) \
   8262              binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8263    /* The following assumes that the guest is little endian, and hence
   8264       that the memory-side (interleaved) data is stored
   8265       little-endianly. */
   8266    vassert(u0 && u1 && u2 && u3);
   8267    if (laneszB == 4) {
   8268       assign(*i0, ILO32x2(u1, u0));
   8269       assign(*i1, ILO32x2(u3, u2));
   8270       assign(*i2, IHI32x2(u1, u0));
   8271       assign(*i3, IHI32x2(u3, u2));
   8272    } else if (laneszB == 2) {
   8273       // First, interleave at the 32-bit lane size.
   8274       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8275       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8276       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8277       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8278       assign(b1b0a1a0, ILO32x2(u1, u0));
   8279       assign(b3b2a3a2, IHI32x2(u1, u0));
   8280       assign(d1d0c1c0, ILO32x2(u3, u2));
   8281       assign(d3d2c3c2, IHI32x2(u3, u2));
   8282       // And interleave (cat) at the 16 bit size.
   8283       assign(*i0, CEV16x4(d1d0c1c0, b1b0a1a0));
   8284       assign(*i1, COD16x4(d1d0c1c0, b1b0a1a0));
   8285       assign(*i2, CEV16x4(d3d2c3c2, b3b2a3a2));
   8286       assign(*i3, COD16x4(d3d2c3c2, b3b2a3a2));
   8287    } else if (laneszB == 1) {
   8288       // First, interleave at the 32-bit lane size.
   8289       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8290       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8291       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8292       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8293       assign(b1b0a1a0, ILO32x2(u1, u0));
   8294       assign(b3b2a3a2, IHI32x2(u1, u0));
   8295       assign(d1d0c1c0, ILO32x2(u3, u2));
   8296       assign(d3d2c3c2, IHI32x2(u3, u2));
   8297       // And interleave (cat) at the 16 bit size.
   8298       IRTemp i0x = newTemp(Ity_I64);
   8299       IRTemp i1x = newTemp(Ity_I64);
   8300       IRTemp i2x = newTemp(Ity_I64);
   8301       IRTemp i3x = newTemp(Ity_I64);
   8302       assign(i0x, CEV16x4(d1d0c1c0, b1b0a1a0));
   8303       assign(i1x, COD16x4(d1d0c1c0, b1b0a1a0));
   8304       assign(i2x, CEV16x4(d3d2c3c2, b3b2a3a2));
   8305       assign(i3x, COD16x4(d3d2c3c2, b3b2a3a2));
   8306       // And rearrange within each word, to get the right 8 bit lanes.
   8307       assign(*i0, COD8x8(i0x, SHL64(i0x, 8)));
   8308       assign(*i1, COD8x8(i1x, SHL64(i1x, 8)));
   8309       assign(*i2, COD8x8(i2x, SHL64(i2x, 8)));
   8310       assign(*i3, COD8x8(i3x, SHL64(i3x, 8)));
   8311    } else {
   8312       // Can never happen, since VLD4 only has valid lane widths of 32,
   8313       // 16 or 8 bits.
   8314       vpanic("math_DEINTERLEAVE_4");
   8315    }
   8316 #  undef SHL64
   8317 #  undef COD8x8
   8318 #  undef COD16x4
   8319 #  undef CEV16x4
   8320 #  undef ILO32x2
   8321 #  undef IHI32x2
   8322 }
   8323 
   8324 /* A7.7 Advanced SIMD element or structure load/store instructions */
   8325 static
   8326 Bool dis_neon_load_or_store ( UInt theInstr,
   8327                               Bool isT, IRTemp condT )
   8328 {
   8329 #  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
   8330    UInt bA = INSN(23,23);
   8331    UInt fB = INSN(11,8);
   8332    UInt bL = INSN(21,21);
   8333    UInt rD = (INSN(22,22) << 4) | INSN(15,12);
   8334    UInt rN = INSN(19,16);
   8335    UInt rM = INSN(3,0);
   8336    UInt N, size, i, j;
   8337    UInt inc;
   8338    UInt regs = 1;
   8339 
   8340    if (isT) {
   8341       vassert(condT != IRTemp_INVALID);
   8342    } else {
   8343       vassert(condT == IRTemp_INVALID);
   8344    }
   8345    /* So now, if condT is not IRTemp_INVALID, we know we're
   8346       dealing with Thumb code. */
   8347 
   8348    if (INSN(20,20) != 0)
   8349       return False;
   8350 
   8351    IRTemp initialRn = newTemp(Ity_I32);
   8352    assign(initialRn, isT ? getIRegT(rN) : getIRegA(rN));
   8353 
   8354    IRTemp initialRm = newTemp(Ity_I32);
   8355    assign(initialRm, isT ? getIRegT(rM) : getIRegA(rM));
   8356 
   8357    /* There are 3 cases:
   8358       (1) VSTn / VLDn (n-element structure from/to one lane)
   8359       (2) VLDn (single element to all lanes)
   8360       (3) VSTn / VLDn (multiple n-element structures)
   8361    */
   8362    if (bA) {
   8363       N = fB & 3;
   8364       if ((fB >> 2) < 3) {
   8365          /* ------------ Case (1) ------------
   8366             VSTn / VLDn (n-element structure from/to one lane) */
   8367 
   8368          size = fB >> 2;
   8369 
   8370          switch (size) {
   8371             case 0: i = INSN(7,5); inc = 1; break;
   8372             case 1: i = INSN(7,6); inc = INSN(5,5) ? 2 : 1; break;
   8373             case 2: i = INSN(7,7); inc = INSN(6,6) ? 2 : 1; break;
   8374             case 3: return False;
   8375             default: vassert(0);
   8376          }
   8377 
   8378          IRTemp addr = newTemp(Ity_I32);
   8379          assign(addr, mkexpr(initialRn));
   8380 
   8381          // go uncond
   8382          if (condT != IRTemp_INVALID)
   8383             mk_skip_over_T32_if_cond_is_false(condT);
   8384          // now uncond
   8385 
   8386          if (bL)
   8387             mk_neon_elem_load_to_one_lane(rD, inc, i, N, size, addr);
   8388          else
   8389             mk_neon_elem_store_from_one_lane(rD, inc, i, N, size, addr);
   8390          DIP("v%s%u.%u {", bL ? "ld" : "st", N + 1, 8 << size);
   8391          for (j = 0; j <= N; j++) {
   8392             if (j)
   8393                DIP(", ");
   8394             DIP("d%u[%u]", rD + j * inc, i);
   8395          }
   8396          DIP("}, [r%u]", rN);
   8397          if (rM != 13 && rM != 15) {
   8398             DIP(", r%u\n", rM);
   8399          } else {
   8400             DIP("%s\n", (rM != 15) ? "!" : "");
   8401          }
   8402       } else {
   8403          /* ------------ Case (2) ------------
   8404             VLDn (single element to all lanes) */
   8405          UInt r;
   8406          if (bL == 0)
   8407             return False;
   8408 
   8409          inc = INSN(5,5) + 1;
   8410          size = INSN(7,6);
   8411 
   8412          /* size == 3 and size == 2 cases differ in alignment constraints */
   8413          if (size == 3 && N == 3 && INSN(4,4) == 1)
   8414             size = 2;
   8415 
   8416          if (size == 0 && N == 0 && INSN(4,4) == 1)
   8417             return False;
   8418          if (N == 2 && INSN(4,4) == 1)
   8419             return False;
   8420          if (size == 3)
   8421             return False;
   8422 
   8423          // go uncond
   8424          if (condT != IRTemp_INVALID)
   8425             mk_skip_over_T32_if_cond_is_false(condT);
   8426          // now uncond
   8427 
   8428          IRTemp addr = newTemp(Ity_I32);
   8429          assign(addr, mkexpr(initialRn));
   8430 
   8431          if (N == 0 && INSN(5,5))
   8432             regs = 2;
   8433 
   8434          for (r = 0; r < regs; r++) {
   8435             switch (size) {
   8436                case 0:
   8437                   putDRegI64(rD + r, unop(Iop_Dup8x8,
   8438                                           loadLE(Ity_I8, mkexpr(addr))),
   8439                              IRTemp_INVALID);
   8440                   break;
   8441                case 1:
   8442                   putDRegI64(rD + r, unop(Iop_Dup16x4,
   8443                                           loadLE(Ity_I16, mkexpr(addr))),
   8444                              IRTemp_INVALID);
   8445                   break;
   8446                case 2:
   8447                   putDRegI64(rD + r, unop(Iop_Dup32x2,
   8448                                           loadLE(Ity_I32, mkexpr(addr))),
   8449                              IRTemp_INVALID);
   8450                   break;
   8451                default:
   8452                   vassert(0);
   8453             }
   8454             for (i = 1; i <= N; i++) {
   8455                switch (size) {
   8456                   case 0:
   8457                      putDRegI64(rD + r + i * inc,
   8458                                 unop(Iop_Dup8x8,
   8459                                      loadLE(Ity_I8, binop(Iop_Add32,
   8460                                                           mkexpr(addr),
   8461                                                           mkU32(i * 1)))),
   8462                                 IRTemp_INVALID);
   8463                      break;
   8464                   case 1:
   8465                      putDRegI64(rD + r + i * inc,
   8466                                 unop(Iop_Dup16x4,
   8467                                      loadLE(Ity_I16, binop(Iop_Add32,
   8468                                                            mkexpr(addr),
   8469                                                            mkU32(i * 2)))),
   8470                                 IRTemp_INVALID);
   8471                      break;
   8472                   case 2:
   8473                      putDRegI64(rD + r + i * inc,
   8474                                 unop(Iop_Dup32x2,
   8475                                      loadLE(Ity_I32, binop(Iop_Add32,
   8476                                                            mkexpr(addr),
   8477                                                            mkU32(i * 4)))),
   8478                                 IRTemp_INVALID);
   8479                      break;
   8480                   default:
   8481                      vassert(0);
   8482                }
   8483             }
   8484          }
   8485          DIP("vld%u.%u {", N + 1, 8 << size);
   8486          for (r = 0; r < regs; r++) {
   8487             for (i = 0; i <= N; i++) {
   8488                if (i || r)
   8489                   DIP(", ");
   8490                DIP("d%u[]", rD + r + i * inc);
   8491             }
   8492          }
   8493          DIP("}, [r%u]", rN);
   8494          if (rM != 13 && rM != 15) {
   8495             DIP(", r%u\n", rM);
   8496          } else {
   8497             DIP("%s\n", (rM != 15) ? "!" : "");
   8498          }
   8499       }
   8500       /* Writeback.  We're uncond here, so no condT-ing. */
   8501       if (rM != 15) {
   8502          if (rM == 13) {
   8503             IRExpr* e = binop(Iop_Add32,
   8504                               mkexpr(initialRn),
   8505                               mkU32((1 << size) * (N + 1)));
   8506             if (isT)
   8507                putIRegT(rN, e, IRTemp_INVALID);
   8508             else
   8509                putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
   8510          } else {
   8511             IRExpr* e = binop(Iop_Add32,
   8512                               mkexpr(initialRn),
   8513                               mkexpr(initialRm));
   8514             if (isT)
   8515                putIRegT(rN, e, IRTemp_INVALID);
   8516             else
   8517                putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
   8518          }
   8519       }
   8520       return True;
   8521    } else {
   8522       /* ------------ Case (3) ------------
   8523          VSTn / VLDn (multiple n-element structures) */
   8524       inc = (fB & 1) + 1;
   8525 
   8526       if (fB == BITS4(0,0,1,0)       // Dd, Dd+1, Dd+2, Dd+3  inc = 1  regs = 4
   8527           || fB == BITS4(0,1,1,0)    // Dd, Dd+1, Dd+2        inc = 1  regs = 3
   8528           || fB == BITS4(0,1,1,1)    // Dd                    inc = 2  regs = 1
   8529           || fB == BITS4(1,0,1,0)) { // Dd, Dd+1              inc = 1  regs = 2
   8530          N = 0; // VLD1/VST1.  'inc' does not appear to have any
   8531                 // meaning for the VLD1/VST1 cases.  'regs' is the number of
   8532                 // registers involved.
   8533          if (rD + regs > 32) return False;
   8534       }
   8535       else
   8536       if (fB == BITS4(0,0,1,1)       // Dd, Dd+1, Dd+2, Dd+3  inc=2  regs = 2
   8537           || fB == BITS4(1,0,0,0)    // Dd, Dd+1              inc=1  regs = 1
   8538           || fB == BITS4(1,0,0,1)) { // Dd, Dd+2              inc=2  regs = 1
   8539          N = 1; // VLD2/VST2.  'regs' is the number of register-pairs involved
   8540          if (regs == 1 && inc == 1 && rD + 1 >= 32) return False;
   8541          if (regs == 1 && inc == 2 && rD + 2 >= 32) return False;
   8542          if (regs == 2 && inc == 2 && rD + 3 >= 32) return False;
   8543       } else if (fB == BITS4(0,1,0,0) || fB == BITS4(0,1,0,1)) {
   8544          N = 2; // VLD3/VST3
   8545          if (inc == 1 && rD + 2 >= 32) return False;
   8546          if (inc == 2 && rD + 4 >= 32) return False;
   8547       } else if (fB == BITS4(0,0,0,0) || fB == BITS4(0,0,0,1)) {
   8548          N = 3; // VLD4/VST4
   8549          if (inc == 1 && rD + 3 >= 32) return False;
   8550          if (inc == 2 && rD + 6 >= 32) return False;
   8551       } else {
   8552          return False;
   8553       }
   8554 
   8555       if (N == 1 && fB == BITS4(0,0,1,1)) {
   8556          regs = 2;
   8557       } else if (N == 0) {
   8558          if (fB == BITS4(1,0,1,0)) {
   8559             regs = 2;
   8560          } else if (fB == BITS4(0,1,1,0)) {
   8561             regs = 3;
   8562          } else if (fB == BITS4(0,0,1,0)) {
   8563             regs = 4;
   8564          }
   8565       }
   8566 
   8567       size = INSN(7,6);
   8568       if (N == 0 && size == 3)
   8569          size = 2;
   8570       if (size == 3)
   8571          return False;
   8572 
   8573       // go uncond
   8574       if (condT != IRTemp_INVALID)
   8575          mk_skip_over_T32_if_cond_is_false(condT);
   8576       // now uncond
   8577 
   8578       IRTemp addr = newTemp(Ity_I32);
   8579       assign(addr, mkexpr(initialRn));
   8580 
   8581       if (N == 0 /* No interleaving -- VLD1/VST1 */) {
   8582          UInt r;
   8583          vassert(regs == 1 || regs == 2 || regs == 3 || regs == 4);
   8584          /* inc has no relevance here */
   8585          for (r = 0; r < regs; r++) {
   8586             if (bL)
   8587                putDRegI64(rD+r, loadLE(Ity_I64, mkexpr(addr)), IRTemp_INVALID);
   8588             else
   8589                storeLE(mkexpr(addr), getDRegI64(rD+r));
   8590             IRTemp tmp = newTemp(Ity_I32);
   8591             assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(8)));
   8592             addr = tmp;
   8593          }
   8594       }
   8595       else
   8596       if (N == 1 /* 2-interleaving -- VLD2/VST2 */) {
   8597          vassert( (regs == 1 && (inc == 1 || inc == 2))
   8598                    || (regs == 2 && inc == 2) );
   8599          // Make 'nregs' be the number of registers and 'regstep'
   8600          // equal the actual register-step.  The ARM encoding, using 'regs'
   8601          // and 'inc', is bizarre.  After this, we have:
   8602          // Dd, Dd+1              regs = 1, inc = 1,   nregs = 2, regstep = 1
   8603          // Dd, Dd+2              regs = 1, inc = 2,   nregs = 2, regstep = 2
   8604          // Dd, Dd+1, Dd+2, Dd+3  regs = 2, inc = 2,   nregs = 4, regstep = 1
   8605          UInt nregs   = 2;
   8606          UInt regstep = 1;
   8607          if (regs == 1 && inc == 1) {
   8608             /* nothing */
   8609          } else if (regs == 1 && inc == 2) {
   8610             regstep = 2;
   8611          } else if (regs == 2 && inc == 2) {
   8612             nregs = 4;
   8613          } else {
   8614             vassert(0);
   8615          }
   8616          // 'a' is address,
   8617          // 'di' is interleaved data, 'du' is uninterleaved data
   8618          if (nregs == 2) {
   8619             IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8620             IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8621             IRTemp  di0 = newTemp(Ity_I64);
   8622             IRTemp  di1 = newTemp(Ity_I64);
   8623             IRTemp  du0 = newTemp(Ity_I64);
   8624             IRTemp  du1 = newTemp(Ity_I64);
   8625             if (bL) {
   8626                assign(di0, loadLE(Ity_I64, a0));
   8627                assign(di1, loadLE(Ity_I64, a1));
   8628                math_DEINTERLEAVE_2(&du0, &du1, di0, di1, 1 << size);
   8629                putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
   8630                putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
   8631             } else {
   8632                assign(du0, getDRegI64(rD + 0 * regstep));
   8633                assign(du1, getDRegI64(rD + 1 * regstep));
   8634                math_INTERLEAVE_2(&di0, &di1, du0, du1, 1 << size);
   8635                storeLE(a0, mkexpr(di0));
   8636                storeLE(a1, mkexpr(di1));
   8637             }
   8638             IRTemp tmp = newTemp(Ity_I32);
   8639             assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(16)));
   8640             addr = tmp;
   8641          } else {
   8642             vassert(nregs == 4);
   8643             vassert(regstep == 1);
   8644             IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8645             IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8646             IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
   8647             IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
   8648             IRTemp  di0 = newTemp(Ity_I64);
   8649             IRTemp  di1 = newTemp(Ity_I64);
   8650             IRTemp  di2 = newTemp(Ity_I64);
   8651             IRTemp  di3 = newTemp(Ity_I64);
   8652             IRTemp  du0 = newTemp(Ity_I64);
   8653             IRTemp  du1 = newTemp(Ity_I64);
   8654             IRTemp  du2 = newTemp(Ity_I64);
   8655             IRTemp  du3 = newTemp(Ity_I64);
   8656             if (bL) {
   8657                assign(di0, loadLE(Ity_I64, a0));
   8658                assign(di1, loadLE(Ity_I64, a1));
   8659                assign(di2, loadLE(Ity_I64, a2));
   8660                assign(di3, loadLE(Ity_I64, a3));
   8661                // Note spooky interleaving: du0, du2, di0, di1 etc
   8662                math_DEINTERLEAVE_2(&du0, &du2, di0, di1, 1 << size);
   8663                math_DEINTERLEAVE_2(&du1, &du3, di2, di3, 1 << size);
   8664                putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
   8665                putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
   8666                putDRegI64(rD + 2 * regstep, mkexpr(du2), IRTemp_INVALID);
   8667                putDRegI64(rD + 3 * regstep, mkexpr(du3), IRTemp_INVALID);
   8668             } else {
   8669                assign(du0, getDRegI64(rD + 0 * regstep));
   8670                assign(du1, getDRegI64(rD + 1 * regstep));
   8671                assign(du2, getDRegI64(rD + 2 * regstep));
   8672                assign(du3, getDRegI64(rD + 3 * regstep));
   8673                // Note spooky interleaving: du0, du2, di0, di1 etc
   8674                math_INTERLEAVE_2(&di0, &di1, du0, du2, 1 << size);
   8675                math_INTERLEAVE_2(&di2, &di3, du1, du3, 1 << size);
   8676                storeLE(a0, mkexpr(di0));
   8677                storeLE(a1, mkexpr(di1));
   8678                storeLE(a2, mkexpr(di2));
   8679                storeLE(a3, mkexpr(di3));
   8680             }
   8681 
   8682             IRTemp tmp = newTemp(Ity_I32);
   8683             assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
   8684             addr = tmp;
   8685          }
   8686       }
   8687       else
   8688       if (N == 2 /* 3-interleaving -- VLD3/VST3 */) {
   8689          // Dd, Dd+1, Dd+2   regs = 1, inc = 1
   8690          // Dd, Dd+2, Dd+4   regs = 1, inc = 2
   8691          vassert(regs == 1 && (inc == 1 || inc == 2));
   8692          IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8693          IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8694          IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
   8695          IRTemp  di0 = newTemp(Ity_I64);
   8696          IRTemp  di1 = newTemp(Ity_I64);
   8697          IRTemp  di2 = newTemp(Ity_I64);
   8698          IRTemp  du0 = newTemp(Ity_I64);
   8699          IRTemp  du1 = newTemp(Ity_I64);
   8700          IRTemp  du2 = newTemp(Ity_I64);
   8701          if (bL) {
   8702             assign(di0, loadLE(Ity_I64, a0));
   8703             assign(di1, loadLE(Ity_I64, a1));
   8704             assign(di2, loadLE(Ity_I64, a2));
   8705             math_DEINTERLEAVE_3(&du0, &du1, &du2, di0, di1, di2, 1 << size);
   8706             putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
   8707             putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
   8708             putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
   8709          } else {
   8710             assign(du0, getDRegI64(rD + 0 * inc));
   8711             assign(du1, getDRegI64(rD + 1 * inc));
   8712             assign(du2, getDRegI64(rD + 2 * inc));
   8713             math_INTERLEAVE_3(&di0, &di1, &di2, du0, du1, du2, 1 << size);
   8714             storeLE(a0, mkexpr(di0));
   8715             storeLE(a1, mkexpr(di1));
   8716             storeLE(a2, mkexpr(di2));
   8717          }
   8718          IRTemp tmp = newTemp(Ity_I32);
   8719          assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(24)));
   8720          addr = tmp;
   8721       }
   8722       else
   8723       if (N == 3 /* 4-interleaving -- VLD4/VST4 */) {
   8724          // Dd, Dd+1, Dd+2, Dd+3   regs = 1, inc = 1
   8725          // Dd, Dd+2, Dd+4, Dd+6   regs = 1, inc = 2
   8726          vassert(regs == 1 && (inc == 1 || inc == 2));
   8727          IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8728          IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8729          IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
   8730          IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
   8731          IRTemp  di0 = newTemp(Ity_I64);
   8732          IRTemp  di1 = newTemp(Ity_I64);
   8733          IRTemp  di2 = newTemp(Ity_I64);
   8734          IRTemp  di3 = newTemp(Ity_I64);
   8735          IRTemp  du0 = newTemp(Ity_I64);
   8736          IRTemp  du1 = newTemp(Ity_I64);
   8737          IRTemp  du2 = newTemp(Ity_I64);
   8738          IRTemp  du3 = newTemp(Ity_I64);
   8739          if (bL) {
   8740             assign(di0, loadLE(Ity_I64, a0));
   8741             assign(di1, loadLE(Ity_I64, a1));
   8742             assign(di2, loadLE(Ity_I64, a2));
   8743             assign(di3, loadLE(Ity_I64, a3));
   8744             math_DEINTERLEAVE_4(&du0, &du1, &du2, &du3,
   8745                                 di0, di1, di2, di3, 1 << size);
   8746             putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
   8747             putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
   8748             putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
   8749             putDRegI64(rD + 3 * inc, mkexpr(du3), IRTemp_INVALID);
   8750          } else {
   8751             assign(du0, getDRegI64(rD + 0 * inc));
   8752             assign(du1, getDRegI64(rD + 1 * inc));
   8753             assign(du2, getDRegI64(rD + 2 * inc));
   8754             assign(du3, getDRegI64(rD + 3 * inc));
   8755             math_INTERLEAVE_4(&di0, &di1, &di2, &di3,
   8756                               du0, du1, du2, du3, 1 << size);
   8757             storeLE(a0, mkexpr(di0));
   8758             storeLE(a1, mkexpr(di1));
   8759             storeLE(a2, mkexpr(di2));
   8760             storeLE(a3, mkexpr(di3));
   8761          }
   8762          IRTemp tmp = newTemp(Ity_I32);
   8763          assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
   8764          addr = tmp;
   8765       }
   8766       else {
   8767          vassert(0);
   8768       }
   8769 
   8770       /* Writeback */
   8771       if (rM != 15) {
   8772          IRExpr* e;
   8773          if (rM == 13) {
   8774             e = binop(Iop_Add32, mkexpr(initialRn),
   8775                                  mkU32(8 * (N + 1) * regs));
   8776          } else {
   8777             e = binop(Iop_Add32, mkexpr(initialRn),
   8778                                  mkexpr(initialRm));
   8779          }
   8780          if (isT)
   8781             putIRegT(rN, e, IRTemp_INVALID);
   8782          else
   8783             putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
   8784       }
   8785 
   8786       DIP("v%s%u.%u {", bL ? "ld" : "st", N + 1, 8 << INSN(7,6));
   8787       if ((inc == 1 && regs * (N + 1) > 1)
   8788           || (inc == 2 && regs > 1 && N > 0)) {
   8789          DIP("d%u-d%u", rD, rD + regs * (N + 1) - 1);
   8790       } else {
   8791          UInt r;
   8792          for (r = 0; r < regs; r++) {
   8793             for (i = 0; i <= N; i++) {
   8794                if (i || r)
   8795                   DIP(", ");
   8796                DIP("d%u", rD + r + i * inc);
   8797             }
   8798          }
   8799       }
   8800       DIP("}, [r%u]", rN);
   8801       if (rM != 13 && rM != 15) {
   8802          DIP(", r%u\n", rM);
   8803       } else {
   8804          DIP("%s\n", (rM != 15) ? "!" : "");
   8805       }
   8806       return True;
   8807    }
   8808 #  undef INSN
   8809 }
   8810 
   8811 
   8812 /*------------------------------------------------------------*/
   8813 /*--- NEON, top level control                              ---*/
   8814 /*------------------------------------------------------------*/
   8815 
   8816 /* Both ARM and Thumb */
   8817 
   8818 /* Translate a NEON instruction.    If successful, returns
   8819    True and *dres may or may not be updated.  If failure, returns
   8820    False and doesn't change *dres nor create any IR.
   8821 
   8822    The Thumb and ARM encodings are similar for the 24 bottom bits, but
   8823    the top 8 bits are slightly different.  In both cases, the caller
   8824    must pass the entire 32 bits.  Callers may pass any instruction;
   8825    this ignores non-NEON ones.
   8826 
   8827    Caller must supply an IRTemp 'condT' holding the gating condition,
   8828    or IRTemp_INVALID indicating the insn is always executed.  In ARM
   8829    code, this must always be IRTemp_INVALID because NEON insns are
   8830    unconditional for ARM.
   8831 
   8832    Finally, the caller must indicate whether this occurs in ARM or in
   8833    Thumb code.
   8834 */
   8835 static Bool decode_NEON_instruction (
   8836                /*MOD*/DisResult* dres,
   8837                UInt              insn32,
   8838                IRTemp            condT,
   8839                Bool              isT
   8840             )
   8841 {
   8842 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn32, (_bMax), (_bMin))
   8843 
   8844    /* There are two kinds of instruction to deal with: load/store and
   8845       data processing.  In each case, in ARM mode we merely identify
   8846       the kind, and pass it on to the relevant sub-handler.  In Thumb
   8847       mode we identify the kind, swizzle the bits around to make it
   8848       have the same encoding as in ARM, and hand it on to the
   8849       sub-handler.
   8850    */
   8851 
   8852    /* In ARM mode, NEON instructions can't be conditional. */
   8853    if (!isT)
   8854       vassert(condT == IRTemp_INVALID);
   8855 
   8856    /* Data processing:
   8857       Thumb: 111U 1111 AAAA Axxx xxxx BBBB CCCC xxxx
   8858       ARM:   1111 001U AAAA Axxx xxxx BBBB CCCC xxxx
   8859    */
   8860    if (!isT && INSN(31,25) == BITS7(1,1,1,1,0,0,1)) {
   8861       // ARM, DP
   8862       return dis_neon_data_processing(INSN(31,0), condT);
   8863    }
   8864    if (isT && INSN(31,29) == BITS3(1,1,1)
   8865        && INSN(27,24) == BITS4(1,1,1,1)) {
   8866       // Thumb, DP
   8867       UInt reformatted = INSN(23,0);
   8868       reformatted |= (INSN(28,28) << 24); // U bit
   8869       reformatted |= (BITS7(1,1,1,1,0,0,1) << 25);
   8870       return dis_neon_data_processing(reformatted, condT);
   8871    }
   8872 
   8873    /* Load/store:
   8874       Thumb: 1111 1001 AxL0 xxxx xxxx BBBB xxxx xxxx
   8875       ARM:   1111 0100 AxL0 xxxx xxxx BBBB xxxx xxxx
   8876    */
   8877    if (!isT && INSN(31,24) == BITS8(1,1,1,1,0,1,0,0)) {
   8878       // ARM, memory
   8879       return dis_neon_load_or_store(INSN(31,0), isT, condT);
   8880    }
   8881    if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
   8882       UInt reformatted = INSN(23,0);
   8883       reformatted |= (BITS8(1,1,1,1,0,1,0,0) << 24);
   8884       return dis_neon_load_or_store(reformatted, isT, condT);
   8885    }
   8886 
   8887    /* Doesn't match. */
   8888    return False;
   8889 
   8890 #  undef INSN
   8891 }
   8892 
   8893 
   8894 /*------------------------------------------------------------*/
   8895 /*--- V6 MEDIA instructions                                ---*/
   8896 /*------------------------------------------------------------*/
   8897 
   8898 /* Both ARM and Thumb */
   8899 
   8900 /* Translate a V6 media instruction.    If successful, returns
   8901    True and *dres may or may not be updated.  If failure, returns
   8902    False and doesn't change *dres nor create any IR.
   8903 
   8904    The Thumb and ARM encodings are completely different.  In Thumb
   8905    mode, the caller must pass the entire 32 bits.  In ARM mode it must
   8906    pass the lower 28 bits.  Apart from that, callers may pass any
   8907    instruction; this function ignores anything it doesn't recognise.
   8908 
   8909    Caller must supply an IRTemp 'condT' holding the gating condition,
   8910    or IRTemp_INVALID indicating the insn is always executed.
   8911 
   8912    Caller must also supply an ARMCondcode 'conq'.  This is only used
   8913    for debug printing, no other purpose.  For ARM, this is simply the
   8914    top 4 bits of the original instruction.  For Thumb, the condition
   8915    is not (really) known until run time, and so ARMCondAL should be
   8916    passed, only so that printing of these instructions does not show
   8917    any condition.
   8918 
   8919    Finally, the caller must indicate whether this occurs in ARM or in
   8920    Thumb code.
   8921 */
   8922 static Bool decode_V6MEDIA_instruction (
   8923                /*MOD*/DisResult* dres,
   8924                UInt              insnv6m,
   8925                IRTemp            condT,
   8926                ARMCondcode       conq,
   8927                Bool              isT
   8928             )
   8929 {
   8930 #  define INSNA(_bMax,_bMin)   SLICE_UInt(insnv6m, (_bMax), (_bMin))
   8931 #  define INSNT0(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 16) & 0xFFFF), \
   8932                                            (_bMax), (_bMin) )
   8933 #  define INSNT1(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 0)  & 0xFFFF), \
   8934                                            (_bMax), (_bMin) )
   8935    HChar dis_buf[128];
   8936    dis_buf[0] = 0;
   8937 
   8938    if (isT) {
   8939       vassert(conq == ARMCondAL);
   8940    } else {
   8941       vassert(INSNA(31,28) == BITS4(0,0,0,0)); // caller's obligation
   8942       vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
   8943    }
   8944 
   8945    /* ----------- smulbb, smulbt, smultb, smultt ----------- */
   8946    {
   8947      UInt regD = 99, regM = 99, regN = 99, bitM = 0, bitN = 0;
   8948      Bool gate = False;
   8949 
   8950      if (isT) {
   8951         if (INSNT0(15,4) == 0xFB1 && INSNT1(15,12) == BITS4(1,1,1,1)
   8952             && INSNT1(7,6) == BITS2(0,0)) {
   8953            regD = INSNT1(11,8);
   8954            regM = INSNT1(3,0);
   8955            regN = INSNT0(3,0);
   8956            bitM = INSNT1(4,4);
   8957            bitN = INSNT1(5,5);
   8958            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   8959               gate = True;
   8960         }
   8961      } else {
   8962         if (BITS8(0,0,0,1,0,1,1,0) == INSNA(27,20) &&
   8963             BITS4(0,0,0,0)         == INSNA(15,12) &&
   8964             BITS4(1,0,0,0)         == (INSNA(7,4) & BITS4(1,0,0,1)) ) {
   8965            regD = INSNA(19,16);
   8966            regM = INSNA(11,8);
   8967            regN = INSNA(3,0);
   8968            bitM = INSNA(6,6);
   8969            bitN = INSNA(5,5);
   8970            if (regD != 15 && regN != 15 && regM != 15)
   8971               gate = True;
   8972         }
   8973      }
   8974 
   8975      if (gate) {
   8976         IRTemp srcN = newTemp(Ity_I32);
   8977         IRTemp srcM = newTemp(Ity_I32);
   8978         IRTemp res  = newTemp(Ity_I32);
   8979 
   8980         assign( srcN, binop(Iop_Sar32,
   8981                             binop(Iop_Shl32,
   8982                                   isT ? getIRegT(regN) : getIRegA(regN),
   8983                                   mkU8(bitN ? 0 : 16)), mkU8(16)) );
   8984         assign( srcM, binop(Iop_Sar32,
   8985                             binop(Iop_Shl32,
   8986                                   isT ? getIRegT(regM) : getIRegA(regM),
   8987                                   mkU8(bitM ? 0 : 16)), mkU8(16)) );
   8988         assign( res, binop(Iop_Mul32, mkexpr(srcN), mkexpr(srcM)) );
   8989 
   8990         if (isT)
   8991            putIRegT( regD, mkexpr(res), condT );
   8992         else
   8993            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   8994 
   8995         DIP( "smul%c%c%s r%u, r%u, r%u\n", bitN ? 't' : 'b', bitM ? 't' : 'b',
   8996              nCC(conq), regD, regN, regM );
   8997         return True;
   8998      }
   8999      /* fall through */
   9000    }
   9001 
   9002    /* ------------ smulwb<y><c> <Rd>,<Rn>,<Rm> ------------- */
   9003    /* ------------ smulwt<y><c> <Rd>,<Rn>,<Rm> ------------- */
   9004    {
   9005      UInt regD = 99, regN = 99, regM = 99, bitM = 0;
   9006      Bool gate = False;
   9007 
   9008      if (isT) {
   9009         if (INSNT0(15,4) == 0xFB3 && INSNT1(15,12) == BITS4(1,1,1,1)
   9010             && INSNT1(7,5) == BITS3(0,0,0)) {
   9011           regN = INSNT0(3,0);
   9012           regD = INSNT1(11,8);
   9013           regM = INSNT1(3,0);
   9014           bitM = INSNT1(4,4);
   9015           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9016              gate = True;
   9017         }
   9018      } else {
   9019         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
   9020             INSNA(15,12) == BITS4(0,0,0,0)         &&
   9021             (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,1,0)) {
   9022            regD = INSNA(19,16);
   9023            regN = INSNA(3,0);
   9024            regM = INSNA(11,8);
   9025            bitM = INSNA(6,6);
   9026            if (regD != 15 && regN != 15 && regM != 15)
   9027               gate = True;
   9028         }
   9029      }
   9030 
   9031      if (gate) {
   9032         IRTemp irt_prod = newTemp(Ity_I64);
   9033 
   9034         assign( irt_prod,
   9035                 binop(Iop_MullS32,
   9036                       isT ? getIRegT(regN) : getIRegA(regN),
   9037                       binop(Iop_Sar32,
   9038                             binop(Iop_Shl32,
   9039                                   isT ? getIRegT(regM) : getIRegA(regM),
   9040                                   mkU8(bitM ? 0 : 16)),
   9041                             mkU8(16))) );
   9042 
   9043         IRExpr* ire_result = binop(Iop_Or32,
   9044                                    binop( Iop_Shl32,
   9045                                           unop(Iop_64HIto32, mkexpr(irt_prod)),
   9046                                           mkU8(16) ),
   9047                                    binop( Iop_Shr32,
   9048                                           unop(Iop_64to32, mkexpr(irt_prod)),
   9049                                           mkU8(16) ) );
   9050 
   9051         if (isT)
   9052            putIRegT( regD, ire_result, condT );
   9053         else
   9054            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9055 
   9056         DIP("smulw%c%s r%u, r%u, r%u\n",
   9057             bitM ? 't' : 'b', nCC(conq),regD,regN,regM);
   9058         return True;
   9059      }
   9060      /* fall through */
   9061    }
   9062 
   9063    /* ------------ pkhbt<c> Rd, Rn, Rm {,LSL #imm} ------------- */
   9064    /* ------------ pkhtb<c> Rd, Rn, Rm {,ASR #imm} ------------- */
   9065    {
   9066      UInt regD = 99, regN = 99, regM = 99, imm5 = 99, shift_type = 99;
   9067      Bool tbform = False;
   9068      Bool gate = False;
   9069 
   9070      if (isT) {
   9071         if (INSNT0(15,4) == 0xEAC
   9072             && INSNT1(15,15) == 0 && INSNT1(4,4) == 0) {
   9073            regN = INSNT0(3,0);
   9074            regD = INSNT1(11,8);
   9075            regM = INSNT1(3,0);
   9076            imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
   9077            shift_type = (INSNT1(5,5) << 1) | 0;
   9078            tbform = (INSNT1(5,5) == 0) ? False : True;
   9079            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9080               gate = True;
   9081         }
   9082      } else {
   9083         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
   9084             INSNA(5,4)   == BITS2(0,1)             &&
   9085             (INSNA(6,6)  == 0 || INSNA(6,6) == 1) ) {
   9086            regD = INSNA(15,12);
   9087            regN = INSNA(19,16);
   9088            regM = INSNA(3,0);
   9089            imm5 = INSNA(11,7);
   9090            shift_type = (INSNA(6,6) << 1) | 0;
   9091            tbform = (INSNA(6,6) == 0) ? False : True;
   9092            if (regD != 15 && regN != 15 && regM != 15)
   9093               gate = True;
   9094         }
   9095      }
   9096 
   9097      if (gate) {
   9098         IRTemp irt_regM       = newTemp(Ity_I32);
   9099         IRTemp irt_regM_shift = newTemp(Ity_I32);
   9100         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   9101         compute_result_and_C_after_shift_by_imm5(
   9102            dis_buf, &irt_regM_shift, NULL, irt_regM, shift_type, imm5, regM );
   9103 
   9104         UInt mask = (tbform == True) ? 0x0000FFFF : 0xFFFF0000;
   9105         IRExpr* ire_result
   9106           = binop( Iop_Or32,
   9107                    binop(Iop_And32, mkexpr(irt_regM_shift), mkU32(mask)),
   9108                    binop(Iop_And32, isT ? getIRegT(regN) : getIRegA(regN),
   9109                                     unop(Iop_Not32, mkU32(mask))) );
   9110 
   9111         if (isT)
   9112            putIRegT( regD, ire_result, condT );
   9113         else
   9114            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9115 
   9116         DIP( "pkh%s%s r%u, r%u, r%u %s\n", tbform ? "tb" : "bt",
   9117              nCC(conq), regD, regN, regM, dis_buf );
   9118 
   9119         return True;
   9120      }
   9121      /* fall through */
   9122    }
   9123 
   9124    /* ---------- usat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
   9125    {
   9126      UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
   9127      Bool gate = False;
   9128 
   9129      if (isT) {
   9130         if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,1,0)
   9131             && INSNT0(4,4) == 0
   9132             && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
   9133            regD       = INSNT1(11,8);
   9134            regN       = INSNT0(3,0);
   9135            shift_type = (INSNT0(5,5) << 1) | 0;
   9136            imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
   9137            sat_imm    = INSNT1(4,0);
   9138            if (!isBadRegT(regD) && !isBadRegT(regN))
   9139               gate = True;
   9140            if (shift_type == BITS2(1,0) && imm5 == 0)
   9141               gate = False;
   9142         }
   9143      } else {
   9144         if (INSNA(27,21) == BITS7(0,1,1,0,1,1,1) &&
   9145             INSNA(5,4)   == BITS2(0,1)) {
   9146            regD       = INSNA(15,12);
   9147            regN       = INSNA(3,0);
   9148            shift_type = (INSNA(6,6) << 1) | 0;
   9149            imm5       = INSNA(11,7);
   9150            sat_imm    = INSNA(20,16);
   9151            if (regD != 15 && regN != 15)
   9152               gate = True;
   9153         }
   9154      }
   9155 
   9156      if (gate) {
   9157         IRTemp irt_regN       = newTemp(Ity_I32);
   9158         IRTemp irt_regN_shift = newTemp(Ity_I32);
   9159         IRTemp irt_sat_Q      = newTemp(Ity_I32);
   9160         IRTemp irt_result     = newTemp(Ity_I32);
   9161 
   9162         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9163         compute_result_and_C_after_shift_by_imm5(
   9164                 dis_buf, &irt_regN_shift, NULL,
   9165                 irt_regN, shift_type, imm5, regN );
   9166 
   9167         armUnsignedSatQ( &irt_result, &irt_sat_Q, irt_regN_shift, sat_imm );
   9168         or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
   9169 
   9170         if (isT)
   9171            putIRegT( regD, mkexpr(irt_result), condT );
   9172         else
   9173            putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
   9174 
   9175         DIP("usat%s r%u, #0x%04x, %s\n",
   9176             nCC(conq), regD, imm5, dis_buf);
   9177         return True;
   9178      }
   9179      /* fall through */
   9180    }
   9181 
   9182   /* ----------- ssat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
   9183    {
   9184      UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
   9185      Bool gate = False;
   9186 
   9187      if (isT) {
   9188         if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
   9189             && INSNT0(4,4) == 0
   9190             && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
   9191            regD       = INSNT1(11,8);
   9192            regN       = INSNT0(3,0);
   9193            shift_type = (INSNT0(5,5) << 1) | 0;
   9194            imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
   9195            sat_imm    = INSNT1(4,0) + 1;
   9196            if (!isBadRegT(regD) && !isBadRegT(regN))
   9197               gate = True;
   9198            if (shift_type == BITS2(1,0) && imm5 == 0)
   9199               gate = False;
   9200         }
   9201      } else {
   9202         if (INSNA(27,21) == BITS7(0,1,1,0,1,0,1) &&
   9203             INSNA(5,4)   == BITS2(0,1)) {
   9204            regD       = INSNA(15,12);
   9205            regN       = INSNA(3,0);
   9206            shift_type = (INSNA(6,6) << 1) | 0;
   9207            imm5       = INSNA(11,7);
   9208            sat_imm    = INSNA(20,16) + 1;
   9209            if (regD != 15 && regN != 15)
   9210               gate = True;
   9211         }
   9212      }
   9213 
   9214      if (gate) {
   9215         IRTemp irt_regN       = newTemp(Ity_I32);
   9216         IRTemp irt_regN_shift = newTemp(Ity_I32);
   9217         IRTemp irt_sat_Q      = newTemp(Ity_I32);
   9218         IRTemp irt_result     = newTemp(Ity_I32);
   9219 
   9220         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9221         compute_result_and_C_after_shift_by_imm5(
   9222                 dis_buf, &irt_regN_shift, NULL,
   9223                 irt_regN, shift_type, imm5, regN );
   9224 
   9225         armSignedSatQ( irt_regN_shift, sat_imm, &irt_result, &irt_sat_Q );
   9226         or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
   9227 
   9228         if (isT)
   9229            putIRegT( regD, mkexpr(irt_result), condT );
   9230         else
   9231            putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
   9232 
   9233         DIP( "ssat%s r%u, #0x%04x, %s\n",
   9234              nCC(conq), regD, imm5, dis_buf);
   9235         return True;
   9236     }
   9237     /* fall through */
   9238   }
   9239 
   9240    /* ----------- ssat16<c> <Rd>,#<imm>,<Rn> ----------- */
   9241    {
   9242      UInt regD = 99, regN = 99, sat_imm = 99;
   9243      Bool gate = False;
   9244 
   9245      if (isT) {
   9246         if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
   9247             && INSNT0(5,4) == BITS2(1,0)
   9248             && INSNT1(15,12) == BITS4(0,0,0,0)
   9249             && INSNT1(7,4) == BITS4(0,0,0,0)) {
   9250            regD       = INSNT1(11,8);
   9251            regN       = INSNT0(3,0);
   9252            sat_imm    = INSNT1(3,0) + 1;
   9253            if (!isBadRegT(regD) && !isBadRegT(regN))
   9254               gate = True;
   9255         }
   9256      } else {
   9257         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,1,0) &&
   9258             INSNA(11,4)   == BITS8(1,1,1,1,0,0,1,1)) {
   9259            regD       = INSNA(15,12);
   9260            regN       = INSNA(3,0);
   9261            sat_imm    = INSNA(19,16) + 1;
   9262            if (regD != 15 && regN != 15)
   9263               gate = True;
   9264         }
   9265      }
   9266 
   9267      if (gate) {
   9268         IRTemp irt_regN    = newTemp(Ity_I32);
   9269         IRTemp irt_regN_lo = newTemp(Ity_I32);
   9270         IRTemp irt_regN_hi = newTemp(Ity_I32);
   9271         IRTemp irt_Q_lo    = newTemp(Ity_I32);
   9272         IRTemp irt_Q_hi    = newTemp(Ity_I32);
   9273         IRTemp irt_res_lo  = newTemp(Ity_I32);
   9274         IRTemp irt_res_hi  = newTemp(Ity_I32);
   9275 
   9276         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9277         assign( irt_regN_lo,
   9278                 binop( Iop_Sar32,
   9279                        binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
   9280                        mkU8(16)) );
   9281         assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
   9282 
   9283         armSignedSatQ( irt_regN_lo, sat_imm, &irt_res_lo, &irt_Q_lo );
   9284         or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
   9285 
   9286         armSignedSatQ( irt_regN_hi, sat_imm, &irt_res_hi, &irt_Q_hi );
   9287         or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
   9288 
   9289         IRExpr* ire_result
   9290            = binop(Iop_Or32,
   9291                    binop(Iop_And32, mkexpr(irt_res_lo), mkU32(0xFFFF)),
   9292                    binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)));
   9293         if (isT)
   9294            putIRegT( regD, ire_result, condT );
   9295         else
   9296            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9297 
   9298         DIP( "ssat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
   9299         return True;
   9300      }
   9301      /* fall through */
   9302    }
   9303 
   9304    /* -------------- usat16<c> <Rd>,#<imm4>,<Rn> --------------- */
   9305    {
   9306      UInt regD = 99, regN = 99, sat_imm = 99;
   9307      Bool gate = False;
   9308 
   9309      if (isT) {
   9310         if (INSNT0(15,4) == 0xF3A && (INSNT1(15,0) & 0xF0F0) == 0x0000) {
   9311            regN = INSNT0(3,0);
   9312            regD = INSNT1(11,8);
   9313            sat_imm = INSNT1(3,0);
   9314            if (!isBadRegT(regD) && !isBadRegT(regN))
   9315               gate = True;
   9316        }
   9317      } else {
   9318         if (INSNA(27,20) == BITS8(0,1,1,0,1,1,1,0) &&
   9319             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9320             INSNA(7,4)   == BITS4(0,0,1,1)) {
   9321            regD    = INSNA(15,12);
   9322            regN    = INSNA(3,0);
   9323            sat_imm = INSNA(19,16);
   9324            if (regD != 15 && regN != 15)
   9325               gate = True;
   9326         }
   9327      }
   9328 
   9329      if (gate) {
   9330         IRTemp irt_regN    = newTemp(Ity_I32);
   9331         IRTemp irt_regN_lo = newTemp(Ity_I32);
   9332         IRTemp irt_regN_hi = newTemp(Ity_I32);
   9333         IRTemp irt_Q_lo    = newTemp(Ity_I32);
   9334         IRTemp irt_Q_hi    = newTemp(Ity_I32);
   9335         IRTemp irt_res_lo  = newTemp(Ity_I32);
   9336         IRTemp irt_res_hi  = newTemp(Ity_I32);
   9337 
   9338         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9339         assign( irt_regN_lo, binop( Iop_Sar32,
   9340                                     binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
   9341                                     mkU8(16)) );
   9342         assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
   9343 
   9344         armUnsignedSatQ( &irt_res_lo, &irt_Q_lo, irt_regN_lo, sat_imm );
   9345         or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
   9346 
   9347         armUnsignedSatQ( &irt_res_hi, &irt_Q_hi, irt_regN_hi, sat_imm );
   9348         or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
   9349 
   9350         IRExpr* ire_result = binop( Iop_Or32,
   9351                                     binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)),
   9352                                     mkexpr(irt_res_lo) );
   9353 
   9354         if (isT)
   9355            putIRegT( regD, ire_result, condT );
   9356         else
   9357            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9358 
   9359         DIP( "usat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
   9360         return True;
   9361      }
   9362      /* fall through */
   9363    }
   9364 
   9365    /* -------------- uadd16<c> <Rd>,<Rn>,<Rm> -------------- */
   9366    {
   9367      UInt regD = 99, regN = 99, regM = 99;
   9368      Bool gate = False;
   9369 
   9370      if (isT) {
   9371         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9372            regN = INSNT0(3,0);
   9373            regD = INSNT1(11,8);
   9374            regM = INSNT1(3,0);
   9375            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9376               gate = True;
   9377         }
   9378      } else {
   9379         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9380             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9381             INSNA(7,4)   == BITS4(0,0,0,1)) {
   9382            regD = INSNA(15,12);
   9383            regN = INSNA(19,16);
   9384            regM = INSNA(3,0);
   9385            if (regD != 15 && regN != 15 && regM != 15)
   9386               gate = True;
   9387         }
   9388      }
   9389 
   9390      if (gate) {
   9391         IRTemp rNt  = newTemp(Ity_I32);
   9392         IRTemp rMt  = newTemp(Ity_I32);
   9393         IRTemp res  = newTemp(Ity_I32);
   9394         IRTemp reso = newTemp(Ity_I32);
   9395 
   9396         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9397         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9398 
   9399         assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
   9400         if (isT)
   9401            putIRegT( regD, mkexpr(res), condT );
   9402         else
   9403            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9404 
   9405         assign(reso, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
   9406         set_GE_32_10_from_bits_31_15(reso, condT);
   9407 
   9408         DIP("uadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9409         return True;
   9410      }
   9411      /* fall through */
   9412    }
   9413 
   9414    /* -------------- sadd16<c> <Rd>,<Rn>,<Rm> -------------- */
   9415    {
   9416      UInt regD = 99, regN = 99, regM = 99;
   9417      Bool gate = False;
   9418 
   9419      if (isT) {
   9420         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9421            regN = INSNT0(3,0);
   9422            regD = INSNT1(11,8);
   9423            regM = INSNT1(3,0);
   9424            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9425               gate = True;
   9426         }
   9427      } else {
   9428         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9429             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9430             INSNA(7,4)   == BITS4(0,0,0,1)) {
   9431            regD = INSNA(15,12);
   9432            regN = INSNA(19,16);
   9433            regM = INSNA(3,0);
   9434            if (regD != 15 && regN != 15 && regM != 15)
   9435               gate = True;
   9436         }
   9437      }
   9438 
   9439      if (gate) {
   9440         IRTemp rNt  = newTemp(Ity_I32);
   9441         IRTemp rMt  = newTemp(Ity_I32);
   9442         IRTemp res  = newTemp(Ity_I32);
   9443         IRTemp reso = newTemp(Ity_I32);
   9444 
   9445         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9446         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9447 
   9448         assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
   9449         if (isT)
   9450            putIRegT( regD, mkexpr(res), condT );
   9451         else
   9452            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9453 
   9454         assign(reso, unop(Iop_Not32,
   9455                           binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt))));
   9456         set_GE_32_10_from_bits_31_15(reso, condT);
   9457 
   9458         DIP("sadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9459         return True;
   9460      }
   9461      /* fall through */
   9462    }
   9463 
   9464    /* ---------------- usub16<c> <Rd>,<Rn>,<Rm> ---------------- */
   9465    {
   9466      UInt regD = 99, regN = 99, regM = 99;
   9467      Bool gate = False;
   9468 
   9469      if (isT) {
   9470         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9471            regN = INSNT0(3,0);
   9472            regD = INSNT1(11,8);
   9473            regM = INSNT1(3,0);
   9474            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9475               gate = True;
   9476         }
   9477      } else {
   9478         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9479             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9480             INSNA(7,4)   == BITS4(0,1,1,1)) {
   9481            regD = INSNA(15,12);
   9482            regN = INSNA(19,16);
   9483            regM = INSNA(3,0);
   9484            if (regD != 15 && regN != 15 && regM != 15)
   9485              gate = True;
   9486         }
   9487      }
   9488 
   9489      if (gate) {
   9490         IRTemp rNt  = newTemp(Ity_I32);
   9491         IRTemp rMt  = newTemp(Ity_I32);
   9492         IRTemp res  = newTemp(Ity_I32);
   9493         IRTemp reso = newTemp(Ity_I32);
   9494 
   9495         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9496         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9497 
   9498         assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
   9499         if (isT)
   9500            putIRegT( regD, mkexpr(res), condT );
   9501         else
   9502            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9503 
   9504         assign(reso, unop(Iop_Not32,
   9505                           binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt))));
   9506         set_GE_32_10_from_bits_31_15(reso, condT);
   9507 
   9508         DIP("usub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9509         return True;
   9510      }
   9511      /* fall through */
   9512    }
   9513 
   9514    /* -------------- ssub16<c> <Rd>,<Rn>,<Rm> -------------- */
   9515    {
   9516      UInt regD = 99, regN = 99, regM = 99;
   9517      Bool gate = False;
   9518 
   9519      if (isT) {
   9520         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9521            regN = INSNT0(3,0);
   9522            regD = INSNT1(11,8);
   9523            regM = INSNT1(3,0);
   9524            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9525               gate = True;
   9526         }
   9527      } else {
   9528         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9529             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9530             INSNA(7,4)   == BITS4(0,1,1,1)) {
   9531            regD = INSNA(15,12);
   9532            regN = INSNA(19,16);
   9533            regM = INSNA(3,0);
   9534            if (regD != 15 && regN != 15 && regM != 15)
   9535               gate = True;
   9536         }
   9537      }
   9538 
   9539      if (gate) {
   9540         IRTemp rNt  = newTemp(Ity_I32);
   9541         IRTemp rMt  = newTemp(Ity_I32);
   9542         IRTemp res  = newTemp(Ity_I32);
   9543         IRTemp reso = newTemp(Ity_I32);
   9544 
   9545         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9546         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9547 
   9548         assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
   9549         if (isT)
   9550            putIRegT( regD, mkexpr(res), condT );
   9551         else
   9552            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9553 
   9554         assign(reso, unop(Iop_Not32,
   9555                           binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt))));
   9556         set_GE_32_10_from_bits_31_15(reso, condT);
   9557 
   9558         DIP("ssub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9559         return True;
   9560      }
   9561      /* fall through */
   9562    }
   9563 
   9564    /* ----------------- uadd8<c> <Rd>,<Rn>,<Rm> ---------------- */
   9565    {
   9566      UInt regD = 99, regN = 99, regM = 99;
   9567      Bool gate = False;
   9568 
   9569      if (isT) {
   9570         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9571            regN = INSNT0(3,0);
   9572            regD = INSNT1(11,8);
   9573            regM = INSNT1(3,0);
   9574            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9575               gate = True;
   9576         }
   9577      } else {
   9578         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9579             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9580             (INSNA(7,4)  == BITS4(1,0,0,1))) {
   9581            regD = INSNA(15,12);
   9582            regN = INSNA(19,16);
   9583            regM = INSNA(3,0);
   9584            if (regD != 15 && regN != 15 && regM != 15)
   9585               gate = True;
   9586         }
   9587      }
   9588 
   9589      if (gate) {
   9590         IRTemp rNt  = newTemp(Ity_I32);
   9591         IRTemp rMt  = newTemp(Ity_I32);
   9592         IRTemp res  = newTemp(Ity_I32);
   9593         IRTemp reso = newTemp(Ity_I32);
   9594 
   9595         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9596         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9597 
   9598         assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
   9599         if (isT)
   9600            putIRegT( regD, mkexpr(res), condT );
   9601         else
   9602            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9603 
   9604         assign(reso, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
   9605         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9606 
   9607         DIP("uadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9608         return True;
   9609      }
   9610      /* fall through */
   9611    }
   9612 
   9613    /* ------------------- sadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9614    {
   9615      UInt regD = 99, regN = 99, regM = 99;
   9616      Bool gate = False;
   9617 
   9618      if (isT) {
   9619         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9620            regN = INSNT0(3,0);
   9621            regD = INSNT1(11,8);
   9622            regM = INSNT1(3,0);
   9623            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9624               gate = True;
   9625         }
   9626      } else {
   9627         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9628             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9629             (INSNA(7,4)  == BITS4(1,0,0,1))) {
   9630            regD = INSNA(15,12);
   9631            regN = INSNA(19,16);
   9632            regM = INSNA(3,0);
   9633            if (regD != 15 && regN != 15 && regM != 15)
   9634               gate = True;
   9635         }
   9636      }
   9637 
   9638      if (gate) {
   9639         IRTemp rNt  = newTemp(Ity_I32);
   9640         IRTemp rMt  = newTemp(Ity_I32);
   9641         IRTemp res  = newTemp(Ity_I32);
   9642         IRTemp reso = newTemp(Ity_I32);
   9643 
   9644         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9645         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9646 
   9647         assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
   9648         if (isT)
   9649            putIRegT( regD, mkexpr(res), condT );
   9650         else
   9651            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9652 
   9653         assign(reso, unop(Iop_Not32,
   9654                           binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt))));
   9655         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9656 
   9657         DIP("sadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9658         return True;
   9659      }
   9660      /* fall through */
   9661    }
   9662 
   9663    /* ------------------- usub8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9664    {
   9665      UInt regD = 99, regN = 99, regM = 99;
   9666      Bool gate = False;
   9667 
   9668      if (isT) {
   9669         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9670            regN = INSNT0(3,0);
   9671            regD = INSNT1(11,8);
   9672            regM = INSNT1(3,0);
   9673            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9674               gate = True;
   9675         }
   9676      } else {
   9677         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9678             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9679             (INSNA(7,4)  == BITS4(1,1,1,1))) {
   9680            regD = INSNA(15,12);
   9681            regN = INSNA(19,16);
   9682            regM = INSNA(3,0);
   9683            if (regD != 15 && regN != 15 && regM != 15)
   9684              gate = True;
   9685         }
   9686      }
   9687 
   9688      if (gate) {
   9689         IRTemp rNt  = newTemp(Ity_I32);
   9690         IRTemp rMt  = newTemp(Ity_I32);
   9691         IRTemp res  = newTemp(Ity_I32);
   9692         IRTemp reso = newTemp(Ity_I32);
   9693 
   9694         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9695         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9696 
   9697         assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
   9698         if (isT)
   9699            putIRegT( regD, mkexpr(res), condT );
   9700         else
   9701            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9702 
   9703         assign(reso, unop(Iop_Not32,
   9704                           binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt))));
   9705         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9706 
   9707         DIP("usub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9708         return True;
   9709      }
   9710      /* fall through */
   9711    }
   9712 
   9713    /* ------------------- ssub8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9714    {
   9715      UInt regD = 99, regN = 99, regM = 99;
   9716      Bool gate = False;
   9717 
   9718      if (isT) {
   9719         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9720            regN = INSNT0(3,0);
   9721            regD = INSNT1(11,8);
   9722            regM = INSNT1(3,0);
   9723            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9724               gate = True;
   9725         }
   9726      } else {
   9727         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9728             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9729             INSNA(7,4)   == BITS4(1,1,1,1)) {
   9730            regD = INSNA(15,12);
   9731            regN = INSNA(19,16);
   9732            regM = INSNA(3,0);
   9733            if (regD != 15 && regN != 15 && regM != 15)
   9734               gate = True;
   9735         }
   9736      }
   9737 
   9738      if (gate) {
   9739         IRTemp rNt  = newTemp(Ity_I32);
   9740         IRTemp rMt  = newTemp(Ity_I32);
   9741         IRTemp res  = newTemp(Ity_I32);
   9742         IRTemp reso = newTemp(Ity_I32);
   9743 
   9744         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9745         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9746 
   9747         assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
   9748         if (isT)
   9749            putIRegT( regD, mkexpr(res), condT );
   9750         else
   9751            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9752 
   9753         assign(reso, unop(Iop_Not32,
   9754                           binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt))));
   9755         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9756 
   9757         DIP("ssub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9758         return True;
   9759      }
   9760      /* fall through */
   9761    }
   9762 
   9763    /* ------------------ qadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
   9764    {
   9765      UInt regD = 99, regN = 99, regM = 99;
   9766      Bool gate = False;
   9767 
   9768      if (isT) {
   9769         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   9770            regN = INSNT0(3,0);
   9771            regD = INSNT1(11,8);
   9772            regM = INSNT1(3,0);
   9773            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9774               gate = True;
   9775         }
   9776      } else {
   9777         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   9778             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9779             INSNA(7,4)   == BITS4(1,0,0,1)) {
   9780            regD = INSNA(15,12);
   9781            regN = INSNA(19,16);
   9782            regM = INSNA(3,0);
   9783            if (regD != 15 && regN != 15 && regM != 15)
   9784               gate = True;
   9785         }
   9786      }
   9787 
   9788      if (gate) {
   9789         IRTemp rNt   = newTemp(Ity_I32);
   9790         IRTemp rMt   = newTemp(Ity_I32);
   9791         IRTemp res_q = newTemp(Ity_I32);
   9792 
   9793         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9794         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9795 
   9796         assign(res_q, binop(Iop_QAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
   9797         if (isT)
   9798            putIRegT( regD, mkexpr(res_q), condT );
   9799         else
   9800            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9801 
   9802         DIP("qadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9803         return True;
   9804      }
   9805      /* fall through */
   9806    }
   9807 
   9808    /* ------------------ qsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
   9809    {
   9810      UInt regD = 99, regN = 99, regM = 99;
   9811      Bool gate = False;
   9812 
   9813      if (isT) {
   9814         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   9815            regN = INSNT0(3,0);
   9816            regD = INSNT1(11,8);
   9817            regM = INSNT1(3,0);
   9818            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9819               gate = True;
   9820         }
   9821      } else {
   9822         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   9823             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9824             INSNA(7,4)   == BITS4(1,1,1,1)) {
   9825            regD = INSNA(15,12);
   9826            regN = INSNA(19,16);
   9827            regM = INSNA(3,0);
   9828            if (regD != 15 && regN != 15 && regM != 15)
   9829               gate = True;
   9830         }
   9831      }
   9832 
   9833      if (gate) {
   9834         IRTemp rNt   = newTemp(Ity_I32);
   9835         IRTemp rMt   = newTemp(Ity_I32);
   9836         IRTemp res_q = newTemp(Ity_I32);
   9837 
   9838         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9839         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9840 
   9841         assign(res_q, binop(Iop_QSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
   9842         if (isT)
   9843            putIRegT( regD, mkexpr(res_q), condT );
   9844         else
   9845            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9846 
   9847         DIP("qsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9848         return True;
   9849      }
   9850      /* fall through */
   9851    }
   9852 
   9853    /* ------------------ uqadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9854    {
   9855      UInt regD = 99, regN = 99, regM = 99;
   9856      Bool gate = False;
   9857 
   9858      if (isT) {
   9859         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   9860            regN = INSNT0(3,0);
   9861            regD = INSNT1(11,8);
   9862            regM = INSNT1(3,0);
   9863            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9864               gate = True;
   9865         }
   9866      } else {
   9867         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   9868             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9869             (INSNA(7,4)  == BITS4(1,0,0,1))) {
   9870            regD = INSNA(15,12);
   9871            regN = INSNA(19,16);
   9872            regM = INSNA(3,0);
   9873            if (regD != 15 && regN != 15 && regM != 15)
   9874               gate = True;
   9875         }
   9876      }
   9877 
   9878      if (gate) {
   9879         IRTemp rNt   = newTemp(Ity_I32);
   9880         IRTemp rMt   = newTemp(Ity_I32);
   9881         IRTemp res_q = newTemp(Ity_I32);
   9882 
   9883         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9884         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9885 
   9886         assign(res_q, binop(Iop_QAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
   9887         if (isT)
   9888            putIRegT( regD, mkexpr(res_q), condT );
   9889         else
   9890            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9891 
   9892         DIP("uqadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9893         return True;
   9894      }
   9895      /* fall through */
   9896    }
   9897 
   9898    /* ------------------ uqsub8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9899    {
   9900      UInt regD = 99, regN = 99, regM = 99;
   9901      Bool gate = False;
   9902 
   9903      if (isT) {
   9904         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   9905            regN = INSNT0(3,0);
   9906            regD = INSNT1(11,8);
   9907            regM = INSNT1(3,0);
   9908            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9909               gate = True;
   9910         }
   9911      } else {
   9912         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   9913             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9914             (INSNA(7,4)  == BITS4(1,1,1,1))) {
   9915            regD = INSNA(15,12);
   9916            regN = INSNA(19,16);
   9917            regM = INSNA(3,0);
   9918            if (regD != 15 && regN != 15 && regM != 15)
   9919              gate = True;
   9920         }
   9921      }
   9922 
   9923      if (gate) {
   9924         IRTemp rNt   = newTemp(Ity_I32);
   9925         IRTemp rMt   = newTemp(Ity_I32);
   9926         IRTemp res_q = newTemp(Ity_I32);
   9927 
   9928         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9929         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9930 
   9931         assign(res_q, binop(Iop_QSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
   9932         if (isT)
   9933            putIRegT( regD, mkexpr(res_q), condT );
   9934         else
   9935            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9936 
   9937         DIP("uqsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9938         return True;
   9939      }
   9940      /* fall through */
   9941    }
   9942 
   9943    /* ----------------- uhadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
   9944    {
   9945      UInt regD = 99, regN = 99, regM = 99;
   9946      Bool gate = False;
   9947 
   9948      if (isT) {
   9949         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   9950            regN = INSNT0(3,0);
   9951            regD = INSNT1(11,8);
   9952            regM = INSNT1(3,0);
   9953            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9954               gate = True;
   9955         }
   9956      } else {
   9957         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   9958             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9959             INSNA(7,4)   == BITS4(1,0,0,1)) {
   9960            regD = INSNA(15,12);
   9961            regN = INSNA(19,16);
   9962            regM = INSNA(3,0);
   9963            if (regD != 15 && regN != 15 && regM != 15)
   9964               gate = True;
   9965         }
   9966      }
   9967 
   9968      if (gate) {
   9969         IRTemp rNt   = newTemp(Ity_I32);
   9970         IRTemp rMt   = newTemp(Ity_I32);
   9971         IRTemp res_q = newTemp(Ity_I32);
   9972 
   9973         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9974         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9975 
   9976         assign(res_q, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
   9977         if (isT)
   9978            putIRegT( regD, mkexpr(res_q), condT );
   9979         else
   9980            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9981 
   9982         DIP("uhadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9983         return True;
   9984      }
   9985      /* fall through */
   9986    }
   9987 
   9988    /* ----------------- uhadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
   9989    {
   9990      UInt regD = 99, regN = 99, regM = 99;
   9991      Bool gate = False;
   9992 
   9993      if (isT) {
   9994         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   9995            regN = INSNT0(3,0);
   9996            regD = INSNT1(11,8);
   9997            regM = INSNT1(3,0);
   9998            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9999               gate = True;
   10000         }
   10001      } else {
   10002         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   10003             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10004             INSNA(7,4)   == BITS4(0,0,0,1)) {
   10005            regD = INSNA(15,12);
   10006            regN = INSNA(19,16);
   10007            regM = INSNA(3,0);
   10008            if (regD != 15 && regN != 15 && regM != 15)
   10009               gate = True;
   10010         }
   10011      }
   10012 
   10013      if (gate) {
   10014         IRTemp rNt   = newTemp(Ity_I32);
   10015         IRTemp rMt   = newTemp(Ity_I32);
   10016         IRTemp res_q = newTemp(Ity_I32);
   10017 
   10018         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10019         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10020 
   10021         assign(res_q, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
   10022         if (isT)
   10023            putIRegT( regD, mkexpr(res_q), condT );
   10024         else
   10025            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10026 
   10027         DIP("uhadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10028         return True;
   10029      }
   10030      /* fall through */
   10031    }
   10032 
   10033    /* ----------------- shadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
   10034    {
   10035      UInt regD = 99, regN = 99, regM = 99;
   10036      Bool gate = False;
   10037 
   10038      if (isT) {
   10039         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   10040            regN = INSNT0(3,0);
   10041            regD = INSNT1(11,8);
   10042            regM = INSNT1(3,0);
   10043            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10044               gate = True;
   10045         }
   10046      } else {
   10047         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   10048             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10049             INSNA(7,4)   == BITS4(1,0,0,1)) {
   10050            regD = INSNA(15,12);
   10051            regN = INSNA(19,16);
   10052            regM = INSNA(3,0);
   10053            if (regD != 15 && regN != 15 && regM != 15)
   10054               gate = True;
   10055         }
   10056      }
   10057 
   10058      if (gate) {
   10059         IRTemp rNt   = newTemp(Ity_I32);
   10060         IRTemp rMt   = newTemp(Ity_I32);
   10061         IRTemp res_q = newTemp(Ity_I32);
   10062 
   10063         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10064         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10065 
   10066         assign(res_q, binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
   10067         if (isT)
   10068            putIRegT( regD, mkexpr(res_q), condT );
   10069         else
   10070            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10071 
   10072         DIP("shadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10073         return True;
   10074      }
   10075      /* fall through */
   10076    }
   10077 
   10078    /* ------------------ qadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
   10079    {
   10080      UInt regD = 99, regN = 99, regM = 99;
   10081      Bool gate = False;
   10082 
   10083      if (isT) {
   10084         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10085            regN = INSNT0(3,0);
   10086            regD = INSNT1(11,8);
   10087            regM = INSNT1(3,0);
   10088            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10089               gate = True;
   10090         }
   10091      } else {
   10092         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10093             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10094             INSNA(7,4)   == BITS4(0,0,0,1)) {
   10095            regD = INSNA(15,12);
   10096            regN = INSNA(19,16);
   10097            regM = INSNA(3,0);
   10098            if (regD != 15 && regN != 15 && regM != 15)
   10099               gate = True;
   10100         }
   10101      }
   10102 
   10103      if (gate) {
   10104         IRTemp rNt   = newTemp(Ity_I32);
   10105         IRTemp rMt   = newTemp(Ity_I32);
   10106         IRTemp res_q = newTemp(Ity_I32);
   10107 
   10108         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10109         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10110 
   10111         assign(res_q, binop(Iop_QAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
   10112         if (isT)
   10113            putIRegT( regD, mkexpr(res_q), condT );
   10114         else
   10115            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10116 
   10117         DIP("qadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10118         return True;
   10119      }
   10120      /* fall through */
   10121    }
   10122 
   10123    /* ------------------ qsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
   10124    {
   10125      UInt regD = 99, regN = 99, regM = 99;
   10126      Bool gate = False;
   10127 
   10128       if (isT) {
   10129         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10130            regN = INSNT0(3,0);
   10131            regD = INSNT1(11,8);
   10132            regM = INSNT1(3,0);
   10133            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10134               gate = True;
   10135         }
   10136      } else {
   10137         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10138             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10139             INSNA(7,4)   == BITS4(0,1,1,1)) {
   10140            regD = INSNA(15,12);
   10141            regN = INSNA(19,16);
   10142            regM = INSNA(3,0);
   10143            if (regD != 15 && regN != 15 && regM != 15)
   10144              gate = True;
   10145         }
   10146      }
   10147 
   10148      if (gate) {
   10149         IRTemp rNt   = newTemp(Ity_I32);
   10150         IRTemp rMt   = newTemp(Ity_I32);
   10151         IRTemp res_q = newTemp(Ity_I32);
   10152 
   10153         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10154         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10155 
   10156         assign(res_q, binop(Iop_QSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
   10157         if (isT)
   10158            putIRegT( regD, mkexpr(res_q), condT );
   10159         else
   10160            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10161 
   10162         DIP("qsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10163         return True;
   10164      }
   10165      /* fall through */
   10166    }
   10167 
   10168    /* ------------------- qsax<c> <Rd>,<Rn>,<Rm> ------------------- */
   10169    /* note: the hardware seems to construct the result differently
   10170       from wot the manual says. */
   10171    {
   10172      UInt regD = 99, regN = 99, regM = 99;
   10173      Bool gate = False;
   10174 
   10175      if (isT) {
   10176         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10177            regN = INSNT0(3,0);
   10178            regD = INSNT1(11,8);
   10179            regM = INSNT1(3,0);
   10180            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10181               gate = True;
   10182         }
   10183      } else {
   10184         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10185             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10186             INSNA(7,4)   == BITS4(0,1,0,1)) {
   10187            regD = INSNA(15,12);
   10188            regN = INSNA(19,16);
   10189            regM = INSNA(3,0);
   10190            if (regD != 15 && regN != 15 && regM != 15)
   10191               gate = True;
   10192         }
   10193      }
   10194 
   10195      if (gate) {
   10196         IRTemp irt_regN     = newTemp(Ity_I32);
   10197         IRTemp irt_regM     = newTemp(Ity_I32);
   10198         IRTemp irt_sum      = newTemp(Ity_I32);
   10199         IRTemp irt_diff     = newTemp(Ity_I32);
   10200         IRTemp irt_sum_res  = newTemp(Ity_I32);
   10201         IRTemp irt_diff_res = newTemp(Ity_I32);
   10202 
   10203         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10204         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10205 
   10206         assign( irt_diff,
   10207                 binop( Iop_Sub32,
   10208                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10209                        binop( Iop_Sar32,
   10210                               binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
   10211                               mkU8(16) ) ) );
   10212         armSignedSatQ( irt_diff, 0x10, &irt_diff_res, NULL);
   10213 
   10214         assign( irt_sum,
   10215                 binop( Iop_Add32,
   10216                        binop( Iop_Sar32,
   10217                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10218                               mkU8(16) ),
   10219                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) )) );
   10220         armSignedSatQ( irt_sum, 0x10, &irt_sum_res, NULL );
   10221 
   10222         IRExpr* ire_result = binop( Iop_Or32,
   10223                                     binop( Iop_Shl32, mkexpr(irt_diff_res),
   10224                                            mkU8(16) ),
   10225                                     binop( Iop_And32, mkexpr(irt_sum_res),
   10226                                            mkU32(0xFFFF)) );
   10227 
   10228         if (isT)
   10229            putIRegT( regD, ire_result, condT );
   10230         else
   10231            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10232 
   10233         DIP( "qsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10234         return True;
   10235      }
   10236      /* fall through */
   10237    }
   10238 
   10239    /* ------------------- qasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   10240    {
   10241      UInt regD = 99, regN = 99, regM = 99;
   10242      Bool gate = False;
   10243 
   10244      if (isT) {
   10245         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10246            regN = INSNT0(3,0);
   10247            regD = INSNT1(11,8);
   10248            regM = INSNT1(3,0);
   10249            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10250               gate = True;
   10251         }
   10252      } else {
   10253         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10254             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10255             INSNA(7,4)   == BITS4(0,0,1,1)) {
   10256            regD = INSNA(15,12);
   10257            regN = INSNA(19,16);
   10258            regM = INSNA(3,0);
   10259            if (regD != 15 && regN != 15 && regM != 15)
   10260               gate = True;
   10261         }
   10262      }
   10263 
   10264      if (gate) {
   10265         IRTemp irt_regN     = newTemp(Ity_I32);
   10266         IRTemp irt_regM     = newTemp(Ity_I32);
   10267         IRTemp irt_sum      = newTemp(Ity_I32);
   10268         IRTemp irt_diff     = newTemp(Ity_I32);
   10269         IRTemp irt_res_sum  = newTemp(Ity_I32);
   10270         IRTemp irt_res_diff = newTemp(Ity_I32);
   10271 
   10272         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10273         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10274 
   10275         assign( irt_diff,
   10276                 binop( Iop_Sub32,
   10277                        binop( Iop_Sar32,
   10278                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10279                               mkU8(16) ),
   10280                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
   10281         armSignedSatQ( irt_diff, 0x10, &irt_res_diff, NULL );
   10282 
   10283         assign( irt_sum,
   10284                 binop( Iop_Add32,
   10285                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10286                        binop( Iop_Sar32,
   10287                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   10288                               mkU8(16) ) ) );
   10289         armSignedSatQ( irt_sum, 0x10, &irt_res_sum, NULL );
   10290 
   10291         IRExpr* ire_result
   10292           = binop( Iop_Or32,
   10293                    binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
   10294                    binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
   10295 
   10296         if (isT)
   10297            putIRegT( regD, ire_result, condT );
   10298         else
   10299            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10300 
   10301         DIP( "qasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10302         return True;
   10303      }
   10304      /* fall through */
   10305    }
   10306 
   10307    /* ------------------- sasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   10308    {
   10309      UInt regD = 99, regN = 99, regM = 99;
   10310      Bool gate = False;
   10311 
   10312      if (isT) {
   10313         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   10314            regN = INSNT0(3,0);
   10315            regD = INSNT1(11,8);
   10316            regM = INSNT1(3,0);
   10317            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10318               gate = True;
   10319         }
   10320      } else {
   10321         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   10322             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10323             INSNA(7,4)   == BITS4(0,0,1,1)) {
   10324            regD = INSNA(15,12);
   10325            regN = INSNA(19,16);
   10326            regM = INSNA(3,0);
   10327            if (regD != 15 && regN != 15 && regM != 15)
   10328               gate = True;
   10329         }
   10330      }
   10331 
   10332      if (gate) {
   10333         IRTemp irt_regN = newTemp(Ity_I32);
   10334         IRTemp irt_regM = newTemp(Ity_I32);
   10335         IRTemp irt_sum  = newTemp(Ity_I32);
   10336         IRTemp irt_diff = newTemp(Ity_I32);
   10337 
   10338         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10339         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10340 
   10341         assign( irt_diff,
   10342                 binop( Iop_Sub32,
   10343                        binop( Iop_Sar32,
   10344                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10345                               mkU8(16) ),
   10346                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
   10347 
   10348         assign( irt_sum,
   10349                 binop( Iop_Add32,
   10350                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10351                        binop( Iop_Sar32,
   10352                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   10353                               mkU8(16) ) ) );
   10354 
   10355         IRExpr* ire_result
   10356           = binop( Iop_Or32,
   10357                    binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
   10358                    binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
   10359 
   10360         IRTemp ge10 = newTemp(Ity_I32);
   10361         assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
   10362         put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
   10363         put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
   10364 
   10365         IRTemp ge32 = newTemp(Ity_I32);
   10366         assign(ge32, unop(Iop_Not32, mkexpr(irt_sum)));
   10367         put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
   10368         put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
   10369 
   10370         if (isT)
   10371            putIRegT( regD, ire_result, condT );
   10372         else
   10373            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10374 
   10375         DIP( "sasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10376         return True;
   10377      }
   10378      /* fall through */
   10379    }
   10380 
   10381    /* --------------- smuad, smuadx<c><Rd>,<Rn>,<Rm> --------------- */
   10382    /* --------------- smsad, smsadx<c><Rd>,<Rn>,<Rm> --------------- */
   10383    {
   10384      UInt regD = 99, regN = 99, regM = 99, bitM = 99;
   10385      Bool gate = False, isAD = False;
   10386 
   10387      if (isT) {
   10388         if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
   10389             && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
   10390            regN = INSNT0(3,0);
   10391            regD = INSNT1(11,8);
   10392            regM = INSNT1(3,0);
   10393            bitM = INSNT1(4,4);
   10394            isAD = INSNT0(15,4) == 0xFB2;
   10395            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10396               gate = True;
   10397         }
   10398      } else {
   10399         if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
   10400             INSNA(15,12) == BITS4(1,1,1,1)         &&
   10401             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1) ) {
   10402            regD = INSNA(19,16);
   10403            regN = INSNA(3,0);
   10404            regM = INSNA(11,8);
   10405            bitM = INSNA(5,5);
   10406            isAD = INSNA(6,6) == 0;
   10407            if (regD != 15 && regN != 15 && regM != 15)
   10408               gate = True;
   10409         }
   10410      }
   10411 
   10412      if (gate) {
   10413         IRTemp irt_regN    = newTemp(Ity_I32);
   10414         IRTemp irt_regM    = newTemp(Ity_I32);
   10415         IRTemp irt_prod_lo = newTemp(Ity_I32);
   10416         IRTemp irt_prod_hi = newTemp(Ity_I32);
   10417         IRTemp tmpM        = newTemp(Ity_I32);
   10418 
   10419         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10420 
   10421         assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
   10422         assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
   10423 
   10424         assign( irt_prod_lo,
   10425                 binop( Iop_Mul32,
   10426                        binop( Iop_Sar32,
   10427                               binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
   10428                               mkU8(16) ),
   10429                        binop( Iop_Sar32,
   10430                               binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
   10431                               mkU8(16) ) ) );
   10432         assign( irt_prod_hi, binop(Iop_Mul32,
   10433                                    binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)),
   10434                                    binop(Iop_Sar32, mkexpr(irt_regM), mkU8(16))) );
   10435         IRExpr* ire_result
   10436            = binop( isAD ? Iop_Add32 : Iop_Sub32,
   10437                     mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
   10438 
   10439         if (isT)
   10440            putIRegT( regD, ire_result, condT );
   10441         else
   10442            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10443 
   10444         if (isAD) {
   10445            or_into_QFLAG32(
   10446               signed_overflow_after_Add32( ire_result,
   10447                                            irt_prod_lo, irt_prod_hi ),
   10448               condT
   10449            );
   10450         }
   10451 
   10452         DIP("smu%cd%s%s r%u, r%u, r%u\n",
   10453             isAD ? 'a' : 's',
   10454             bitM ? "x" : "", nCC(conq), regD, regN, regM);
   10455         return True;
   10456      }
   10457      /* fall through */
   10458    }
   10459 
   10460    /* --------------- smlad{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
   10461    /* --------------- smlsd{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
   10462    {
   10463      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
   10464      Bool gate = False, isAD = False;
   10465 
   10466      if (isT) {
   10467        if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
   10468            && INSNT1(7,5) == BITS3(0,0,0)) {
   10469            regN = INSNT0(3,0);
   10470            regD = INSNT1(11,8);
   10471            regM = INSNT1(3,0);
   10472            regA = INSNT1(15,12);
   10473            bitM = INSNT1(4,4);
   10474            isAD = INSNT0(15,4) == 0xFB2;
   10475            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
   10476                && !isBadRegT(regA))
   10477               gate = True;
   10478         }
   10479      } else {
   10480         if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
   10481             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
   10482            regD = INSNA(19,16);
   10483            regA = INSNA(15,12);
   10484            regN = INSNA(3,0);
   10485            regM = INSNA(11,8);
   10486            bitM = INSNA(5,5);
   10487            isAD = INSNA(6,6) == 0;
   10488            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
   10489               gate = True;
   10490         }
   10491      }
   10492 
   10493      if (gate) {
   10494         IRTemp irt_regN    = newTemp(Ity_I32);
   10495         IRTemp irt_regM    = newTemp(Ity_I32);
   10496         IRTemp irt_regA    = newTemp(Ity_I32);
   10497         IRTemp irt_prod_lo = newTemp(Ity_I32);
   10498         IRTemp irt_prod_hi = newTemp(Ity_I32);
   10499         IRTemp irt_sum     = newTemp(Ity_I32);
   10500         IRTemp tmpM        = newTemp(Ity_I32);
   10501 
   10502         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10503         assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
   10504 
   10505         assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
   10506         assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
   10507 
   10508         assign( irt_prod_lo,
   10509                 binop(Iop_Mul32,
   10510                       binop(Iop_Sar32,
   10511                             binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10512                             mkU8(16)),
   10513                       binop(Iop_Sar32,
   10514                             binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   10515                             mkU8(16))) );
   10516         assign( irt_prod_hi,
   10517                 binop( Iop_Mul32,
   10518                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10519                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
   10520         assign( irt_sum, binop( isAD ? Iop_Add32 : Iop_Sub32,
   10521                                 mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) ) );
   10522 
   10523         IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_sum), mkexpr(irt_regA));
   10524 
   10525         if (isT)
   10526            putIRegT( regD, ire_result, condT );
   10527         else
   10528            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10529 
   10530         if (isAD) {
   10531            or_into_QFLAG32(
   10532               signed_overflow_after_Add32( mkexpr(irt_sum),
   10533                                            irt_prod_lo, irt_prod_hi ),
   10534               condT
   10535            );
   10536         }
   10537 
   10538         or_into_QFLAG32(
   10539            signed_overflow_after_Add32( ire_result, irt_sum, irt_regA ),
   10540            condT
   10541         );
   10542 
   10543         DIP("sml%cd%s%s r%u, r%u, r%u, r%u\n",
   10544             isAD ? 'a' : 's',
   10545             bitM ? "x" : "", nCC(conq), regD, regN, regM, regA);
   10546         return True;
   10547      }
   10548      /* fall through */
   10549    }
   10550 
   10551    /* ----- smlabb, smlabt, smlatb, smlatt <Rd>,<Rn>,<Rm>,<Ra> ----- */
   10552    {
   10553      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99, bitN = 99;
   10554      Bool gate = False;
   10555 
   10556      if (isT) {
   10557         if (INSNT0(15,4) == 0xFB1 && INSNT1(7,6) == BITS2(0,0)) {
   10558            regN = INSNT0(3,0);
   10559            regD = INSNT1(11,8);
   10560            regM = INSNT1(3,0);
   10561            regA = INSNT1(15,12);
   10562            bitM = INSNT1(4,4);
   10563            bitN = INSNT1(5,5);
   10564            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
   10565                && !isBadRegT(regA))
   10566               gate = True;
   10567         }
   10568      } else {
   10569         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
   10570             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
   10571            regD = INSNA(19,16);
   10572            regN = INSNA(3,0);
   10573            regM = INSNA(11,8);
   10574            regA = INSNA(15,12);
   10575            bitM = INSNA(6,6);
   10576            bitN = INSNA(5,5);
   10577            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
   10578               gate = True;
   10579         }
   10580      }
   10581 
   10582      if (gate) {
   10583         IRTemp irt_regA = newTemp(Ity_I32);
   10584         IRTemp irt_prod = newTemp(Ity_I32);
   10585 
   10586         assign( irt_prod,
   10587                 binop(Iop_Mul32,
   10588                       binop(Iop_Sar32,
   10589                             binop(Iop_Shl32,
   10590                                   isT ? getIRegT(regN) : getIRegA(regN),
   10591                                   mkU8(bitN ? 0 : 16)),
   10592                             mkU8(16)),
   10593                       binop(Iop_Sar32,
   10594                             binop(Iop_Shl32,
   10595                                   isT ? getIRegT(regM) : getIRegA(regM),
   10596                                   mkU8(bitM ? 0 : 16)),
   10597                             mkU8(16))) );
   10598 
   10599         assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
   10600 
   10601         IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_prod), mkexpr(irt_regA));
   10602 
   10603         if (isT)
   10604            putIRegT( regD, ire_result, condT );
   10605         else
   10606            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10607 
   10608         or_into_QFLAG32(
   10609            signed_overflow_after_Add32( ire_result, irt_prod, irt_regA ),
   10610            condT
   10611         );
   10612 
   10613         DIP( "smla%c%c%s r%u, r%u, r%u, r%u\n",
   10614              bitN ? 't' : 'b', bitM ? 't' : 'b',
   10615              nCC(conq), regD, regN, regM, regA );
   10616         return True;
   10617      }
   10618      /* fall through */
   10619    }
   10620 
   10621    /* ----- smlalbb, smlalbt, smlaltb, smlaltt <Rd>,<Rn>,<Rm>,<Ra> ----- */
   10622    {
   10623      UInt regDHi = 99, regN = 99, regM = 99, regDLo = 99, bitM = 99, bitN = 99;
   10624      Bool gate = False;
   10625 
   10626      if (isT) {
   10627         if (INSNT0(15,4) == 0xFBC && INSNT1(7,6) == BITS2(1,0)) {
   10628            regN   = INSNT0(3,0);
   10629            regDHi = INSNT1(11,8);
   10630            regM   = INSNT1(3,0);
   10631            regDLo = INSNT1(15,12);
   10632            bitM   = INSNT1(4,4);
   10633            bitN   = INSNT1(5,5);
   10634            if (!isBadRegT(regDHi) && !isBadRegT(regN) && !isBadRegT(regM)
   10635                && !isBadRegT(regDLo) && regDHi != regDLo)
   10636               gate = True;
   10637         }
   10638      } else {
   10639         if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
   10640             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
   10641            regDHi = INSNA(19,16);
   10642            regN   = INSNA(3,0);
   10643            regM   = INSNA(11,8);
   10644            regDLo = INSNA(15,12);
   10645            bitM   = INSNA(6,6);
   10646            bitN   = INSNA(5,5);
   10647            if (regDHi != 15 && regN != 15 && regM != 15 && regDLo != 15 &&
   10648                regDHi != regDLo)
   10649               gate = True;
   10650         }
   10651      }
   10652 
   10653      if (gate) {
   10654         IRTemp irt_regD  = newTemp(Ity_I64);
   10655         IRTemp irt_prod  = newTemp(Ity_I64);
   10656         IRTemp irt_res   = newTemp(Ity_I64);
   10657         IRTemp irt_resHi = newTemp(Ity_I32);
   10658         IRTemp irt_resLo = newTemp(Ity_I32);
   10659 
   10660         assign( irt_prod,
   10661                 binop(Iop_MullS32,
   10662                       binop(Iop_Sar32,
   10663                             binop(Iop_Shl32,
   10664                                   isT ? getIRegT(regN) : getIRegA(regN),
   10665                                   mkU8(bitN ? 0 : 16)),
   10666                             mkU8(16)),
   10667                       binop(Iop_Sar32,
   10668                             binop(Iop_Shl32,
   10669                                   isT ? getIRegT(regM) : getIRegA(regM),
   10670                                   mkU8(bitM ? 0 : 16)),
   10671                             mkU8(16))) );
   10672 
   10673         assign( irt_regD, binop(Iop_32HLto64,
   10674                                 isT ? getIRegT(regDHi) : getIRegA(regDHi),
   10675                                 isT ? getIRegT(regDLo) : getIRegA(regDLo)) );
   10676         assign( irt_res, binop(Iop_Add64, mkexpr(irt_regD), mkexpr(irt_prod)) );
   10677         assign( irt_resHi, unop(Iop_64HIto32, mkexpr(irt_res)) );
   10678         assign( irt_resLo, unop(Iop_64to32, mkexpr(irt_res)) );
   10679 
   10680         if (isT) {
   10681            putIRegT( regDHi, mkexpr(irt_resHi), condT );
   10682            putIRegT( regDLo, mkexpr(irt_resLo), condT );
   10683         } else {
   10684            putIRegA( regDHi, mkexpr(irt_resHi), condT, Ijk_Boring );
   10685            putIRegA( regDLo, mkexpr(irt_resLo), condT, Ijk_Boring );
   10686         }
   10687 
   10688         DIP( "smlal%c%c%s r%u, r%u, r%u, r%u\n",
   10689              bitN ? 't' : 'b', bitM ? 't' : 'b',
   10690              nCC(conq), regDHi, regN, regM, regDLo );
   10691         return True;
   10692      }
   10693      /* fall through */
   10694    }
   10695 
   10696    /* ----- smlawb, smlawt <Rd>,<Rn>,<Rm>,<Ra> ----- */
   10697    {
   10698      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
   10699      Bool gate = False;
   10700 
   10701      if (isT) {
   10702         if (INSNT0(15,4) == 0xFB3 && INSNT1(7,5) == BITS3(0,0,0)) {
   10703            regN = INSNT0(3,0);
   10704            regD = INSNT1(11,8);
   10705            regM = INSNT1(3,0);
   10706            regA = INSNT1(15,12);
   10707            bitM = INSNT1(4,4);
   10708            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
   10709                && !isBadRegT(regA))
   10710               gate = True;
   10711         }
   10712      } else {
   10713         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
   10714             (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,0,0)) {
   10715            regD = INSNA(19,16);
   10716            regN = INSNA(3,0);
   10717            regM = INSNA(11,8);
   10718            regA = INSNA(15,12);
   10719            bitM = INSNA(6,6);
   10720            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
   10721               gate = True;
   10722         }
   10723      }
   10724 
   10725      if (gate) {
   10726         IRTemp irt_regA = newTemp(Ity_I32);
   10727         IRTemp irt_prod = newTemp(Ity_I64);
   10728 
   10729         assign( irt_prod,
   10730                 binop(Iop_MullS32,
   10731                       isT ? getIRegT(regN) : getIRegA(regN),
   10732                       binop(Iop_Sar32,
   10733                             binop(Iop_Shl32,
   10734                                   isT ? getIRegT(regM) : getIRegA(regM),
   10735                                   mkU8(bitM ? 0 : 16)),
   10736                             mkU8(16))) );
   10737 
   10738         assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
   10739 
   10740         IRTemp prod32 = newTemp(Ity_I32);
   10741         assign(prod32,
   10742                binop(Iop_Or32,
   10743                      binop(Iop_Shl32, unop(Iop_64HIto32, mkexpr(irt_prod)), mkU8(16)),
   10744                      binop(Iop_Shr32, unop(Iop_64to32, mkexpr(irt_prod)), mkU8(16))
   10745         ));
   10746 
   10747         IRExpr* ire_result = binop(Iop_Add32, mkexpr(prod32), mkexpr(irt_regA));
   10748 
   10749         if (isT)
   10750            putIRegT( regD, ire_result, condT );
   10751         else
   10752            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10753 
   10754         or_into_QFLAG32(
   10755            signed_overflow_after_Add32( ire_result, prod32, irt_regA ),
   10756            condT
   10757         );
   10758 
   10759         DIP( "smlaw%c%s r%u, r%u, r%u, r%u\n",
   10760              bitM ? 't' : 'b',
   10761              nCC(conq), regD, regN, regM, regA );
   10762         return True;
   10763      }
   10764      /* fall through */
   10765    }
   10766 
   10767    /* ------------------- sel<c> <Rd>,<Rn>,<Rm> -------------------- */
   10768    /* fixme: fix up the test in v6media.c so that we can pass the ge
   10769       flags as part of the test. */
   10770    {
   10771      UInt regD = 99, regN = 99, regM = 99;
   10772      Bool gate = False;
   10773 
   10774      if (isT) {
   10775         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
   10776            regN = INSNT0(3,0);
   10777            regD = INSNT1(11,8);
   10778            regM = INSNT1(3,0);
   10779            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10780               gate = True;
   10781         }
   10782      } else {
   10783         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
   10784             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10785             INSNA(7,4)   == BITS4(1,0,1,1)) {
   10786            regD = INSNA(15,12);
   10787            regN = INSNA(19,16);
   10788            regM = INSNA(3,0);
   10789            if (regD != 15 && regN != 15 && regM != 15)
   10790               gate = True;
   10791         }
   10792      }
   10793 
   10794      if (gate) {
   10795         IRTemp irt_ge_flag0 = newTemp(Ity_I32);
   10796         IRTemp irt_ge_flag1 = newTemp(Ity_I32);
   10797         IRTemp irt_ge_flag2 = newTemp(Ity_I32);
   10798         IRTemp irt_ge_flag3 = newTemp(Ity_I32);
   10799 
   10800         assign( irt_ge_flag0, get_GEFLAG32(0) );
   10801         assign( irt_ge_flag1, get_GEFLAG32(1) );
   10802         assign( irt_ge_flag2, get_GEFLAG32(2) );
   10803         assign( irt_ge_flag3, get_GEFLAG32(3) );
   10804 
   10805         IRExpr* ire_ge_flag0_or
   10806           = binop(Iop_Or32, mkexpr(irt_ge_flag0),
   10807                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag0)));
   10808         IRExpr* ire_ge_flag1_or
   10809           = binop(Iop_Or32, mkexpr(irt_ge_flag1),
   10810                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag1)));
   10811         IRExpr* ire_ge_flag2_or
   10812           = binop(Iop_Or32, mkexpr(irt_ge_flag2),
   10813                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag2)));
   10814         IRExpr* ire_ge_flag3_or
   10815           = binop(Iop_Or32, mkexpr(irt_ge_flag3),
   10816                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag3)));
   10817 
   10818         IRExpr* ire_ge_flags
   10819           = binop( Iop_Or32,
   10820                    binop(Iop_Or32,
   10821                          binop(Iop_And32,
   10822                                binop(Iop_Sar32, ire_ge_flag0_or, mkU8(31)),
   10823                                mkU32(0x000000ff)),
   10824                          binop(Iop_And32,
   10825                                binop(Iop_Sar32, ire_ge_flag1_or, mkU8(31)),
   10826                                mkU32(0x0000ff00))),
   10827                    binop(Iop_Or32,
   10828                          binop(Iop_And32,
   10829                                binop(Iop_Sar32, ire_ge_flag2_or, mkU8(31)),
   10830                                mkU32(0x00ff0000)),
   10831                          binop(Iop_And32,
   10832                                binop(Iop_Sar32, ire_ge_flag3_or, mkU8(31)),
   10833                                mkU32(0xff000000))) );
   10834 
   10835         IRExpr* ire_result
   10836           = binop(Iop_Or32,
   10837                   binop(Iop_And32,
   10838                         isT ? getIRegT(regN) : getIRegA(regN),
   10839                         ire_ge_flags ),
   10840                   binop(Iop_And32,
   10841                         isT ? getIRegT(regM) : getIRegA(regM),
   10842                         unop(Iop_Not32, ire_ge_flags)));
   10843 
   10844         if (isT)
   10845            putIRegT( regD, ire_result, condT );
   10846         else
   10847            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10848 
   10849         DIP("sel%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10850         return True;
   10851      }
   10852      /* fall through */
   10853    }
   10854 
   10855    /* ----------------- uxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
   10856    {
   10857      UInt regD = 99, regN = 99, regM = 99, rotate = 99;
   10858      Bool gate = False;
   10859 
   10860      if (isT) {
   10861         if (INSNT0(15,4) == 0xFA3 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
   10862            regN   = INSNT0(3,0);
   10863            regD   = INSNT1(11,8);
   10864            regM   = INSNT1(3,0);
   10865            rotate = INSNT1(5,4);
   10866            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10867               gate = True;
   10868         }
   10869      } else {
   10870         if (INSNA(27,20) == BITS8(0,1,1,0,1,1,0,0) &&
   10871             INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
   10872            regD   = INSNA(15,12);
   10873            regN   = INSNA(19,16);
   10874            regM   = INSNA(3,0);
   10875            rotate = INSNA(11,10);
   10876            if (regD != 15 && regN != 15 && regM != 15)
   10877              gate = True;
   10878         }
   10879      }
   10880 
   10881      if (gate) {
   10882         IRTemp irt_regN = newTemp(Ity_I32);
   10883         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10884 
   10885         IRTemp irt_regM = newTemp(Ity_I32);
   10886         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10887 
   10888         IRTemp irt_rot = newTemp(Ity_I32);
   10889         assign( irt_rot, binop(Iop_And32,
   10890                                genROR32(irt_regM, 8 * rotate),
   10891                                mkU32(0x00FF00FF)) );
   10892 
   10893         IRExpr* resLo
   10894            = binop(Iop_And32,
   10895                    binop(Iop_Add32, mkexpr(irt_regN), mkexpr(irt_rot)),
   10896                    mkU32(0x0000FFFF));
   10897 
   10898         IRExpr* resHi
   10899            = binop(Iop_Add32,
   10900                    binop(Iop_And32, mkexpr(irt_regN), mkU32(0xFFFF0000)),
   10901                    binop(Iop_And32, mkexpr(irt_rot),  mkU32(0xFFFF0000)));
   10902 
   10903         IRExpr* ire_result
   10904            = binop( Iop_Or32, resHi, resLo );
   10905 
   10906         if (isT)
   10907            putIRegT( regD, ire_result, condT );
   10908         else
   10909            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10910 
   10911         DIP( "uxtab16%s r%u, r%u, r%u, ROR #%u\n",
   10912              nCC(conq), regD, regN, regM, 8 * rotate );
   10913         return True;
   10914      }
   10915      /* fall through */
   10916    }
   10917 
   10918    /* --------------- usad8  Rd,Rn,Rm    ---------------- */
   10919    /* --------------- usada8 Rd,Rn,Rm,Ra ---------------- */
   10920    {
   10921      UInt rD = 99, rN = 99, rM = 99, rA = 99;
   10922      Bool gate = False;
   10923 
   10924      if (isT) {
   10925        if (INSNT0(15,4) == 0xFB7 && INSNT1(7,4) == BITS4(0,0,0,0)) {
   10926            rN = INSNT0(3,0);
   10927            rA = INSNT1(15,12);
   10928            rD = INSNT1(11,8);
   10929            rM = INSNT1(3,0);
   10930            if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && rA != 13)
   10931               gate = True;
   10932         }
   10933      } else {
   10934         if (INSNA(27,20) == BITS8(0,1,1,1,1,0,0,0) &&
   10935             INSNA(7,4)   == BITS4(0,0,0,1) ) {
   10936            rD = INSNA(19,16);
   10937            rA = INSNA(15,12);
   10938            rM = INSNA(11,8);
   10939            rN = INSNA(3,0);
   10940            if (rD != 15 && rN != 15 && rM != 15 /* but rA can be 15 */)
   10941               gate = True;
   10942         }
   10943      }
   10944      /* We allow rA == 15, to denote the usad8 (no accumulator) case. */
   10945 
   10946      if (gate) {
   10947         IRExpr* rNe = isT ? getIRegT(rN) : getIRegA(rN);
   10948         IRExpr* rMe = isT ? getIRegT(rM) : getIRegA(rM);
   10949         IRExpr* rAe = rA == 15 ? mkU32(0)
   10950                                : (isT ? getIRegT(rA) : getIRegA(rA));
   10951         IRExpr* res = binop(Iop_Add32,
   10952                             binop(Iop_Sad8Ux4, rNe, rMe),
   10953                             rAe);
   10954         if (isT)
   10955            putIRegT( rD, res, condT );
   10956         else
   10957            putIRegA( rD, res, condT, Ijk_Boring );
   10958 
   10959         if (rA == 15) {
   10960            DIP( "usad8%s r%u, r%u, r%u\n",
   10961                 nCC(conq), rD, rN, rM );
   10962         } else {
   10963            DIP( "usada8%s r%u, r%u, r%u, r%u\n",
   10964                 nCC(conq), rD, rN, rM, rA );
   10965         }
   10966         return True;
   10967      }
   10968      /* fall through */
   10969    }
   10970 
   /* ------------------ qadd<c> <Rd>,<Rm>,<Rn> ------------------- */
   10972    {
   10973      UInt regD = 99, regN = 99, regM = 99;
   10974      Bool gate = False;
   10975 
   10976      if (isT) {
   10977         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
   10978            regN = INSNT0(3,0);
   10979            regD = INSNT1(11,8);
   10980            regM = INSNT1(3,0);
   10981            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10982               gate = True;
   10983         }
   10984      } else {
   10985         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
   10986             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   10987             INSNA(7,4)   == BITS4(0,1,0,1)) {
   10988            regD = INSNA(15,12);
   10989            regN = INSNA(19,16);
   10990            regM = INSNA(3,0);
   10991            if (regD != 15 && regN != 15 && regM != 15)
   10992               gate = True;
   10993         }
   10994      }
   10995 
   10996      if (gate) {
   10997         IRTemp rNt   = newTemp(Ity_I32);
   10998         IRTemp rMt   = newTemp(Ity_I32);
   10999         IRTemp res_q = newTemp(Ity_I32);
   11000 
   11001         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11002         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11003 
   11004         assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt)));
   11005         if (isT)
   11006            putIRegT( regD, mkexpr(res_q), condT );
   11007         else
   11008            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11009 
   11010         or_into_QFLAG32(
   11011            signed_overflow_after_Add32(
   11012               binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
   11013            condT
   11014         );
   11015 
   11016         DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11017         return True;
   11018      }
   11019      /* fall through */
   11020    }
   11021 
   11022    /* ------------------ qdadd<c> <Rd>,<Rm>,<Rn> ------------------- */
   11023    {
   11024      UInt regD = 99, regN = 99, regM = 99;
   11025      Bool gate = False;
   11026 
   11027      if (isT) {
   11028         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF090) {
   11029            regN = INSNT0(3,0);
   11030            regD = INSNT1(11,8);
   11031            regM = INSNT1(3,0);
   11032            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11033               gate = True;
   11034         }
   11035      } else {
   11036         if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
   11037             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   11038             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11039            regD = INSNA(15,12);
   11040            regN = INSNA(19,16);
   11041            regM = INSNA(3,0);
   11042            if (regD != 15 && regN != 15 && regM != 15)
   11043               gate = True;
   11044         }
   11045      }
   11046 
   11047      if (gate) {
   11048         IRTemp rNt   = newTemp(Ity_I32);
   11049         IRTemp rMt   = newTemp(Ity_I32);
   11050         IRTemp rN_d  = newTemp(Ity_I32);
   11051         IRTemp res_q = newTemp(Ity_I32);
   11052 
   11053         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11054         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11055 
   11056         or_into_QFLAG32(
   11057            signed_overflow_after_Add32(
   11058               binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
   11059            condT
   11060         );
   11061 
   11062         assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
   11063         assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rN_d)));
   11064         if (isT)
   11065            putIRegT( regD, mkexpr(res_q), condT );
   11066         else
   11067            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11068 
   11069         or_into_QFLAG32(
   11070            signed_overflow_after_Add32(
   11071               binop(Iop_Add32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
   11072            condT
   11073         );
   11074 
   11075         DIP("qdadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11076         return True;
   11077      }
   11078      /* fall through */
   11079    }
   11080 
   /* ------------------ qsub<c> <Rd>,<Rm>,<Rn> ------------------- */
   11082    {
   11083      UInt regD = 99, regN = 99, regM = 99;
   11084      Bool gate = False;
   11085 
   11086      if (isT) {
   11087         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) {
   11088            regN = INSNT0(3,0);
   11089            regD = INSNT1(11,8);
   11090            regM = INSNT1(3,0);
   11091            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11092               gate = True;
   11093         }
   11094      } else {
   11095         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
   11096             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   11097             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11098            regD = INSNA(15,12);
   11099            regN = INSNA(19,16);
   11100            regM = INSNA(3,0);
   11101            if (regD != 15 && regN != 15 && regM != 15)
   11102               gate = True;
   11103         }
   11104      }
   11105 
   11106      if (gate) {
   11107         IRTemp rNt   = newTemp(Ity_I32);
   11108         IRTemp rMt   = newTemp(Ity_I32);
   11109         IRTemp res_q = newTemp(Ity_I32);
   11110 
   11111         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11112         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11113 
   11114         assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt)));
   11115         if (isT)
   11116            putIRegT( regD, mkexpr(res_q), condT );
   11117         else
   11118            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11119 
   11120         or_into_QFLAG32(
   11121            signed_overflow_after_Sub32(
   11122               binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
   11123            condT
   11124         );
   11125 
   11126         DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11127         return True;
   11128      }
   11129      /* fall through */
   11130    }
   11131 
   11132    /* ------------------ qdsub<c> <Rd>,<Rm>,<Rn> ------------------- */
   11133    {
   11134      UInt regD = 99, regN = 99, regM = 99;
   11135      Bool gate = False;
   11136 
   11137      if (isT) {
   11138         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0B0) {
   11139            regN = INSNT0(3,0);
   11140            regD = INSNT1(11,8);
   11141            regM = INSNT1(3,0);
   11142            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11143               gate = True;
   11144         }
   11145      } else {
   11146         if (INSNA(27,20) == BITS8(0,0,0,1,0,1,1,0) &&
   11147             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   11148             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11149            regD = INSNA(15,12);
   11150            regN = INSNA(19,16);
   11151            regM = INSNA(3,0);
   11152            if (regD != 15 && regN != 15 && regM != 15)
   11153               gate = True;
   11154         }
   11155      }
   11156 
   11157      if (gate) {
   11158         IRTemp rNt   = newTemp(Ity_I32);
   11159         IRTemp rMt   = newTemp(Ity_I32);
   11160         IRTemp rN_d  = newTemp(Ity_I32);
   11161         IRTemp res_q = newTemp(Ity_I32);
   11162 
   11163         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11164         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11165 
   11166         or_into_QFLAG32(
   11167            signed_overflow_after_Add32(
   11168               binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
   11169            condT
   11170         );
   11171 
   11172         assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
   11173         assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rN_d)));
   11174         if (isT)
   11175            putIRegT( regD, mkexpr(res_q), condT );
   11176         else
   11177            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11178 
   11179         or_into_QFLAG32(
   11180            signed_overflow_after_Sub32(
   11181               binop(Iop_Sub32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
   11182            condT
   11183         );
   11184 
   11185         DIP("qdsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11186         return True;
   11187      }
   11188      /* fall through */
   11189    }
   11190 
   11191    /* ------------------ uqsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
   11192    {
   11193      UInt regD = 99, regN = 99, regM = 99;
   11194      Bool gate = False;
   11195 
   11196      if (isT) {
   11197         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11198            regN = INSNT0(3,0);
   11199            regD = INSNT1(11,8);
   11200            regM = INSNT1(3,0);
   11201            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11202               gate = True;
   11203         }
   11204      } else {
   11205         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11206             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11207             INSNA(7,4)   == BITS4(0,1,1,1)) {
   11208            regD = INSNA(15,12);
   11209            regN = INSNA(19,16);
   11210            regM = INSNA(3,0);
   11211            if (regD != 15 && regN != 15 && regM != 15)
   11212              gate = True;
   11213         }
   11214      }
   11215 
   11216      if (gate) {
   11217         IRTemp rNt   = newTemp(Ity_I32);
   11218         IRTemp rMt   = newTemp(Ity_I32);
   11219         IRTemp res_q = newTemp(Ity_I32);
   11220 
   11221         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11222         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11223 
   11224         assign(res_q, binop(Iop_QSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
   11225         if (isT)
   11226            putIRegT( regD, mkexpr(res_q), condT );
   11227         else
   11228            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11229 
   11230         DIP("uqsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11231         return True;
   11232      }
   11233      /* fall through */
   11234    }
   11235 
   11236    /* ----------------- shadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
   11237    {
   11238      UInt regD = 99, regN = 99, regM = 99;
   11239      Bool gate = False;
   11240 
   11241      if (isT) {
   11242         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   11243            regN = INSNT0(3,0);
   11244            regD = INSNT1(11,8);
   11245            regM = INSNT1(3,0);
   11246            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11247               gate = True;
   11248         }
   11249      } else {
   11250         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   11251             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11252             INSNA(7,4)   == BITS4(0,0,0,1)) {
   11253            regD = INSNA(15,12);
   11254            regN = INSNA(19,16);
   11255            regM = INSNA(3,0);
   11256            if (regD != 15 && regN != 15 && regM != 15)
   11257               gate = True;
   11258         }
   11259      }
   11260 
   11261      if (gate) {
   11262         IRTemp rNt   = newTemp(Ity_I32);
   11263         IRTemp rMt   = newTemp(Ity_I32);
   11264         IRTemp res_q = newTemp(Ity_I32);
   11265 
   11266         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11267         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11268 
   11269         assign(res_q, binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
   11270         if (isT)
   11271            putIRegT( regD, mkexpr(res_q), condT );
   11272         else
   11273            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11274 
   11275         DIP("shadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11276         return True;
   11277      }
   11278      /* fall through */
   11279    }
   11280 
   11281    /* ----------------- uhsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
   11282    {
   11283      UInt regD = 99, regN = 99, regM = 99;
   11284      Bool gate = False;
   11285 
   11286      if (isT) {
   11287         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   11288            regN = INSNT0(3,0);
   11289            regD = INSNT1(11,8);
   11290            regM = INSNT1(3,0);
   11291            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11292               gate = True;
   11293         }
   11294      } else {
   11295         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   11296             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11297             INSNA(7,4)   == BITS4(1,1,1,1)) {
   11298            regD = INSNA(15,12);
   11299            regN = INSNA(19,16);
   11300            regM = INSNA(3,0);
   11301            if (regD != 15 && regN != 15 && regM != 15)
   11302               gate = True;
   11303         }
   11304      }
   11305 
   11306      if (gate) {
   11307         IRTemp rNt   = newTemp(Ity_I32);
   11308         IRTemp rMt   = newTemp(Ity_I32);
   11309         IRTemp res_q = newTemp(Ity_I32);
   11310 
   11311         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11312         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11313 
   11314         assign(res_q, binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
   11315         if (isT)
   11316            putIRegT( regD, mkexpr(res_q), condT );
   11317         else
   11318            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11319 
   11320         DIP("uhsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11321         return True;
   11322      }
   11323      /* fall through */
   11324    }
   11325 
   11326    /* ----------------- uhsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
   11327    {
   11328      UInt regD = 99, regN = 99, regM = 99;
   11329      Bool gate = False;
   11330 
   11331      if (isT) {
   11332         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   11333            regN = INSNT0(3,0);
   11334            regD = INSNT1(11,8);
   11335            regM = INSNT1(3,0);
   11336            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11337               gate = True;
   11338         }
   11339      } else {
   11340         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   11341             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11342             INSNA(7,4)   == BITS4(0,1,1,1)) {
   11343            regD = INSNA(15,12);
   11344            regN = INSNA(19,16);
   11345            regM = INSNA(3,0);
   11346            if (regD != 15 && regN != 15 && regM != 15)
   11347               gate = True;
   11348         }
   11349      }
   11350 
   11351      if (gate) {
   11352         IRTemp rNt   = newTemp(Ity_I32);
   11353         IRTemp rMt   = newTemp(Ity_I32);
   11354         IRTemp res_q = newTemp(Ity_I32);
   11355 
   11356         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11357         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11358 
   11359         assign(res_q, binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
   11360         if (isT)
   11361            putIRegT( regD, mkexpr(res_q), condT );
   11362         else
   11363            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11364 
   11365         DIP("uhsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11366         return True;
   11367      }
   11368      /* fall through */
   11369    }
   11370 
   11371    /* ------------------ uqadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
   11372    {
   11373      UInt regD = 99, regN = 99, regM = 99;
   11374      Bool gate = False;
   11375 
   11376      if (isT) {
   11377         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11378            regN = INSNT0(3,0);
   11379            regD = INSNT1(11,8);
   11380            regM = INSNT1(3,0);
   11381            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11382               gate = True;
   11383         }
   11384      } else {
   11385         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11386             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11387             INSNA(7,4)   == BITS4(0,0,0,1)) {
   11388            regD = INSNA(15,12);
   11389            regN = INSNA(19,16);
   11390            regM = INSNA(3,0);
   11391            if (regD != 15 && regN != 15 && regM != 15)
   11392               gate = True;
   11393         }
   11394      }
   11395 
   11396      if (gate) {
   11397         IRTemp rNt   = newTemp(Ity_I32);
   11398         IRTemp rMt   = newTemp(Ity_I32);
   11399         IRTemp res_q = newTemp(Ity_I32);
   11400 
   11401         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11402         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11403 
   11404         assign(res_q, binop(Iop_QAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
   11405         if (isT)
   11406            putIRegT( regD, mkexpr(res_q), condT );
   11407         else
   11408            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11409 
   11410         DIP("uqadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11411         return True;
   11412      }
   11413      /* fall through */
   11414    }
   11415 
   11416    /* ------------------- uqsax<c> <Rd>,<Rn>,<Rm> ------------------- */
   11417    {
   11418      UInt regD = 99, regN = 99, regM = 99;
   11419      Bool gate = False;
   11420 
   11421      if (isT) {
   11422         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11423            regN = INSNT0(3,0);
   11424            regD = INSNT1(11,8);
   11425            regM = INSNT1(3,0);
   11426            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11427               gate = True;
   11428         }
   11429      } else {
   11430         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11431             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11432             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11433            regD = INSNA(15,12);
   11434            regN = INSNA(19,16);
   11435            regM = INSNA(3,0);
   11436            if (regD != 15 && regN != 15 && regM != 15)
   11437               gate = True;
   11438         }
   11439      }
   11440 
   11441      if (gate) {
   11442         IRTemp irt_regN     = newTemp(Ity_I32);
   11443         IRTemp irt_regM     = newTemp(Ity_I32);
   11444         IRTemp irt_sum      = newTemp(Ity_I32);
   11445         IRTemp irt_diff     = newTemp(Ity_I32);
   11446         IRTemp irt_sum_res  = newTemp(Ity_I32);
   11447         IRTemp irt_diff_res = newTemp(Ity_I32);
   11448 
   11449         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11450         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11451 
   11452         assign( irt_diff,
   11453                 binop( Iop_Sub32,
   11454                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11455                        binop( Iop_Shr32,
   11456                               binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
   11457                               mkU8(16) ) ) );
   11458         armUnsignedSatQ( &irt_diff_res, NULL, irt_diff, 0x10);
   11459 
   11460         assign( irt_sum,
   11461                 binop( Iop_Add32,
   11462                        binop( Iop_Shr32,
   11463                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   11464                               mkU8(16) ),
   11465                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) )) );
   11466         armUnsignedSatQ( &irt_sum_res, NULL, irt_sum, 0x10 );
   11467 
   11468         IRExpr* ire_result = binop( Iop_Or32,
   11469                                     binop( Iop_Shl32, mkexpr(irt_diff_res),
   11470                                            mkU8(16) ),
   11471                                     binop( Iop_And32, mkexpr(irt_sum_res),
   11472                                            mkU32(0xFFFF)) );
   11473 
   11474         if (isT)
   11475            putIRegT( regD, ire_result, condT );
   11476         else
   11477            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11478 
   11479         DIP( "uqsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11480         return True;
   11481      }
   11482      /* fall through */
   11483    }
   11484 
   11485    /* ------------------- uqasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   11486    {
   11487      UInt regD = 99, regN = 99, regM = 99;
   11488      Bool gate = False;
   11489 
   11490      if (isT) {
   11491         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11492            regN = INSNT0(3,0);
   11493            regD = INSNT1(11,8);
   11494            regM = INSNT1(3,0);
   11495            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11496               gate = True;
   11497         }
   11498      } else {
   11499         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11500             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11501             INSNA(7,4)   == BITS4(0,0,1,1)) {
   11502            regD = INSNA(15,12);
   11503            regN = INSNA(19,16);
   11504            regM = INSNA(3,0);
   11505            if (regD != 15 && regN != 15 && regM != 15)
   11506               gate = True;
   11507         }
   11508      }
   11509 
   11510      if (gate) {
   11511         IRTemp irt_regN     = newTemp(Ity_I32);
   11512         IRTemp irt_regM     = newTemp(Ity_I32);
   11513         IRTemp irt_sum      = newTemp(Ity_I32);
   11514         IRTemp irt_diff     = newTemp(Ity_I32);
   11515         IRTemp irt_res_sum  = newTemp(Ity_I32);
   11516         IRTemp irt_res_diff = newTemp(Ity_I32);
   11517 
   11518         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11519         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11520 
   11521         assign( irt_diff,
   11522                 binop( Iop_Sub32,
   11523                        binop( Iop_Shr32,
   11524                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   11525                               mkU8(16) ),
   11526                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
   11527         armUnsignedSatQ( &irt_res_diff, NULL, irt_diff, 0x10 );
   11528 
   11529         assign( irt_sum,
   11530                 binop( Iop_Add32,
   11531                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11532                        binop( Iop_Shr32,
   11533                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   11534                               mkU8(16) ) ) );
   11535         armUnsignedSatQ( &irt_res_sum, NULL, irt_sum, 0x10 );
   11536 
   11537         IRExpr* ire_result
   11538           = binop( Iop_Or32,
   11539                    binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
   11540                    binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
   11541 
   11542         if (isT)
   11543            putIRegT( regD, ire_result, condT );
   11544         else
   11545            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11546 
   11547         DIP( "uqasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11548         return True;
   11549      }
   11550      /* fall through */
   11551    }
   11552 
   11553    /* ------------------- usax<c> <Rd>,<Rn>,<Rm> ------------------- */
   11554    {
   11555      UInt regD = 99, regN = 99, regM = 99;
   11556      Bool gate = False;
   11557 
   11558      if (isT) {
   11559         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   11560            regN = INSNT0(3,0);
   11561            regD = INSNT1(11,8);
   11562            regM = INSNT1(3,0);
   11563            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11564               gate = True;
   11565         }
   11566      } else {
   11567         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   11568             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11569             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11570            regD = INSNA(15,12);
   11571            regN = INSNA(19,16);
   11572            regM = INSNA(3,0);
   11573            if (regD != 15 && regN != 15 && regM != 15)
   11574               gate = True;
   11575         }
   11576      }
   11577 
   11578      if (gate) {
   11579         IRTemp irt_regN = newTemp(Ity_I32);
   11580         IRTemp irt_regM = newTemp(Ity_I32);
   11581         IRTemp irt_sum  = newTemp(Ity_I32);
   11582         IRTemp irt_diff = newTemp(Ity_I32);
   11583 
   11584         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11585         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11586 
   11587         assign( irt_sum,
   11588                 binop( Iop_Add32,
   11589                        unop( Iop_16Uto32,
   11590                              unop( Iop_32to16, mkexpr(irt_regN) )
   11591                        ),
   11592                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
   11593 
   11594         assign( irt_diff,
   11595                 binop( Iop_Sub32,
   11596                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11597                        unop( Iop_16Uto32,
   11598                              unop( Iop_32to16, mkexpr(irt_regM) )
   11599                        )
   11600                 )
   11601         );
   11602 
   11603         IRExpr* ire_result
   11604           = binop( Iop_Or32,
   11605                    binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
   11606                    binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
   11607 
   11608         IRTemp ge10 = newTemp(Ity_I32);
   11609         assign( ge10, IRExpr_ITE( binop( Iop_CmpLE32U,
   11610                                          mkU32(0x10000), mkexpr(irt_sum) ),
   11611                                   mkU32(1), mkU32(0) ) );
   11612         put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
   11613         put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
   11614 
   11615         IRTemp ge32 = newTemp(Ity_I32);
   11616         assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
   11617         put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
   11618         put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
   11619 
   11620         if (isT)
   11621            putIRegT( regD, ire_result, condT );
   11622         else
   11623            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11624 
   11625         DIP( "usax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11626         return True;
   11627      }
   11628      /* fall through */
   11629    }
   11630 
   11631    /* ------------------- uasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   11632    {
   11633      UInt regD = 99, regN = 99, regM = 99;
   11634      Bool gate = False;
   11635 
   11636      if (isT) {
   11637         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   11638            regN = INSNT0(3,0);
   11639            regD = INSNT1(11,8);
   11640            regM = INSNT1(3,0);
   11641            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11642               gate = True;
   11643         }
   11644      } else {
   11645         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   11646             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11647             INSNA(7,4)   == BITS4(0,0,1,1)) {
   11648            regD = INSNA(15,12);
   11649            regN = INSNA(19,16);
   11650            regM = INSNA(3,0);
   11651            if (regD != 15 && regN != 15 && regM != 15)
   11652               gate = True;
   11653         }
   11654      }
   11655 
   11656      if (gate) {
   11657         IRTemp irt_regN = newTemp(Ity_I32);
   11658         IRTemp irt_regM = newTemp(Ity_I32);
   11659         IRTemp irt_sum  = newTemp(Ity_I32);
   11660         IRTemp irt_diff = newTemp(Ity_I32);
   11661 
   11662         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11663         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11664 
   11665         assign( irt_diff,
   11666                 binop( Iop_Sub32,
   11667                        unop( Iop_16Uto32,
   11668                              unop( Iop_32to16, mkexpr(irt_regN) )
   11669                        ),
   11670                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
   11671 
   11672         assign( irt_sum,
   11673                 binop( Iop_Add32,
   11674                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11675                        unop( Iop_16Uto32,
   11676                              unop( Iop_32to16, mkexpr(irt_regM) )
   11677                        ) ) );
   11678 
   11679         IRExpr* ire_result
   11680           = binop( Iop_Or32,
   11681                    binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
   11682                    binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
   11683 
   11684         IRTemp ge10 = newTemp(Ity_I32);
   11685         assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
   11686         put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
   11687         put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
   11688 
   11689         IRTemp ge32 = newTemp(Ity_I32);
   11690         assign( ge32, IRExpr_ITE( binop( Iop_CmpLE32U,
   11691                                          mkU32(0x10000), mkexpr(irt_sum) ),
   11692                                   mkU32(1), mkU32(0) ) );
   11693         put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
   11694         put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
   11695 
   11696         if (isT)
   11697            putIRegT( regD, ire_result, condT );
   11698         else
   11699            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11700 
   11701         DIP( "uasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11702         return True;
   11703      }
   11704      /* fall through */
   11705    }
   11706 
   11707    /* ------------------- ssax<c> <Rd>,<Rn>,<Rm> ------------------- */
   11708    {
   11709      UInt regD = 99, regN = 99, regM = 99;
   11710      Bool gate = False;
   11711 
   11712      if (isT) {
   11713         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   11714            regN = INSNT0(3,0);
   11715            regD = INSNT1(11,8);
   11716            regM = INSNT1(3,0);
   11717            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11718               gate = True;
   11719         }
   11720      } else {
   11721         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   11722             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11723             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11724            regD = INSNA(15,12);
   11725            regN = INSNA(19,16);
   11726            regM = INSNA(3,0);
   11727            if (regD != 15 && regN != 15 && regM != 15)
   11728               gate = True;
   11729         }
   11730      }
   11731 
   11732      if (gate) {
   11733         IRTemp irt_regN = newTemp(Ity_I32);
   11734         IRTemp irt_regM = newTemp(Ity_I32);
   11735         IRTemp irt_sum  = newTemp(Ity_I32);
   11736         IRTemp irt_diff = newTemp(Ity_I32);
   11737 
   11738         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11739         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11740 
   11741         assign( irt_sum,
   11742                 binop( Iop_Add32,
   11743                        binop( Iop_Sar32,
   11744                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   11745                               mkU8(16) ),
   11746                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
   11747 
   11748         assign( irt_diff,
   11749                 binop( Iop_Sub32,
   11750                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   11751                        binop( Iop_Sar32,
   11752                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   11753                               mkU8(16) ) ) );
   11754 
   11755         IRExpr* ire_result
   11756           = binop( Iop_Or32,
   11757                    binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
   11758                    binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
   11759 
   11760         IRTemp ge10 = newTemp(Ity_I32);
   11761         assign(ge10, unop(Iop_Not32, mkexpr(irt_sum)));
   11762         put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
   11763         put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
   11764 
   11765         IRTemp ge32 = newTemp(Ity_I32);
   11766         assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
   11767         put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
   11768         put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
   11769 
   11770         if (isT)
   11771            putIRegT( regD, ire_result, condT );
   11772         else
   11773            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11774 
   11775         DIP( "ssax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11776         return True;
   11777      }
   11778      /* fall through */
   11779    }
   11780 
   11781    /* ----------------- shsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
   11782    {
   11783      UInt regD = 99, regN = 99, regM = 99;
   11784      Bool gate = False;
   11785 
   11786      if (isT) {
   11787         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   11788            regN = INSNT0(3,0);
   11789            regD = INSNT1(11,8);
   11790            regM = INSNT1(3,0);
   11791            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11792               gate = True;
   11793         }
   11794      } else {
   11795         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   11796             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11797             INSNA(7,4)   == BITS4(1,1,1,1)) {
   11798            regD = INSNA(15,12);
   11799            regN = INSNA(19,16);
   11800            regM = INSNA(3,0);
   11801            if (regD != 15 && regN != 15 && regM != 15)
   11802               gate = True;
   11803         }
   11804      }
   11805 
   11806      if (gate) {
   11807         IRTemp rNt   = newTemp(Ity_I32);
   11808         IRTemp rMt   = newTemp(Ity_I32);
   11809         IRTemp res_q = newTemp(Ity_I32);
   11810 
   11811         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11812         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11813 
   11814         assign(res_q, binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
   11815         if (isT)
   11816            putIRegT( regD, mkexpr(res_q), condT );
   11817         else
   11818            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11819 
   11820         DIP("shsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11821         return True;
   11822      }
   11823      /* fall through */
   11824    }
   11825 
   11826    /* ----------------- sxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
   11827    {
   11828      UInt regD = 99, regN = 99, regM = 99, rotate = 99;
   11829      Bool gate = False;
   11830 
   11831      if (isT) {
   11832         if (INSNT0(15,4) == 0xFA2 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
   11833            regN   = INSNT0(3,0);
   11834            regD   = INSNT1(11,8);
   11835            regM   = INSNT1(3,0);
   11836            rotate = INSNT1(5,4);
   11837            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11838               gate = True;
   11839         }
   11840      } else {
   11841         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
   11842             INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
   11843            regD   = INSNA(15,12);
   11844            regN   = INSNA(19,16);
   11845            regM   = INSNA(3,0);
   11846            rotate = INSNA(11,10);
   11847            if (regD != 15 && regN != 15 && regM != 15)
   11848              gate = True;
   11849         }
   11850      }
   11851 
   11852      if (gate) {
   11853         IRTemp irt_regN = newTemp(Ity_I32);
   11854         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11855 
   11856         IRTemp irt_regM = newTemp(Ity_I32);
   11857         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11858 
   11859         IRTemp irt_rot = newTemp(Ity_I32);
   11860         assign( irt_rot, genROR32(irt_regM, 8 * rotate) );
   11861 
   11862         /* FIXME Maybe we can write this arithmetic in shorter form. */
   11863         IRExpr* resLo
   11864            = binop(Iop_And32,
   11865                    binop(Iop_Add32,
   11866                          mkexpr(irt_regN),
   11867                          unop(Iop_16Uto32,
   11868                               unop(Iop_8Sto16,
   11869                                    unop(Iop_32to8, mkexpr(irt_rot))))),
   11870                    mkU32(0x0000FFFF));
   11871 
   11872         IRExpr* resHi
   11873            = binop(Iop_And32,
   11874                    binop(Iop_Add32,
   11875                          mkexpr(irt_regN),
   11876                          binop(Iop_Shl32,
   11877                                unop(Iop_16Uto32,
   11878                                     unop(Iop_8Sto16,
   11879                                          unop(Iop_32to8,
   11880                                               binop(Iop_Shr32,
   11881                                                     mkexpr(irt_rot),
   11882                                                     mkU8(16))))),
   11883                                mkU8(16))),
   11884                    mkU32(0xFFFF0000));
   11885 
   11886         IRExpr* ire_result
   11887            = binop( Iop_Or32, resHi, resLo );
   11888 
   11889         if (isT)
   11890            putIRegT( regD, ire_result, condT );
   11891         else
   11892            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11893 
   11894         DIP( "sxtab16%s r%u, r%u, r%u, ROR #%u\n",
   11895              nCC(conq), regD, regN, regM, 8 * rotate );
   11896         return True;
   11897      }
   11898      /* fall through */
   11899    }
   11900 
   11901    /* ----------------- shasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   11902    {
   11903      UInt regD = 99, regN = 99, regM = 99;
   11904      Bool gate = False;
   11905 
   11906      if (isT) {
   11907         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   11908            regN = INSNT0(3,0);
   11909            regD = INSNT1(11,8);
   11910            regM = INSNT1(3,0);
   11911            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11912               gate = True;
   11913         }
   11914      } else {
   11915         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   11916             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11917             INSNA(7,4)   == BITS4(0,0,1,1)) {
   11918            regD = INSNA(15,12);
   11919            regN = INSNA(19,16);
   11920            regM = INSNA(3,0);
   11921            if (regD != 15 && regN != 15 && regM != 15)
   11922               gate = True;
   11923         }
   11924      }
   11925 
   11926      if (gate) {
   11927         IRTemp rNt   = newTemp(Ity_I32);
   11928         IRTemp rMt   = newTemp(Ity_I32);
   11929         IRTemp irt_diff  = newTemp(Ity_I32);
   11930         IRTemp irt_sum   = newTemp(Ity_I32);
   11931         IRTemp res_q = newTemp(Ity_I32);
   11932 
   11933         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11934         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11935 
   11936         assign( irt_diff,
   11937                 binop(Iop_Sub32,
   11938                       unop(Iop_16Sto32,
   11939                            unop(Iop_32to16,
   11940                                 mkexpr(rNt)
   11941                            )
   11942                       ),
   11943                       unop(Iop_16Sto32,
   11944                            unop(Iop_32to16,
   11945                                 binop(Iop_Shr32,
   11946                                       mkexpr(rMt), mkU8(16)
   11947                                 )
   11948                            )
   11949                       )
   11950                 )
   11951         );
   11952 
   11953         assign( irt_sum,
   11954                 binop(Iop_Add32,
   11955                       unop(Iop_16Sto32,
   11956                            unop(Iop_32to16,
   11957                                 binop(Iop_Shr32,
   11958                                       mkexpr(rNt), mkU8(16)
   11959                                 )
   11960                            )
   11961                       ),
   11962                       unop(Iop_16Sto32,
   11963                            unop(Iop_32to16, mkexpr(rMt)
   11964                            )
   11965                       )
   11966                 )
   11967         );
   11968 
   11969         assign( res_q,
   11970                 binop(Iop_Or32,
   11971                       unop(Iop_16Uto32,
   11972                            unop(Iop_32to16,
   11973                                 binop(Iop_Shr32,
   11974                                       mkexpr(irt_diff), mkU8(1)
   11975                                 )
   11976                            )
   11977                       ),
   11978                       binop(Iop_Shl32,
   11979                             binop(Iop_Shr32,
   11980                                   mkexpr(irt_sum), mkU8(1)
   11981                             ),
   11982                             mkU8(16)
   11983                      )
   11984                 )
   11985         );
   11986 
   11987         if (isT)
   11988            putIRegT( regD, mkexpr(res_q), condT );
   11989         else
   11990            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11991 
   11992         DIP("shasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11993         return True;
   11994      }
   11995      /* fall through */
   11996    }
   11997 
   11998    /* ----------------- uhasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   11999    {
   12000      UInt regD = 99, regN = 99, regM = 99;
   12001      Bool gate = False;
   12002 
   12003      if (isT) {
   12004         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   12005            regN = INSNT0(3,0);
   12006            regD = INSNT1(11,8);
   12007            regM = INSNT1(3,0);
   12008            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12009               gate = True;
   12010         }
   12011      } else {
   12012         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   12013             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12014             INSNA(7,4)   == BITS4(0,0,1,1)) {
   12015            regD = INSNA(15,12);
   12016            regN = INSNA(19,16);
   12017            regM = INSNA(3,0);
   12018            if (regD != 15 && regN != 15 && regM != 15)
   12019               gate = True;
   12020         }
   12021      }
   12022 
   12023      if (gate) {
   12024         IRTemp rNt   = newTemp(Ity_I32);
   12025         IRTemp rMt   = newTemp(Ity_I32);
   12026         IRTemp irt_diff  = newTemp(Ity_I32);
   12027         IRTemp irt_sum   = newTemp(Ity_I32);
   12028         IRTemp res_q = newTemp(Ity_I32);
   12029 
   12030         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12031         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12032 
   12033         assign( irt_diff,
   12034                 binop(Iop_Sub32,
   12035                       unop(Iop_16Uto32,
   12036                            unop(Iop_32to16,
   12037                                 mkexpr(rNt)
   12038                            )
   12039                       ),
   12040                       unop(Iop_16Uto32,
   12041                            unop(Iop_32to16,
   12042                                 binop(Iop_Shr32,
   12043                                       mkexpr(rMt), mkU8(16)
   12044                                 )
   12045                            )
   12046                       )
   12047                 )
   12048         );
   12049 
   12050         assign( irt_sum,
   12051                 binop(Iop_Add32,
   12052                       unop(Iop_16Uto32,
   12053                            unop(Iop_32to16,
   12054                                 binop(Iop_Shr32,
   12055                                       mkexpr(rNt), mkU8(16)
   12056                                 )
   12057                            )
   12058                       ),
   12059                       unop(Iop_16Uto32,
   12060                            unop(Iop_32to16, mkexpr(rMt)
   12061                            )
   12062                       )
   12063                 )
   12064         );
   12065 
   12066         assign( res_q,
   12067                 binop(Iop_Or32,
   12068                       unop(Iop_16Uto32,
   12069                            unop(Iop_32to16,
   12070                                 binop(Iop_Shr32,
   12071                                       mkexpr(irt_diff), mkU8(1)
   12072                                 )
   12073                            )
   12074                       ),
   12075                       binop(Iop_Shl32,
   12076                             binop(Iop_Shr32,
   12077                                   mkexpr(irt_sum), mkU8(1)
   12078                             ),
   12079                             mkU8(16)
   12080                      )
   12081                 )
   12082         );
   12083 
   12084         if (isT)
   12085            putIRegT( regD, mkexpr(res_q), condT );
   12086         else
   12087            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12088 
   12089         DIP("uhasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12090         return True;
   12091      }
   12092      /* fall through */
   12093    }
   12094 
   12095    /* ----------------- shsax<c> <Rd>,<Rn>,<Rm> ------------------- */
   12096    {
   12097      UInt regD = 99, regN = 99, regM = 99;
   12098      Bool gate = False;
   12099 
   12100      if (isT) {
   12101         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   12102            regN = INSNT0(3,0);
   12103            regD = INSNT1(11,8);
   12104            regM = INSNT1(3,0);
   12105            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12106               gate = True;
   12107         }
   12108      } else {
   12109         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   12110             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12111             INSNA(7,4)   == BITS4(0,1,0,1)) {
   12112            regD = INSNA(15,12);
   12113            regN = INSNA(19,16);
   12114            regM = INSNA(3,0);
   12115            if (regD != 15 && regN != 15 && regM != 15)
   12116               gate = True;
   12117         }
   12118      }
   12119 
   12120      if (gate) {
   12121         IRTemp rNt   = newTemp(Ity_I32);
   12122         IRTemp rMt   = newTemp(Ity_I32);
   12123         IRTemp irt_diff  = newTemp(Ity_I32);
   12124         IRTemp irt_sum   = newTemp(Ity_I32);
   12125         IRTemp res_q = newTemp(Ity_I32);
   12126 
   12127         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12128         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12129 
   12130         assign( irt_sum,
   12131                 binop(Iop_Add32,
   12132                       unop(Iop_16Sto32,
   12133                            unop(Iop_32to16,
   12134                                 mkexpr(rNt)
   12135                            )
   12136                       ),
   12137                       unop(Iop_16Sto32,
   12138                            unop(Iop_32to16,
   12139                                 binop(Iop_Shr32,
   12140                                       mkexpr(rMt), mkU8(16)
   12141                                 )
   12142                            )
   12143                       )
   12144                 )
   12145         );
   12146 
   12147         assign( irt_diff,
   12148                 binop(Iop_Sub32,
   12149                       unop(Iop_16Sto32,
   12150                            unop(Iop_32to16,
   12151                                 binop(Iop_Shr32,
   12152                                       mkexpr(rNt), mkU8(16)
   12153                                 )
   12154                            )
   12155                       ),
   12156                       unop(Iop_16Sto32,
   12157                            unop(Iop_32to16, mkexpr(rMt)
   12158                            )
   12159                       )
   12160                 )
   12161         );
   12162 
   12163         assign( res_q,
   12164                 binop(Iop_Or32,
   12165                       unop(Iop_16Uto32,
   12166                            unop(Iop_32to16,
   12167                                 binop(Iop_Shr32,
   12168                                       mkexpr(irt_sum), mkU8(1)
   12169                                 )
   12170                            )
   12171                       ),
   12172                       binop(Iop_Shl32,
   12173                             binop(Iop_Shr32,
   12174                                   mkexpr(irt_diff), mkU8(1)
   12175                             ),
   12176                             mkU8(16)
   12177                      )
   12178                 )
   12179         );
   12180 
   12181         if (isT)
   12182            putIRegT( regD, mkexpr(res_q), condT );
   12183         else
   12184            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12185 
   12186         DIP("shsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12187         return True;
   12188      }
   12189      /* fall through */
   12190    }
   12191 
   12192    /* ----------------- uhsax<c> <Rd>,<Rn>,<Rm> ------------------- */
   12193    {
   12194      UInt regD = 99, regN = 99, regM = 99;
   12195      Bool gate = False;
   12196 
   12197      if (isT) {
   12198         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   12199            regN = INSNT0(3,0);
   12200            regD = INSNT1(11,8);
   12201            regM = INSNT1(3,0);
   12202            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12203               gate = True;
   12204         }
   12205      } else {
   12206         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   12207             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12208             INSNA(7,4)   == BITS4(0,1,0,1)) {
   12209            regD = INSNA(15,12);
   12210            regN = INSNA(19,16);
   12211            regM = INSNA(3,0);
   12212            if (regD != 15 && regN != 15 && regM != 15)
   12213               gate = True;
   12214         }
   12215      }
   12216 
   12217      if (gate) {
   12218         IRTemp rNt   = newTemp(Ity_I32);
   12219         IRTemp rMt   = newTemp(Ity_I32);
   12220         IRTemp irt_diff  = newTemp(Ity_I32);
   12221         IRTemp irt_sum   = newTemp(Ity_I32);
   12222         IRTemp res_q = newTemp(Ity_I32);
   12223 
   12224         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12225         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12226 
   12227         assign( irt_sum,
   12228                 binop(Iop_Add32,
   12229                       unop(Iop_16Uto32,
   12230                            unop(Iop_32to16,
   12231                                 mkexpr(rNt)
   12232                            )
   12233                       ),
   12234                       unop(Iop_16Uto32,
   12235                            unop(Iop_32to16,
   12236                                 binop(Iop_Shr32,
   12237                                       mkexpr(rMt), mkU8(16)
   12238                                 )
   12239                            )
   12240                       )
   12241                 )
   12242         );
   12243 
   12244         assign( irt_diff,
   12245                 binop(Iop_Sub32,
   12246                       unop(Iop_16Uto32,
   12247                            unop(Iop_32to16,
   12248                                 binop(Iop_Shr32,
   12249                                       mkexpr(rNt), mkU8(16)
   12250                                 )
   12251                            )
   12252                       ),
   12253                       unop(Iop_16Uto32,
   12254                            unop(Iop_32to16, mkexpr(rMt)
   12255                            )
   12256                       )
   12257                 )
   12258         );
   12259 
   12260         assign( res_q,
   12261                 binop(Iop_Or32,
   12262                       unop(Iop_16Uto32,
   12263                            unop(Iop_32to16,
   12264                                 binop(Iop_Shr32,
   12265                                       mkexpr(irt_sum), mkU8(1)
   12266                                 )
   12267                            )
   12268                       ),
   12269                       binop(Iop_Shl32,
   12270                             binop(Iop_Shr32,
   12271                                   mkexpr(irt_diff), mkU8(1)
   12272                             ),
   12273                             mkU8(16)
   12274                      )
   12275                 )
   12276         );
   12277 
   12278         if (isT)
   12279            putIRegT( regD, mkexpr(res_q), condT );
   12280         else
   12281            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12282 
   12283         DIP("uhsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12284         return True;
   12285      }
   12286      /* fall through */
   12287    }
   12288 
   12289    /* ----------------- shsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
   12290    {
   12291      UInt regD = 99, regN = 99, regM = 99;
   12292      Bool gate = False;
   12293 
   12294      if (isT) {
   12295         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   12296            regN = INSNT0(3,0);
   12297            regD = INSNT1(11,8);
   12298            regM = INSNT1(3,0);
   12299            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12300               gate = True;
   12301         }
   12302      } else {
   12303         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   12304             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12305             INSNA(7,4)   == BITS4(0,1,1,1)) {
   12306            regD = INSNA(15,12);
   12307            regN = INSNA(19,16);
   12308            regM = INSNA(3,0);
   12309            if (regD != 15 && regN != 15 && regM != 15)
   12310               gate = True;
   12311         }
   12312      }
   12313 
   12314      if (gate) {
   12315         IRTemp rNt   = newTemp(Ity_I32);
   12316         IRTemp rMt   = newTemp(Ity_I32);
   12317         IRTemp res_q = newTemp(Ity_I32);
   12318 
   12319         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12320         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12321 
   12322         assign(res_q, binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
   12323         if (isT)
   12324            putIRegT( regD, mkexpr(res_q), condT );
   12325         else
   12326            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12327 
   12328         DIP("shsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12329         return True;
   12330      }
   12331      /* fall through */
   12332    }
   12333 
   12334    /* ----------------- smmls{r}<c> <Rd>,<Rn>,<Rm>,<Ra> ------------------- */
   12335    {
   12336      UInt rD = 99, rN = 99, rM = 99, rA = 99;
   12337      Bool round  = False;
   12338      Bool gate   = False;
   12339 
   12340      if (isT) {
   12341         if (INSNT0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
   12342             && INSNT0(6,4) == BITS3(1,1,0)
   12343             && INSNT1(7,5) == BITS3(0,0,0)) {
   12344            round = INSNT1(4,4);
   12345            rA    = INSNT1(15,12);
   12346            rD    = INSNT1(11,8);
   12347            rM    = INSNT1(3,0);
   12348            rN    = INSNT0(3,0);
   12349            if (!isBadRegT(rD)
   12350                && !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rA))
   12351               gate = True;
   12352         }
   12353      } else {
   12354         if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,1)
   12355             && INSNA(15,12) != BITS4(1,1,1,1)
   12356             && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(1,1,0,1)) {
   12357            round = INSNA(5,5);
   12358            rD    = INSNA(19,16);
   12359            rA    = INSNA(15,12);
   12360            rM    = INSNA(11,8);
   12361            rN    = INSNA(3,0);
   12362            if (rD != 15 && rM != 15 && rN != 15)
   12363               gate = True;
   12364         }
   12365      }
   12366      if (gate) {
   12367         IRTemp irt_rA   = newTemp(Ity_I32);
   12368         IRTemp irt_rN   = newTemp(Ity_I32);
   12369         IRTemp irt_rM   = newTemp(Ity_I32);
   12370         assign( irt_rA, isT ? getIRegT(rA) : getIRegA(rA) );
   12371         assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
   12372         assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
   12373         IRExpr* res
   12374         = unop(Iop_64HIto32,
   12375                binop(Iop_Add64,
   12376                      binop(Iop_Sub64,
   12377                            binop(Iop_32HLto64, mkexpr(irt_rA), mkU32(0)),
   12378                            binop(Iop_MullS32, mkexpr(irt_rN), mkexpr(irt_rM))),
   12379                      mkU64(round ? 0x80000000ULL : 0ULL)));
   12380         if (isT)
   12381            putIRegT( rD, res, condT );
   12382         else
   12383            putIRegA(rD, res, condT, Ijk_Boring);
   12384         DIP("smmls%s%s r%u, r%u, r%u, r%u\n",
   12385             round ? "r" : "", nCC(conq), rD, rN, rM, rA);
   12386         return True;
   12387      }
   12388      /* fall through */
   12389    }
   12390 
   12391    /* -------------- smlald{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
   12392    {
   12393      UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
   12394      Bool m_swap = False;
   12395      Bool gate   = False;
   12396 
   12397      if (isT) {
   12398         if (INSNT0(15,4) == 0xFBC &&
   12399             (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0)) {
   12400            rN     = INSNT0(3,0);
   12401            rDlo   = INSNT1(15,12);
   12402            rDhi   = INSNT1(11,8);
   12403            rM     = INSNT1(3,0);
   12404            m_swap = (INSNT1(4,4) & 1) == 1;
   12405            if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
   12406                && !isBadRegT(rM) && rDhi != rDlo)
   12407               gate = True;
   12408         }
   12409      } else {
   12410         if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0)
   12411             && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
   12412            rN     = INSNA(3,0);
   12413            rDlo   = INSNA(15,12);
   12414            rDhi   = INSNA(19,16);
   12415            rM     = INSNA(11,8);
   12416            m_swap = ( INSNA(5,5) & 1 ) == 1;
   12417            if (rDlo != 15 && rDhi != 15
   12418                && rN != 15 && rM != 15 && rDlo != rDhi)
   12419               gate = True;
   12420         }
   12421      }
   12422 
   12423      if (gate) {
   12424         IRTemp irt_rM   = newTemp(Ity_I32);
   12425         IRTemp irt_rN   = newTemp(Ity_I32);
   12426         IRTemp irt_rDhi = newTemp(Ity_I32);
   12427         IRTemp irt_rDlo = newTemp(Ity_I32);
   12428         IRTemp op_2     = newTemp(Ity_I32);
   12429         IRTemp pr_1     = newTemp(Ity_I64);
   12430         IRTemp pr_2     = newTemp(Ity_I64);
   12431         IRTemp result   = newTemp(Ity_I64);
   12432         IRTemp resHi    = newTemp(Ity_I32);
   12433         IRTemp resLo    = newTemp(Ity_I32);
   12434         assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM));
   12435         assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN));
   12436         assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi));
   12437         assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo));
   12438         assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
   12439         assign( pr_1, binop(Iop_MullS32,
   12440                             unop(Iop_16Sto32,
   12441                                  unop(Iop_32to16, mkexpr(irt_rN))
   12442                             ),
   12443                             unop(Iop_16Sto32,
   12444                                  unop(Iop_32to16, mkexpr(op_2))
   12445                             )
   12446                       )
   12447         );
   12448         assign( pr_2, binop(Iop_MullS32,
   12449                             binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
   12450                             binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
   12451                       )
   12452         );
   12453         assign( result, binop(Iop_Add64,
   12454                               binop(Iop_Add64,
   12455                                     mkexpr(pr_1),
   12456                                     mkexpr(pr_2)
   12457                               ),
   12458                               binop(Iop_32HLto64,
   12459                                     mkexpr(irt_rDhi),
   12460                                     mkexpr(irt_rDlo)
   12461                               )
   12462                         )
   12463         );
   12464         assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
   12465         assign( resLo, unop(Iop_64to32, mkexpr(result)) );
   12466         if (isT) {
   12467            putIRegT( rDhi, mkexpr(resHi), condT );
   12468            putIRegT( rDlo, mkexpr(resLo), condT );
   12469         } else {
   12470            putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   12471            putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   12472         }
   12473         DIP("smlald%c%s r%u, r%u, r%u, r%u\n",
   12474             m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
   12475         return True;
   12476      }
   12477      /* fall through */
   12478    }
   12479 
   12480    /* -------------- smlsld{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
   12481    {
   12482      UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
   12483      Bool m_swap = False;
   12484      Bool gate   = False;
   12485 
   12486      if (isT) {
   12487         if ((INSNT0(15,4) == 0xFBD &&
   12488             (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0))) {
   12489            rN     = INSNT0(3,0);
   12490            rDlo   = INSNT1(15,12);
   12491            rDhi   = INSNT1(11,8);
   12492            rM     = INSNT1(3,0);
   12493            m_swap = (INSNT1(4,4) & 1) == 1;
   12494            if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN) &&
   12495                !isBadRegT(rM) && rDhi != rDlo)
   12496               gate = True;
   12497         }
   12498      } else {
   12499         if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0) &&
   12500             (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,1,0,1)) {
   12501            rN     = INSNA(3,0);
   12502            rDlo   = INSNA(15,12);
   12503            rDhi   = INSNA(19,16);
   12504            rM     = INSNA(11,8);
   12505            m_swap = (INSNA(5,5) & 1) == 1;
   12506            if (rDlo != 15 && rDhi != 15 &&
   12507                rN != 15 && rM != 15 && rDlo != rDhi)
   12508               gate = True;
   12509         }
   12510      }
   12511      if (gate) {
   12512         IRTemp irt_rM   = newTemp(Ity_I32);
   12513         IRTemp irt_rN   = newTemp(Ity_I32);
   12514         IRTemp irt_rDhi = newTemp(Ity_I32);
   12515         IRTemp irt_rDlo = newTemp(Ity_I32);
   12516         IRTemp op_2     = newTemp(Ity_I32);
   12517         IRTemp pr_1     = newTemp(Ity_I64);
   12518         IRTemp pr_2     = newTemp(Ity_I64);
   12519         IRTemp result   = newTemp(Ity_I64);
   12520         IRTemp resHi    = newTemp(Ity_I32);
   12521         IRTemp resLo    = newTemp(Ity_I32);
   12522         assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
   12523         assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
   12524         assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi) );
   12525         assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo) );
   12526         assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
   12527         assign( pr_1, binop(Iop_MullS32,
   12528                             unop(Iop_16Sto32,
   12529                                  unop(Iop_32to16, mkexpr(irt_rN))
   12530                             ),
   12531                             unop(Iop_16Sto32,
   12532                                  unop(Iop_32to16, mkexpr(op_2))
   12533                             )
   12534                       )
   12535         );
   12536         assign( pr_2, binop(Iop_MullS32,
   12537                             binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
   12538                             binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
   12539                       )
   12540         );
   12541         assign( result, binop(Iop_Add64,
   12542                               binop(Iop_Sub64,
   12543                                     mkexpr(pr_1),
   12544                                     mkexpr(pr_2)
   12545                               ),
   12546                               binop(Iop_32HLto64,
   12547                                     mkexpr(irt_rDhi),
   12548                                     mkexpr(irt_rDlo)
   12549                               )
   12550                         )
   12551         );
   12552         assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
   12553         assign( resLo, unop(Iop_64to32, mkexpr(result)) );
   12554         if (isT) {
   12555            putIRegT( rDhi, mkexpr(resHi), condT );
   12556            putIRegT( rDlo, mkexpr(resLo), condT );
   12557         } else {
   12558            putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   12559            putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   12560         }
   12561         DIP("smlsld%c%s r%u, r%u, r%u, r%u\n",
   12562             m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
   12563         return True;
   12564      }
   12565      /* fall through */
   12566    }
   12567 
   12568    /* ---------- Doesn't match anything. ---------- */
   12569    return False;
   12570 
   12571 #  undef INSNA
   12572 #  undef INSNT0
   12573 #  undef INSNT1
   12574 }
   12575 
   12576 
   12577 /*------------------------------------------------------------*/
   12578 /*--- LDMxx/STMxx helper (both ARM and Thumb32)            ---*/
   12579 /*------------------------------------------------------------*/
   12580 
   12581 /* Generate IR for LDMxx and STMxx.  This is complex.  Assumes it's
   12582    unconditional, so the caller must produce a jump-around before
   12583    calling this, if the insn is to be conditional.  Caller is
   12584    responsible for all validation of parameters.  For LDMxx, if PC is
   12585    amongst the values loaded, caller is also responsible for
   12586    generating the jump. */
   12587 static void mk_ldm_stm ( Bool arm,     /* True: ARM, False: Thumb */
   12588                          UInt rN,      /* base reg */
   12589                          UInt bINC,    /* 1: inc,  0: dec */
   12590                          UInt bBEFORE, /* 1: inc/dec before, 0: after */
   12591                          UInt bW,      /* 1: writeback to Rn */
   12592                          UInt bL,      /* 1: load, 0: store */
   12593                          UInt regList )
   12594 {
   12595    Int i, r, m, nRegs;
   12596    IRTemp jk = Ijk_Boring;
   12597 
   12598    /* Get hold of the old Rn value.  We might need to write its value
   12599       to memory during a store, and if it's also the writeback
   12600       register then we need to get its value now.  We can't treat it
   12601       exactly like the other registers we're going to transfer,
   12602       because for xxMDA and xxMDB writeback forms, the generated IR
   12603       updates Rn in the guest state before any transfers take place.
   12604       We have to do this as per comments below, in order that if Rn is
   12605       the stack pointer then it always has a value is below or equal
   12606       to any of the transfer addresses.  Ick. */
   12607    IRTemp oldRnT = newTemp(Ity_I32);
   12608    assign(oldRnT, arm ? getIRegA(rN) : getIRegT(rN));
   12609 
   12610    IRTemp anchorT = newTemp(Ity_I32);
   12611    /* The old (Addison-Wesley) ARM ARM seems to say that LDMxx/STMxx
   12612       ignore the bottom two bits of the address.  However, Cortex-A8
   12613       doesn't seem to care.  Hence: */
   12614    /* No .. don't force alignment .. */
   12615    /* assign(anchorT, binop(Iop_And32, mkexpr(oldRnT), mkU32(~3U))); */
   12616    /* Instead, use the potentially misaligned address directly. */
   12617    assign(anchorT, mkexpr(oldRnT));
   12618 
   12619    IROp opADDorSUB = bINC ? Iop_Add32 : Iop_Sub32;
   12620    // bINC == 1:  xxMIA, xxMIB
   12621    // bINC == 0:  xxMDA, xxMDB
   12622 
   12623    // For xxMDA and xxMDB, update Rn first if necessary.  We have
   12624    // to do this first so that, for the common idiom of the transfers
   12625    // faulting because we're pushing stuff onto a stack and the stack
   12626    // is growing down onto allocate-on-fault pages (as Valgrind simulates),
   12627    // we need to have the SP up-to-date "covering" (pointing below) the
   12628    // transfer area.  For the same reason, if we are doing xxMIA or xxMIB,
   12629    // do the transfer first, and then update rN afterwards.
   12630    nRegs = 0;
   12631    for (i = 0; i < 16; i++) {
   12632      if ((regList & (1 << i)) != 0)
   12633          nRegs++;
   12634    }
   12635    if (bW == 1 && !bINC) {
   12636       IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
   12637       if (arm)
   12638          putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
   12639       else
   12640          putIRegT( rN, e, IRTemp_INVALID );
   12641    }
   12642 
   12643    // Make up a list of the registers to transfer, and their offsets
   12644    // in memory relative to the anchor.  If the base reg (Rn) is part
   12645    // of the transfer, then do it last for a load and first for a store.
   12646    UInt xReg[16], xOff[16];
   12647    Int  nX = 0;
   12648    m = 0;
   12649    for (i = 0; i < 16; i++) {
   12650       r = bINC ? i : (15-i);
   12651       if (0 == (regList & (1<<r)))
   12652          continue;
   12653       if (bBEFORE)
   12654          m++;
   12655       /* paranoia: check we aren't transferring the writeback
   12656          register during a load. Should be assured by decode-point
   12657          check above. */
   12658       if (bW == 1 && bL == 1)
   12659          vassert(r != rN);
   12660 
   12661       xOff[nX] = 4 * m;
   12662       xReg[nX] = r;
   12663       nX++;
   12664 
   12665       if (!bBEFORE)
   12666          m++;
   12667    }
   12668    vassert(m == nRegs);
   12669    vassert(nX == nRegs);
   12670    vassert(nX <= 16);
   12671 
   12672    if (bW == 0 && (regList & (1<<rN)) != 0) {
   12673       /* Non-writeback, and basereg is to be transferred.  Do its
   12674          transfer last for a load and first for a store.  Requires
   12675          reordering xOff/xReg. */
   12676       if (0) {
   12677          vex_printf("\nREG_LIST_PRE: (rN=%d)\n", rN);
   12678          for (i = 0; i < nX; i++)
   12679             vex_printf("reg %d   off %d\n", xReg[i], xOff[i]);
   12680          vex_printf("\n");
   12681       }
   12682 
   12683       vassert(nX > 0);
   12684       for (i = 0; i < nX; i++) {
   12685          if (xReg[i] == rN)
   12686              break;
   12687       }
   12688       vassert(i < nX); /* else we didn't find it! */
   12689       UInt tReg = xReg[i];
   12690       UInt tOff = xOff[i];
   12691       if (bL == 1) {
   12692          /* load; make this transfer happen last */
   12693          if (i < nX-1) {
   12694             for (m = i+1; m < nX; m++) {
   12695                xReg[m-1] = xReg[m];
   12696                xOff[m-1] = xOff[m];
   12697             }
   12698             vassert(m == nX);
   12699             xReg[m-1] = tReg;
   12700             xOff[m-1] = tOff;
   12701          }
   12702       } else {
   12703          /* store; make this transfer happen first */
   12704          if (i > 0) {
   12705             for (m = i-1; m >= 0; m--) {
   12706                xReg[m+1] = xReg[m];
   12707                xOff[m+1] = xOff[m];
   12708             }
   12709             vassert(m == -1);
   12710             xReg[0] = tReg;
   12711             xOff[0] = tOff;
   12712          }
   12713       }
   12714 
   12715       if (0) {
   12716          vex_printf("REG_LIST_POST:\n");
   12717          for (i = 0; i < nX; i++)
   12718             vex_printf("reg %d   off %d\n", xReg[i], xOff[i]);
   12719          vex_printf("\n");
   12720       }
   12721    }
   12722 
   12723    /* According to the Cortex A8 TRM Sec. 5.2.1, LDM(1) with r13 as the base
   12724        register and PC in the register list is a return for purposes of branch
   12725        prediction.
   12726       The ARM ARM Sec. C9.10.1 further specifies that writeback must be enabled
   12727        to be counted in event 0x0E (Procedure return).*/
   12728    if (rN == 13 && bL == 1 && bINC && !bBEFORE && bW == 1) {
   12729       jk = Ijk_Ret;
   12730    }
   12731 
   12732    /* Actually generate the transfers */
   12733    for (i = 0; i < nX; i++) {
   12734       r = xReg[i];
   12735       if (bL == 1) {
   12736          IRExpr* e = loadLE(Ity_I32,
   12737                             binop(opADDorSUB, mkexpr(anchorT),
   12738                                   mkU32(xOff[i])));
   12739          if (arm) {
   12740             putIRegA( r, e, IRTemp_INVALID, jk );
   12741          } else {
   12742             // no: putIRegT( r, e, IRTemp_INVALID );
   12743             // putIRegT refuses to write to R15.  But that might happen.
   12744             // Since this is uncond, and we need to be able to
   12745             // write the PC, just use the low level put:
   12746             llPutIReg( r, e );
   12747          }
   12748       } else {
   12749          /* if we're storing Rn, make sure we use the correct
   12750             value, as per extensive comments above */
   12751          storeLE( binop(opADDorSUB, mkexpr(anchorT), mkU32(xOff[i])),
   12752                   r == rN ? mkexpr(oldRnT)
   12753                           : (arm ? getIRegA(r) : getIRegT(r) ) );
   12754       }
   12755    }
   12756 
   12757    // If we are doing xxMIA or xxMIB,
   12758    // do the transfer first, and then update rN afterwards.
   12759    if (bW == 1 && bINC) {
   12760       IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
   12761       if (arm)
   12762          putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
   12763       else
   12764          putIRegT( rN, e, IRTemp_INVALID );
   12765    }
   12766 }
   12767 
   12768 
   12769 /*------------------------------------------------------------*/
   12770 /*--- VFP (CP 10 and 11) instructions                      ---*/
   12771 /*------------------------------------------------------------*/
   12772 
   12773 /* Both ARM and Thumb */
   12774 
   12775 /* Translate a CP10 or CP11 instruction.  If successful, returns
   12776    True and *dres may or may not be updated.  If failure, returns
   12777    False and doesn't change *dres nor create any IR.
   12778 
   12779    The ARM and Thumb encodings are identical for the low 28 bits of
   12780    the insn (yay!) and that's what the caller must supply, iow, insn28
   12781    has the top 4 bits masked out.  Caller is responsible for
   12782    determining whether the masked-out bits are valid for a CP10/11
   12783    insn.  The rules for the top 4 bits are:
   12784 
   12785      ARM: 0000 to 1110 allowed, and this is the gating condition.
   12786      1111 (NV) is not allowed.
   12787 
   12788      Thumb: must be 1110.  The gating condition is taken from
   12789      ITSTATE in the normal way.
   12790 
   12791    Conditionalisation:
   12792 
   12793    Caller must supply an IRTemp 'condT' holding the gating condition,
   12794    or IRTemp_INVALID indicating the insn is always executed.
   12795 
   12796    Caller must also supply an ARMCondcode 'conq'.  This is only used
   12797    for debug printing, no other purpose.  For ARM, this is simply the
   12798    top 4 bits of the original instruction.  For Thumb, the condition
   12799    is not (really) known until run time, and so ARMCondAL should be
   12800    passed, only so that printing of these instructions does not show
   12801    any condition.
   12802 
   12803    Finally, the caller must indicate whether this occurs in ARM or
   12804    Thumb code.
   12805 */
   12806 static Bool decode_CP10_CP11_instruction (
   12807                /*MOD*/DisResult* dres,
   12808                UInt              insn28,
   12809                IRTemp            condT,
   12810                ARMCondcode       conq,
   12811                Bool              isT
   12812             )
   12813 {
   12814 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn28, (_bMax), (_bMin))
   12815 
   12816    vassert(INSN(31,28) == BITS4(0,0,0,0)); // caller's obligation
   12817 
   12818    if (isT) {
   12819       vassert(conq == ARMCondAL);
   12820    } else {
   12821       vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
   12822    }
   12823 
   12824    /* ----------------------------------------------------------- */
   12825    /* -- VFP instructions -- double precision (mostly)         -- */
   12826    /* ----------------------------------------------------------- */
   12827 
   12828    /* --------------------- fldmx, fstmx --------------------- */
   12829    /*
   12830                                  31   27   23   19 15 11   7   0
   12831                                          P U WL
   12832       C4-100, C5-26  1  FSTMX    cond 1100 1000 Rn Dd 1011 offset
   12833       C4-100, C5-28  2  FSTMIAX  cond 1100 1010 Rn Dd 1011 offset
   12834       C4-100, C5-30  3  FSTMDBX  cond 1101 0010 Rn Dd 1011 offset
   12835 
   12836       C4-42, C5-26   1  FLDMX    cond 1100 1001 Rn Dd 1011 offset
   12837       C4-42, C5-28   2  FLDMIAX  cond 1100 1011 Rn Dd 1011 offset
   12838       C4-42, C5-30   3  FLDMDBX  cond 1101 0011 Rn Dd 1011 offset
   12839 
   12840       Regs transferred: Dd .. D(d + (offset-3)/2)
   12841       offset must be odd, must not imply a reg > 15
   12842       IA/DB: Rn is changed by (4 + 8 x # regs transferred)
   12843 
   12844       case coding:
   12845          1  at-Rn   (access at Rn)
   12846          2  ia-Rn   (access at Rn, then Rn += 4+8n)
   12847          3  db-Rn   (Rn -= 4+8n,   then access at Rn)
   12848    */
   12849    if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
   12850        && INSN(11,8) == BITS4(1,0,1,1)) {
   12851       UInt bP      = (insn28 >> 24) & 1;
   12852       UInt bU      = (insn28 >> 23) & 1;
   12853       UInt bW      = (insn28 >> 21) & 1;
   12854       UInt bL      = (insn28 >> 20) & 1;
   12855       UInt offset  = (insn28 >> 0) & 0xFF;
   12856       UInt rN      = INSN(19,16);
   12857       UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
   12858       UInt nRegs   = (offset - 1) / 2;
   12859       UInt summary = 0;
   12860       Int  i;
   12861 
   12862       /**/ if (bP == 0 && bU == 1 && bW == 0) {
   12863          summary = 1;
   12864       }
   12865       else if (bP == 0 && bU == 1 && bW == 1) {
   12866          summary = 2;
   12867       }
   12868       else if (bP == 1 && bU == 0 && bW == 1) {
   12869          summary = 3;
   12870       }
   12871       else goto after_vfp_fldmx_fstmx;
   12872 
   12873       /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
   12874       if (rN == 15 && (summary == 2 || summary == 3 || isT))
   12875          goto after_vfp_fldmx_fstmx;
   12876 
   12877       /* offset must be odd, and specify at least one register */
   12878       if (0 == (offset & 1) || offset < 3)
   12879          goto after_vfp_fldmx_fstmx;
   12880 
   12881       /* can't transfer regs after D15 */
   12882       if (dD + nRegs - 1 >= 32)
   12883          goto after_vfp_fldmx_fstmx;
   12884 
   12885       /* Now, we can't do a conditional load or store, since that very
   12886          likely will generate an exception.  So we have to take a side
   12887          exit at this point if the condition is false. */
   12888       if (condT != IRTemp_INVALID) {
   12889          if (isT)
   12890             mk_skip_over_T32_if_cond_is_false( condT );
   12891          else
   12892             mk_skip_over_A32_if_cond_is_false( condT );
   12893          condT = IRTemp_INVALID;
   12894       }
   12895       /* Ok, now we're unconditional.  Do the load or store. */
   12896 
   12897       /* get the old Rn value */
   12898       IRTemp rnT = newTemp(Ity_I32);
   12899       assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
   12900                            rN == 15));
   12901 
   12902       /* make a new value for Rn, post-insn */
   12903       IRTemp rnTnew = IRTemp_INVALID;
   12904       if (summary == 2 || summary == 3) {
   12905          rnTnew = newTemp(Ity_I32);
   12906          assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
   12907                               mkexpr(rnT),
   12908                               mkU32(4 + 8 * nRegs)));
   12909       }
   12910 
   12911       /* decide on the base transfer address */
   12912       IRTemp taT = newTemp(Ity_I32);
   12913       assign(taT,  summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
   12914 
   12915       /* update Rn if necessary -- in case 3, we're moving it down, so
   12916          update before any memory reference, in order to keep Memcheck
   12917          and V's stack-extending logic (on linux) happy */
   12918       if (summary == 3) {
   12919          if (isT)
   12920             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   12921          else
   12922             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   12923       }
   12924 
   12925       /* generate the transfers */
   12926       for (i = 0; i < nRegs; i++) {
   12927          IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
   12928          if (bL) {
   12929             putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
   12930          } else {
   12931             storeLE(addr, getDReg(dD + i));
   12932          }
   12933       }
   12934 
   12935       /* update Rn if necessary -- in case 2, we're moving it up, so
   12936          update after any memory reference, in order to keep Memcheck
   12937          and V's stack-extending logic (on linux) happy */
   12938       if (summary == 2) {
   12939          if (isT)
   12940             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   12941          else
   12942             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   12943       }
   12944 
   12945       const HChar* nm = bL==1 ? "ld" : "st";
   12946       switch (summary) {
   12947          case 1:  DIP("f%smx%s r%u, {d%u-d%u}\n",
   12948                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   12949                   break;
   12950          case 2:  DIP("f%smiax%s r%u!, {d%u-d%u}\n",
   12951                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   12952                   break;
   12953          case 3:  DIP("f%smdbx%s r%u!, {d%u-d%u}\n",
   12954                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   12955                   break;
   12956          default: vassert(0);
   12957       }
   12958 
   12959       goto decode_success_vfp;
   12960       /* FIXME alignment constraints? */
   12961    }
   12962 
   12963   after_vfp_fldmx_fstmx:
   12964 
   12965    /* --------------------- fldmd, fstmd --------------------- */
   12966    /*
   12967                                  31   27   23   19 15 11   7   0
   12968                                          P U WL
   12969       C4-96, C5-26   1  FSTMD    cond 1100 1000 Rn Dd 1011 offset
   12970       C4-96, C5-28   2  FSTMDIA  cond 1100 1010 Rn Dd 1011 offset
   12971       C4-96, C5-30   3  FSTMDDB  cond 1101 0010 Rn Dd 1011 offset
   12972 
   12973       C4-38, C5-26   1  FLDMD    cond 1100 1001 Rn Dd 1011 offset
   12974       C4-38, C5-28   2  FLDMIAD  cond 1100 1011 Rn Dd 1011 offset
   12975       C4-38, C5-30   3  FLDMDBD  cond 1101 0011 Rn Dd 1011 offset
   12976 
   12977       Regs transferred: Dd .. D(d + (offset-2)/2)
   12978       offset must be even, must not imply a reg > 15
   12979       IA/DB: Rn is changed by (8 x # regs transferred)
   12980 
   12981       case coding:
   12982          1  at-Rn   (access at Rn)
   12983          2  ia-Rn   (access at Rn, then Rn += 8n)
   12984          3  db-Rn   (Rn -= 8n,     then access at Rn)
   12985    */
   12986    if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
   12987        && INSN(11,8) == BITS4(1,0,1,1)) {
   12988       UInt bP      = (insn28 >> 24) & 1;
   12989       UInt bU      = (insn28 >> 23) & 1;
   12990       UInt bW      = (insn28 >> 21) & 1;
   12991       UInt bL      = (insn28 >> 20) & 1;
   12992       UInt offset  = (insn28 >> 0) & 0xFF;
   12993       UInt rN      = INSN(19,16);
   12994       UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
   12995       UInt nRegs   = offset / 2;
   12996       UInt summary = 0;
   12997       Int  i;
   12998 
   12999       /**/ if (bP == 0 && bU == 1 && bW == 0) {
   13000          summary = 1;
   13001       }
   13002       else if (bP == 0 && bU == 1 && bW == 1) {
   13003          summary = 2;
   13004       }
   13005       else if (bP == 1 && bU == 0 && bW == 1) {
   13006          summary = 3;
   13007       }
   13008       else goto after_vfp_fldmd_fstmd;
   13009 
   13010       /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
   13011       if (rN == 15 && (summary == 2 || summary == 3 || isT))
   13012          goto after_vfp_fldmd_fstmd;
   13013 
   13014       /* offset must be even, and specify at least one register */
   13015       if (1 == (offset & 1) || offset < 2)
   13016          goto after_vfp_fldmd_fstmd;
   13017 
   13018       /* can't transfer regs after D15 */
   13019       if (dD + nRegs - 1 >= 32)
   13020          goto after_vfp_fldmd_fstmd;
   13021 
   13022       /* Now, we can't do a conditional load or store, since that very
   13023          likely will generate an exception.  So we have to take a side
   13024          exit at this point if the condition is false. */
   13025       if (condT != IRTemp_INVALID) {
   13026          if (isT)
   13027             mk_skip_over_T32_if_cond_is_false( condT );
   13028          else
   13029             mk_skip_over_A32_if_cond_is_false( condT );
   13030          condT = IRTemp_INVALID;
   13031       }
   13032       /* Ok, now we're unconditional.  Do the load or store. */
   13033 
   13034       /* get the old Rn value */
   13035       IRTemp rnT = newTemp(Ity_I32);
   13036       assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
   13037                            rN == 15));
   13038 
   13039       /* make a new value for Rn, post-insn */
   13040       IRTemp rnTnew = IRTemp_INVALID;
   13041       if (summary == 2 || summary == 3) {
   13042          rnTnew = newTemp(Ity_I32);
   13043          assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
   13044                               mkexpr(rnT),
   13045                               mkU32(8 * nRegs)));
   13046       }
   13047 
   13048       /* decide on the base transfer address */
   13049       IRTemp taT = newTemp(Ity_I32);
   13050       assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
   13051 
   13052       /* update Rn if necessary -- in case 3, we're moving it down, so
   13053          update before any memory reference, in order to keep Memcheck
   13054          and V's stack-extending logic (on linux) happy */
   13055       if (summary == 3) {
   13056          if (isT)
   13057             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   13058          else
   13059             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   13060       }
   13061 
   13062       /* generate the transfers */
   13063       for (i = 0; i < nRegs; i++) {
   13064          IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
   13065          if (bL) {
   13066             putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
   13067          } else {
   13068             storeLE(addr, getDReg(dD + i));
   13069          }
   13070       }
   13071 
   13072       /* update Rn if necessary -- in case 2, we're moving it up, so
   13073          update after any memory reference, in order to keep Memcheck
   13074          and V's stack-extending logic (on linux) happy */
   13075       if (summary == 2) {
   13076          if (isT)
   13077             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   13078          else
   13079             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   13080       }
   13081 
   13082       const HChar* nm = bL==1 ? "ld" : "st";
   13083       switch (summary) {
   13084          case 1:  DIP("f%smd%s r%u, {d%u-d%u}\n",
   13085                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   13086                   break;
   13087          case 2:  DIP("f%smiad%s r%u!, {d%u-d%u}\n",
   13088                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   13089                   break;
   13090          case 3:  DIP("f%smdbd%s r%u!, {d%u-d%u}\n",
   13091                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   13092                   break;
   13093          default: vassert(0);
   13094       }
   13095 
   13096       goto decode_success_vfp;
   13097       /* FIXME alignment constraints? */
   13098    }
   13099 
   13100   after_vfp_fldmd_fstmd:
   13101 
   13102    /* ------------------- fmrx, fmxr ------------------- */
   13103    if (BITS8(1,1,1,0,1,1,1,1) == INSN(27,20)
   13104        && BITS4(1,0,1,0) == INSN(11,8)
   13105        && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
   13106       UInt rD  = INSN(15,12);
   13107       UInt reg = INSN(19,16);
   13108       if (reg == BITS4(0,0,0,1)) {
   13109          if (rD == 15) {
   13110             IRTemp nzcvT = newTemp(Ity_I32);
   13111             /* When rD is 15, we are copying the top 4 bits of FPSCR
   13112                into CPSR.  That is, set the flags thunk to COPY and
   13113                install FPSCR[31:28] as the value to copy. */
   13114             assign(nzcvT, binop(Iop_And32,
   13115                                 IRExpr_Get(OFFB_FPSCR, Ity_I32),
   13116                                 mkU32(0xF0000000)));
   13117             setFlags_D1(ARMG_CC_OP_COPY, nzcvT, condT);
   13118             DIP("fmstat%s\n", nCC(conq));
   13119          } else {
   13120             /* Otherwise, merely transfer FPSCR to r0 .. r14. */
   13121             IRExpr* e = IRExpr_Get(OFFB_FPSCR, Ity_I32);
   13122             if (isT)
   13123                putIRegT(rD, e, condT);
   13124             else
   13125                putIRegA(rD, e, condT, Ijk_Boring);
   13126             DIP("fmrx%s r%u, fpscr\n", nCC(conq), rD);
   13127          }
   13128          goto decode_success_vfp;
   13129       }
   13130       /* fall through */
   13131    }
   13132 
   13133    if (BITS8(1,1,1,0,1,1,1,0) == INSN(27,20)
   13134        && BITS4(1,0,1,0) == INSN(11,8)
   13135        && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
   13136       UInt rD  = INSN(15,12);
   13137       UInt reg = INSN(19,16);
   13138       if (reg == BITS4(0,0,0,1)) {
   13139          putMiscReg32(OFFB_FPSCR,
   13140                       isT ? getIRegT(rD) : getIRegA(rD), condT);
   13141          DIP("fmxr%s fpscr, r%u\n", nCC(conq), rD);
   13142          goto decode_success_vfp;
   13143       }
   13144       /* fall through */
   13145    }
   13146 
   13147    /* --------------------- vmov --------------------- */
   13148    // VMOV dM, rD, rN
   13149    if (0x0C400B10 == (insn28 & 0x0FF00FD0)) {
   13150       UInt dM = INSN(3,0) | (INSN(5,5) << 4);
   13151       UInt rD = INSN(15,12); /* lo32 */
   13152       UInt rN = INSN(19,16); /* hi32 */
   13153       if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))) {
   13154          /* fall through */
   13155       } else {
   13156          putDReg(dM,
   13157                  unop(Iop_ReinterpI64asF64,
   13158                       binop(Iop_32HLto64,
   13159                             isT ? getIRegT(rN) : getIRegA(rN),
   13160                             isT ? getIRegT(rD) : getIRegA(rD))),
   13161                  condT);
   13162          DIP("vmov%s d%u, r%u, r%u\n", nCC(conq), dM, rD, rN);
   13163          goto decode_success_vfp;
   13164       }
   13165       /* fall through */
   13166    }
   13167 
   13168    // VMOV rD, rN, dM
   13169    if (0x0C500B10 == (insn28 & 0x0FF00FD0)) {
   13170       UInt dM = INSN(3,0) | (INSN(5,5) << 4);
   13171       UInt rD = INSN(15,12); /* lo32 */
   13172       UInt rN = INSN(19,16); /* hi32 */
   13173       if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))
   13174           || rD == rN) {
   13175          /* fall through */
   13176       } else {
   13177          IRTemp i64 = newTemp(Ity_I64);
   13178          assign(i64, unop(Iop_ReinterpF64asI64, getDReg(dM)));
   13179          IRExpr* hi32 = unop(Iop_64HIto32, mkexpr(i64));
   13180          IRExpr* lo32 = unop(Iop_64to32,   mkexpr(i64));
   13181          if (isT) {
   13182             putIRegT(rN, hi32, condT);
   13183             putIRegT(rD, lo32, condT);
   13184          } else {
   13185             putIRegA(rN, hi32, condT, Ijk_Boring);
   13186             putIRegA(rD, lo32, condT, Ijk_Boring);
   13187          }
   13188          DIP("vmov%s r%u, r%u, d%u\n", nCC(conq), rD, rN, dM);
   13189          goto decode_success_vfp;
   13190       }
   13191       /* fall through */
   13192    }
   13193 
   13194    // VMOV sD, sD+1, rN, rM
   13195    if (0x0C400A10 == (insn28 & 0x0FF00FD0)) {
   13196       UInt sD = (INSN(3,0) << 1) | INSN(5,5);
   13197       UInt rN = INSN(15,12);
   13198       UInt rM = INSN(19,16);
   13199       if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
   13200           || sD == 31) {
   13201          /* fall through */
   13202       } else {
   13203          putFReg(sD,
   13204                  unop(Iop_ReinterpI32asF32, isT ? getIRegT(rN) : getIRegA(rN)),
   13205                  condT);
   13206          putFReg(sD+1,
   13207                  unop(Iop_ReinterpI32asF32, isT ? getIRegT(rM) : getIRegA(rM)),
   13208                  condT);
   13209          DIP("vmov%s, s%u, s%u, r%u, r%u\n",
   13210               nCC(conq), sD, sD + 1, rN, rM);
   13211          goto decode_success_vfp;
   13212       }
   13213    }
   13214 
   13215    // VMOV rN, rM, sD, sD+1
   13216    if (0x0C500A10 == (insn28 & 0x0FF00FD0)) {
   13217       UInt sD = (INSN(3,0) << 1) | INSN(5,5);
   13218       UInt rN = INSN(15,12);
   13219       UInt rM = INSN(19,16);
   13220       if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
   13221           || sD == 31 || rN == rM) {
   13222          /* fall through */
   13223       } else {
   13224          IRExpr* res0 = unop(Iop_ReinterpF32asI32, getFReg(sD));
   13225          IRExpr* res1 = unop(Iop_ReinterpF32asI32, getFReg(sD+1));
   13226          if (isT) {
   13227             putIRegT(rN, res0, condT);
   13228             putIRegT(rM, res1, condT);
   13229          } else {
   13230             putIRegA(rN, res0, condT, Ijk_Boring);
   13231             putIRegA(rM, res1, condT, Ijk_Boring);
   13232          }
   13233          DIP("vmov%s, r%u, r%u, s%u, s%u\n",
   13234              nCC(conq), rN, rM, sD, sD + 1);
   13235          goto decode_success_vfp;
   13236       }
   13237    }
   13238 
   13239    // VMOV rD[x], rT  (ARM core register to scalar)
   13240    if (0x0E000B10 == (insn28 & 0x0F900F1F)) {
   13241       UInt rD  = (INSN(7,7) << 4) | INSN(19,16);
   13242       UInt rT  = INSN(15,12);
   13243       UInt opc = (INSN(22,21) << 2) | INSN(6,5);
   13244       UInt index;
   13245       if (rT == 15 || (isT && rT == 13)) {
   13246          /* fall through */
   13247       } else {
   13248          if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
   13249             index = opc & 7;
   13250             putDRegI64(rD, triop(Iop_SetElem8x8,
   13251                                  getDRegI64(rD),
   13252                                  mkU8(index),
   13253                                  unop(Iop_32to8,
   13254                                       isT ? getIRegT(rT) : getIRegA(rT))),
   13255                            condT);
   13256             DIP("vmov%s.8 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
   13257             goto decode_success_vfp;
   13258          }
   13259          else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
   13260             index = (opc >> 1) & 3;
   13261             putDRegI64(rD, triop(Iop_SetElem16x4,
   13262                                  getDRegI64(rD),
   13263                                  mkU8(index),
   13264                                  unop(Iop_32to16,
   13265                                       isT ? getIRegT(rT) : getIRegA(rT))),
   13266                            condT);
   13267             DIP("vmov%s.16 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
   13268             goto decode_success_vfp;
   13269          }
   13270          else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0)) {
   13271             index = (opc >> 2) & 1;
   13272             putDRegI64(rD, triop(Iop_SetElem32x2,
   13273                                  getDRegI64(rD),
   13274                                  mkU8(index),
   13275                                  isT ? getIRegT(rT) : getIRegA(rT)),
   13276                            condT);
   13277             DIP("vmov%s.32 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
   13278             goto decode_success_vfp;
   13279          } else {
   13280             /* fall through */
   13281          }
   13282       }
   13283    }
   13284 
   13285    // VMOV (scalar to ARM core register)
   13286    // VMOV rT, rD[x]
   13287    if (0x0E100B10 == (insn28 & 0x0F100F1F)) {
   13288       UInt rN  = (INSN(7,7) << 4) | INSN(19,16);
   13289       UInt rT  = INSN(15,12);
   13290       UInt U   = INSN(23,23);
   13291       UInt opc = (INSN(22,21) << 2) | INSN(6,5);
   13292       UInt index;
   13293       if (rT == 15 || (isT && rT == 13)) {
   13294          /* fall through */
   13295       } else {
   13296          if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
   13297             index = opc & 7;
   13298             IRExpr* e = unop(U ? Iop_8Uto32 : Iop_8Sto32,
   13299                              binop(Iop_GetElem8x8,
   13300                                    getDRegI64(rN),
   13301                                    mkU8(index)));
   13302             if (isT)
   13303                putIRegT(rT, e, condT);
   13304             else
   13305                putIRegA(rT, e, condT, Ijk_Boring);
   13306             DIP("vmov%s.%c8 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
   13307                   rT, rN, index);
   13308             goto decode_success_vfp;
   13309          }
   13310          else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
   13311             index = (opc >> 1) & 3;
   13312             IRExpr* e = unop(U ? Iop_16Uto32 : Iop_16Sto32,
   13313                              binop(Iop_GetElem16x4,
   13314                                    getDRegI64(rN),
   13315                                    mkU8(index)));
   13316             if (isT)
   13317                putIRegT(rT, e, condT);
   13318             else
   13319                putIRegA(rT, e, condT, Ijk_Boring);
   13320             DIP("vmov%s.%c16 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
   13321                   rT, rN, index);
   13322             goto decode_success_vfp;
   13323          }
   13324          else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0) && U == 0) {
   13325             index = (opc >> 2) & 1;
   13326             IRExpr* e = binop(Iop_GetElem32x2, getDRegI64(rN), mkU8(index));
   13327             if (isT)
   13328                putIRegT(rT, e, condT);
   13329             else
   13330                putIRegA(rT, e, condT, Ijk_Boring);
   13331             DIP("vmov%s.32 r%u, d%u[%u]\n", nCC(conq), rT, rN, index);
   13332             goto decode_success_vfp;
   13333          } else {
   13334             /* fall through */
   13335          }
   13336       }
   13337    }
   13338 
   13339    // VMOV.F32 sD, #imm
   13340    // FCONSTS sD, #imm
   13341    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13342        && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,0)) {
   13343       UInt rD   = (INSN(15,12) << 1) | INSN(22,22);
   13344       UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
   13345       UInt b    = (imm8 >> 6) & 1;
   13346       UInt imm;
   13347       imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,(imm8 >> 5) & 1) << 8)
   13348              | ((imm8 & 0x1f) << 3);
   13349       imm <<= 16;
   13350       putFReg(rD, unop(Iop_ReinterpI32asF32, mkU32(imm)), condT);
   13351       DIP("fconsts%s s%u #%u", nCC(conq), rD, imm8);
   13352       goto decode_success_vfp;
   13353    }
   13354 
   13355    // VMOV.F64 dD, #imm
   13356    // FCONSTD dD, #imm
   13357    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13358        && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,1)) {
   13359       UInt rD   = INSN(15,12) | (INSN(22,22) << 4);
   13360       UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
   13361       UInt b    = (imm8 >> 6) & 1;
   13362       ULong imm;
   13363       imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,b) << 8)
   13364              | BITS8(b,b,0,0,0,0,0,0) | (imm8 & 0x3f);
   13365       imm <<= 48;
   13366       putDReg(rD, unop(Iop_ReinterpI64asF64, mkU64(imm)), condT);
   13367       DIP("fconstd%s d%u #%u", nCC(conq), rD, imm8);
   13368       goto decode_success_vfp;
   13369    }
   13370 
   13371    /* ---------------------- vdup ------------------------- */
   13372    // VDUP dD, rT
   13373    // VDUP qD, rT
   13374    if (BITS8(1,1,1,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,1))
   13375        && BITS4(1,0,1,1) == INSN(11,8) && INSN(6,6) == 0 && INSN(4,4) == 1) {
   13376       UInt rD   = (INSN(7,7) << 4) | INSN(19,16);
   13377       UInt rT   = INSN(15,12);
   13378       UInt Q    = INSN(21,21);
   13379       UInt size = (INSN(22,22) << 1) | INSN(5,5);
   13380       if (rT == 15 || (isT && rT == 13) || size == 3 || (Q && (rD & 1))) {
   13381          /* fall through */
   13382       } else {
   13383          IRExpr* e = isT ? getIRegT(rT) : getIRegA(rT);
   13384          if (Q) {
   13385             rD >>= 1;
   13386             switch (size) {
   13387                case 0:
   13388                   putQReg(rD, unop(Iop_Dup32x4, e), condT);
   13389                   break;
   13390                case 1:
   13391                   putQReg(rD, unop(Iop_Dup16x8, unop(Iop_32to16, e)),
   13392                               condT);
   13393                   break;
   13394                case 2:
   13395                   putQReg(rD, unop(Iop_Dup8x16, unop(Iop_32to8, e)),
   13396                               condT);
   13397                   break;
   13398                default:
   13399                   vassert(0);
   13400             }
   13401             DIP("vdup.%u q%u, r%u\n", 32 / (1<<size), rD, rT);
   13402          } else {
   13403             switch (size) {
   13404                case 0:
   13405                   putDRegI64(rD, unop(Iop_Dup32x2, e), condT);
   13406                   break;
   13407                case 1:
   13408                   putDRegI64(rD, unop(Iop_Dup16x4, unop(Iop_32to16, e)),
   13409                                condT);
   13410                   break;
   13411                case 2:
   13412                   putDRegI64(rD, unop(Iop_Dup8x8, unop(Iop_32to8, e)),
   13413                                condT);
   13414                   break;
   13415                default:
   13416                   vassert(0);
   13417             }
   13418             DIP("vdup.%u d%u, r%u\n", 32 / (1<<size), rD, rT);
   13419          }
   13420          goto decode_success_vfp;
   13421       }
   13422    }
   13423 
   13424    /* --------------------- f{ld,st}d --------------------- */
   13425    // FLDD, FSTD
   13426    if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
   13427        && BITS4(1,0,1,1) == INSN(11,8)) {
   13428       UInt dD     = INSN(15,12) | (INSN(22,22) << 4);
   13429       UInt rN     = INSN(19,16);
   13430       UInt offset = (insn28 & 0xFF) << 2;
   13431       UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
   13432       UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
   13433       /* make unconditional */
   13434       if (condT != IRTemp_INVALID) {
   13435          if (isT)
   13436             mk_skip_over_T32_if_cond_is_false( condT );
   13437          else
   13438             mk_skip_over_A32_if_cond_is_false( condT );
   13439          condT = IRTemp_INVALID;
   13440       }
   13441       IRTemp ea = newTemp(Ity_I32);
   13442       assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
   13443                        align4if(isT ? getIRegT(rN) : getIRegA(rN),
   13444                                 rN == 15),
   13445                        mkU32(offset)));
   13446       if (bL) {
   13447          putDReg(dD, loadLE(Ity_F64,mkexpr(ea)), IRTemp_INVALID);
   13448       } else {
   13449          storeLE(mkexpr(ea), getDReg(dD));
   13450       }
   13451       DIP("f%sd%s d%u, [r%u, %c#%u]\n",
   13452           bL ? "ld" : "st", nCC(conq), dD, rN,
   13453           bU ? '+' : '-', offset);
   13454       goto decode_success_vfp;
   13455    }
   13456 
   13457    /* --------------------- dp insns (D) --------------------- */
   13458    if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
   13459        && BITS4(1,0,1,1) == INSN(11,8)
   13460        && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
   13461       UInt    dM  = INSN(3,0)   | (INSN(5,5) << 4);       /* argR */
   13462       UInt    dD  = INSN(15,12) | (INSN(22,22) << 4);   /* dst/acc */
   13463       UInt    dN  = INSN(19,16) | (INSN(7,7) << 4);     /* argL */
   13464       UInt    bP  = (insn28 >> 23) & 1;
   13465       UInt    bQ  = (insn28 >> 21) & 1;
   13466       UInt    bR  = (insn28 >> 20) & 1;
   13467       UInt    bS  = (insn28 >> 6) & 1;
   13468       UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
   13469       IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   13470       switch (opc) {
   13471          case BITS4(0,0,0,0): /* MAC: d + n * m */
   13472             putDReg(dD, triop(Iop_AddF64, rm,
   13473                               getDReg(dD),
   13474                               triop(Iop_MulF64, rm, getDReg(dN),
   13475                                                     getDReg(dM))),
   13476                         condT);
   13477             DIP("fmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13478             goto decode_success_vfp;
   13479          case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
   13480             putDReg(dD, triop(Iop_AddF64, rm,
   13481                               getDReg(dD),
   13482                               unop(Iop_NegF64,
   13483                                    triop(Iop_MulF64, rm, getDReg(dN),
   13484                                                          getDReg(dM)))),
   13485                         condT);
   13486             DIP("fnmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13487             goto decode_success_vfp;
   13488          case BITS4(0,0,1,0): /* MSC: - d + n * m */
   13489             putDReg(dD, triop(Iop_AddF64, rm,
   13490                               unop(Iop_NegF64, getDReg(dD)),
   13491                               triop(Iop_MulF64, rm, getDReg(dN),
   13492                                                     getDReg(dM))),
   13493                         condT);
   13494             DIP("fmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13495             goto decode_success_vfp;
   13496          case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
   13497             putDReg(dD, triop(Iop_AddF64, rm,
   13498                               unop(Iop_NegF64, getDReg(dD)),
   13499                               unop(Iop_NegF64,
   13500                                    triop(Iop_MulF64, rm, getDReg(dN),
   13501                                                          getDReg(dM)))),
   13502                         condT);
   13503             DIP("fnmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13504             goto decode_success_vfp;
   13505          case BITS4(0,1,0,0): /* MUL: n * m */
   13506             putDReg(dD, triop(Iop_MulF64, rm, getDReg(dN), getDReg(dM)),
   13507                         condT);
   13508             DIP("fmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13509             goto decode_success_vfp;
   13510          case BITS4(0,1,0,1): /* NMUL: - n * m */
   13511             putDReg(dD, unop(Iop_NegF64,
   13512                              triop(Iop_MulF64, rm, getDReg(dN),
   13513                                                    getDReg(dM))),
   13514                     condT);
   13515             DIP("fnmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13516             goto decode_success_vfp;
   13517          case BITS4(0,1,1,0): /* ADD: n + m */
   13518             putDReg(dD, triop(Iop_AddF64, rm, getDReg(dN), getDReg(dM)),
   13519                         condT);
   13520             DIP("faddd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13521             goto decode_success_vfp;
   13522          case BITS4(0,1,1,1): /* SUB: n - m */
   13523             putDReg(dD, triop(Iop_SubF64, rm, getDReg(dN), getDReg(dM)),
   13524                         condT);
   13525             DIP("fsubd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13526             goto decode_success_vfp;
   13527          case BITS4(1,0,0,0): /* DIV: n / m */
   13528             putDReg(dD, triop(Iop_DivF64, rm, getDReg(dN), getDReg(dM)),
   13529                         condT);
   13530             DIP("fdivd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13531             goto decode_success_vfp;
   13532          case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
   13533             /* XXXROUNDINGFIXME look up ARM reference for fused
   13534                multiply-add rounding */
   13535             putDReg(dD, triop(Iop_AddF64, rm,
   13536                               unop(Iop_NegF64, getDReg(dD)),
   13537                               triop(Iop_MulF64, rm,
   13538                                                 getDReg(dN),
   13539                                                 getDReg(dM))),
   13540                         condT);
   13541             DIP("vfnmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13542             goto decode_success_vfp;
   13543          case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
   13544             /* XXXROUNDINGFIXME look up ARM reference for fused
   13545                multiply-add rounding */
   13546             putDReg(dD, triop(Iop_AddF64, rm,
   13547                               unop(Iop_NegF64, getDReg(dD)),
   13548                               triop(Iop_MulF64, rm,
   13549                                                 unop(Iop_NegF64, getDReg(dN)),
   13550                                                 getDReg(dM))),
   13551                         condT);
   13552             DIP("vfnmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13553             goto decode_success_vfp;
   13554          case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
   13555             /* XXXROUNDINGFIXME look up ARM reference for fused
   13556                multiply-add rounding */
   13557             putDReg(dD, triop(Iop_AddF64, rm,
   13558                               getDReg(dD),
   13559                               triop(Iop_MulF64, rm, getDReg(dN),
   13560                                                     getDReg(dM))),
   13561                         condT);
   13562             DIP("vfmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13563             goto decode_success_vfp;
   13564          case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
   13565             /* XXXROUNDINGFIXME look up ARM reference for fused
   13566                multiply-add rounding */
   13567             putDReg(dD, triop(Iop_AddF64, rm,
   13568                               getDReg(dD),
   13569                               triop(Iop_MulF64, rm,
   13570                                     unop(Iop_NegF64, getDReg(dN)),
   13571                                     getDReg(dM))),
   13572                         condT);
   13573             DIP("vfmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   13574             goto decode_success_vfp;
   13575          default:
   13576             break;
   13577       }
   13578    }
   13579 
   13580    /* --------------------- compares (D) --------------------- */
   13581    /*          31   27   23   19   15 11   7    3
   13582                  28   24   20   16 12    8    4    0
   13583       FCMPD    cond 1110 1D11 0100 Dd 1011 0100 Dm
   13584       FCMPED   cond 1110 1D11 0100 Dd 1011 1100 Dm
   13585       FCMPZD   cond 1110 1D11 0101 Dd 1011 0100 0000
   13586       FCMPZED  cond 1110 1D11 0101 Dd 1011 1100 0000
   13587                                  Z         N
   13588 
   13589       Z=0 Compare Dd vs Dm     and set FPSCR 31:28 accordingly
   13590       Z=1 Compare Dd vs zero
   13591 
   13592       N=1 generates Invalid Operation exn if either arg is any kind of NaN
   13593       N=0 generates Invalid Operation exn if either arg is a signalling NaN
   13594       (Not that we pay any attention to N here)
   13595    */
   13596    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13597        && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   13598        && BITS4(1,0,1,1) == INSN(11,8)
   13599        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   13600       UInt bZ = (insn28 >> 16) & 1;
   13601       UInt bN = (insn28 >> 7) & 1;
   13602       UInt dD = INSN(15,12) | (INSN(22,22) << 4);
   13603       UInt dM = INSN(3,0) | (INSN(5,5) << 4);
   13604       if (bZ && INSN(3,0) != 0) {
   13605          /* does not decode; fall through */
   13606       } else {
   13607          IRTemp argL = newTemp(Ity_F64);
   13608          IRTemp argR = newTemp(Ity_F64);
   13609          IRTemp irRes = newTemp(Ity_I32);
   13610          assign(argL, getDReg(dD));
   13611          assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0)) : getDReg(dM));
   13612          assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
   13613 
   13614          IRTemp nzcv     = IRTemp_INVALID;
   13615          IRTemp oldFPSCR = newTemp(Ity_I32);
   13616          IRTemp newFPSCR = newTemp(Ity_I32);
   13617 
   13618          /* This is where the fun starts.  We have to convert 'irRes'
   13619             from an IR-convention return result (IRCmpF64Result) to an
   13620             ARM-encoded (N,Z,C,V) group.  The final result is in the
   13621             bottom 4 bits of 'nzcv'. */
   13622          /* Map compare result from IR to ARM(nzcv) */
   13623          /*
   13624             FP cmp result | IR   | ARM(nzcv)
   13625             --------------------------------
   13626             UN              0x45   0011
   13627             LT              0x01   1000
   13628             GT              0x00   0010
   13629             EQ              0x40   0110
   13630          */
   13631          nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
   13632 
   13633          /* And update FPSCR accordingly */
   13634          assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
   13635          assign(newFPSCR,
   13636                 binop(Iop_Or32,
   13637                       binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
   13638                       binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
   13639 
   13640          putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
   13641 
   13642          if (bZ) {
   13643             DIP("fcmpz%sd%s d%u\n", bN ? "e" : "", nCC(conq), dD);
   13644          } else {
   13645             DIP("fcmp%sd%s d%u, d%u\n", bN ? "e" : "", nCC(conq), dD, dM);
   13646          }
   13647          goto decode_success_vfp;
   13648       }
   13649       /* fall through */
   13650    }
   13651 
   13652    /* --------------------- unary (D) --------------------- */
   13653    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13654        && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   13655        && BITS4(1,0,1,1) == INSN(11,8)
   13656        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   13657       UInt dD  = INSN(15,12) | (INSN(22,22) << 4);
   13658       UInt dM  = INSN(3,0) | (INSN(5,5) << 4);
   13659       UInt b16 = (insn28 >> 16) & 1;
   13660       UInt b7  = (insn28 >> 7) & 1;
   13661       /**/ if (b16 == 0 && b7 == 0) {
   13662          // FCPYD
   13663          putDReg(dD, getDReg(dM), condT);
   13664          DIP("fcpyd%s d%u, d%u\n", nCC(conq), dD, dM);
   13665          goto decode_success_vfp;
   13666       }
   13667       else if (b16 == 0 && b7 == 1) {
   13668          // FABSD
   13669          putDReg(dD, unop(Iop_AbsF64, getDReg(dM)), condT);
   13670          DIP("fabsd%s d%u, d%u\n", nCC(conq), dD, dM);
   13671          goto decode_success_vfp;
   13672       }
   13673       else if (b16 == 1 && b7 == 0) {
   13674          // FNEGD
   13675          putDReg(dD, unop(Iop_NegF64, getDReg(dM)), condT);
   13676          DIP("fnegd%s d%u, d%u\n", nCC(conq), dD, dM);
   13677          goto decode_success_vfp;
   13678       }
   13679       else if (b16 == 1 && b7 == 1) {
   13680          // FSQRTD
   13681          IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   13682          putDReg(dD, binop(Iop_SqrtF64, rm, getDReg(dM)), condT);
   13683          DIP("fsqrtd%s d%u, d%u\n", nCC(conq), dD, dM);
   13684          goto decode_success_vfp;
   13685       }
   13686       else
   13687          vassert(0);
   13688 
   13689       /* fall through */
   13690    }
   13691 
   13692    /* ----------------- I <-> D conversions ----------------- */
   13693 
   13694    // F{S,U}ITOD dD, fM
   13695    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13696        && BITS4(1,0,0,0) == (INSN(19,16) & BITS4(1,1,1,1))
   13697        && BITS4(1,0,1,1) == INSN(11,8)
   13698        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   13699       UInt bM    = (insn28 >> 5) & 1;
   13700       UInt fM    = (INSN(3,0) << 1) | bM;
   13701       UInt dD    = INSN(15,12) | (INSN(22,22) << 4);
   13702       UInt syned = (insn28 >> 7) & 1;
   13703       if (syned) {
   13704          // FSITOD
   13705          putDReg(dD, unop(Iop_I32StoF64,
   13706                           unop(Iop_ReinterpF32asI32, getFReg(fM))),
   13707                  condT);
   13708          DIP("fsitod%s d%u, s%u\n", nCC(conq), dD, fM);
   13709       } else {
   13710          // FUITOD
   13711          putDReg(dD, unop(Iop_I32UtoF64,
   13712                           unop(Iop_ReinterpF32asI32, getFReg(fM))),
   13713                  condT);
   13714          DIP("fuitod%s d%u, s%u\n", nCC(conq), dD, fM);
   13715       }
   13716       goto decode_success_vfp;
   13717    }
   13718 
   13719    // FTO{S,U}ID fD, dM
   13720    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   13721        && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   13722        && BITS4(1,0,1,1) == INSN(11,8)
   13723        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   13724       UInt   bD    = (insn28 >> 22) & 1;
   13725       UInt   fD    = (INSN(15,12) << 1) | bD;
   13726       UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
   13727       UInt   bZ    = (insn28 >> 7) & 1;
   13728       UInt   syned = (insn28 >> 16) & 1;
   13729       IRTemp rmode = newTemp(Ity_I32);
   13730       assign(rmode, bZ ? mkU32(Irrm_ZERO)
   13731                        : mkexpr(mk_get_IR_rounding_mode()));
   13732       if (syned) {
   13733          // FTOSID
   13734          putFReg(fD, unop(Iop_ReinterpI32asF32,
   13735                           binop(Iop_F64toI32S, mkexpr(rmode),
   13736                                 getDReg(dM))),
   13737                  condT);
   13738          DIP("ftosi%sd%s s%u, d%u\n", bZ ? "z" : "",
   13739              nCC(conq), fD, dM);
   13740       } else {
   13741          // FTOUID
   13742          putFReg(fD, unop(Iop_ReinterpI32asF32,
   13743                           binop(Iop_F64toI32U, mkexpr(rmode),
   13744                                 getDReg(dM))),
   13745                  condT);
   13746          DIP("ftoui%sd%s s%u, d%u\n", bZ ? "z" : "",
   13747              nCC(conq), fD, dM);
   13748       }
   13749       goto decode_success_vfp;
   13750    }
   13751 
   13752    /* ----------------------------------------------------------- */
   13753    /* -- VFP instructions -- single precision                  -- */
   13754    /* ----------------------------------------------------------- */
   13755 
   13756    /* --------------------- fldms, fstms --------------------- */
   13757    /*
   13758                                  31   27   23   19 15 11   7   0
   13759                                          P UDWL
   13760       C4-98, C5-26   1  FSTMD    cond 1100 1x00 Rn Fd 1010 offset
   13761       C4-98, C5-28   2  FSTMDIA  cond 1100 1x10 Rn Fd 1010 offset
   13762       C4-98, C5-30   3  FSTMDDB  cond 1101 0x10 Rn Fd 1010 offset
   13763 
   13764       C4-40, C5-26   1  FLDMD    cond 1100 1x01 Rn Fd 1010 offset
   13765       C4-40, C5-26   2  FLDMIAD  cond 1100 1x11 Rn Fd 1010 offset
   13766       C4-40, C5-26   3  FLDMDBD  cond 1101 0x11 Rn Fd 1010 offset
   13767 
      Regs transferred: F(Fd:D) .. F(Fd:D + offset - 1)
      offset must not imply a reg > 31 (nothing beyond S31)
   13770       IA/DB: Rn is changed by (4 x # regs transferred)
   13771 
   13772       case coding:
   13773          1  at-Rn   (access at Rn)
   13774          2  ia-Rn   (access at Rn, then Rn += 4n)
   13775          3  db-Rn   (Rn -= 4n,     then access at Rn)
   13776    */
   13777    if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
   13778        && INSN(11,8) == BITS4(1,0,1,0)) {
   13779       UInt bP      = (insn28 >> 24) & 1;
   13780       UInt bU      = (insn28 >> 23) & 1;
   13781       UInt bW      = (insn28 >> 21) & 1;
   13782       UInt bL      = (insn28 >> 20) & 1;
   13783       UInt bD      = (insn28 >> 22) & 1;
   13784       UInt offset  = (insn28 >> 0) & 0xFF;
   13785       UInt rN      = INSN(19,16);
   13786       UInt fD      = (INSN(15,12) << 1) | bD;
   13787       UInt nRegs   = offset;
   13788       UInt summary = 0;
   13789       Int  i;
   13790 
   13791       /**/ if (bP == 0 && bU == 1 && bW == 0) {
   13792          summary = 1;
   13793       }
   13794       else if (bP == 0 && bU == 1 && bW == 1) {
   13795          summary = 2;
   13796       }
   13797       else if (bP == 1 && bU == 0 && bW == 1) {
   13798          summary = 3;
   13799       }
   13800       else goto after_vfp_fldms_fstms;
   13801 
   13802       /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
   13803       if (rN == 15 && (summary == 2 || summary == 3 || isT))
   13804          goto after_vfp_fldms_fstms;
   13805 
   13806       /* offset must specify at least one register */
   13807       if (offset < 1)
   13808          goto after_vfp_fldms_fstms;
   13809 
   13810       /* can't transfer regs after S31 */
   13811       if (fD + nRegs - 1 >= 32)
   13812          goto after_vfp_fldms_fstms;
   13813 
   13814       /* Now, we can't do a conditional load or store, since that very
   13815          likely will generate an exception.  So we have to take a side
   13816          exit at this point if the condition is false. */
   13817       if (condT != IRTemp_INVALID) {
   13818          if (isT)
   13819             mk_skip_over_T32_if_cond_is_false( condT );
   13820          else
   13821             mk_skip_over_A32_if_cond_is_false( condT );
   13822          condT = IRTemp_INVALID;
   13823       }
   13824       /* Ok, now we're unconditional.  Do the load or store. */
   13825 
   13826       /* get the old Rn value */
   13827       IRTemp rnT = newTemp(Ity_I32);
   13828       assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
   13829                            rN == 15));
   13830 
   13831       /* make a new value for Rn, post-insn */
   13832       IRTemp rnTnew = IRTemp_INVALID;
   13833       if (summary == 2 || summary == 3) {
   13834          rnTnew = newTemp(Ity_I32);
   13835          assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
   13836                               mkexpr(rnT),
   13837                               mkU32(4 * nRegs)));
   13838       }
   13839 
   13840       /* decide on the base transfer address */
   13841       IRTemp taT = newTemp(Ity_I32);
   13842       assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
   13843 
   13844       /* update Rn if necessary -- in case 3, we're moving it down, so
   13845          update before any memory reference, in order to keep Memcheck
   13846          and V's stack-extending logic (on linux) happy */
   13847       if (summary == 3) {
   13848          if (isT)
   13849             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   13850          else
   13851             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   13852       }
   13853 
   13854       /* generate the transfers */
   13855       for (i = 0; i < nRegs; i++) {
   13856          IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(4*i));
   13857          if (bL) {
   13858             putFReg(fD + i, loadLE(Ity_F32, addr), IRTemp_INVALID);
   13859          } else {
   13860             storeLE(addr, getFReg(fD + i));
   13861          }
   13862       }
   13863 
   13864       /* update Rn if necessary -- in case 2, we're moving it up, so
   13865          update after any memory reference, in order to keep Memcheck
   13866          and V's stack-extending logic (on linux) happy */
   13867       if (summary == 2) {
   13868          if (isT)
   13869             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   13870          else
   13871             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   13872       }
   13873 
   13874       const HChar* nm = bL==1 ? "ld" : "st";
   13875       switch (summary) {
   13876          case 1:  DIP("f%sms%s r%u, {s%u-s%u}\n",
   13877                       nm, nCC(conq), rN, fD, fD + nRegs - 1);
   13878                   break;
   13879          case 2:  DIP("f%smias%s r%u!, {s%u-s%u}\n",
   13880                       nm, nCC(conq), rN, fD, fD + nRegs - 1);
   13881                   break;
   13882          case 3:  DIP("f%smdbs%s r%u!, {s%u-s%u}\n",
   13883                       nm, nCC(conq), rN, fD, fD + nRegs - 1);
   13884                   break;
   13885          default: vassert(0);
   13886       }
   13887 
   13888       goto decode_success_vfp;
   13889       /* FIXME alignment constraints? */
   13890    }
   13891 
   13892   after_vfp_fldms_fstms:
   13893 
   13894    /* --------------------- fmsr, fmrs --------------------- */
   13895    if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
   13896        && BITS4(1,0,1,0) == INSN(11,8)
   13897        && BITS4(0,0,0,0) == INSN(3,0)
   13898        && BITS4(0,0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
   13899       UInt rD  = INSN(15,12);
   13900       UInt b7  = (insn28 >> 7) & 1;
   13901       UInt fN  = (INSN(19,16) << 1) | b7;
   13902       UInt b20 = (insn28 >> 20) & 1;
   13903       if (rD == 15) {
   13904          /* fall through */
   13905          /* Let's assume that no sane person would want to do
   13906             floating-point transfers to or from the program counter,
   13907             and simply decline to decode the instruction.  The ARM ARM
   13908             doesn't seem to explicitly disallow this case, though. */
   13909       } else {
   13910          if (b20) {
   13911             IRExpr* res = unop(Iop_ReinterpF32asI32, getFReg(fN));
   13912             if (isT)
   13913                putIRegT(rD, res, condT);
   13914             else
   13915                putIRegA(rD, res, condT, Ijk_Boring);
   13916             DIP("fmrs%s r%u, s%u\n", nCC(conq), rD, fN);
   13917          } else {
   13918             putFReg(fN, unop(Iop_ReinterpI32asF32,
   13919                              isT ? getIRegT(rD) : getIRegA(rD)),
   13920                         condT);
   13921             DIP("fmsr%s s%u, r%u\n", nCC(conq), fN, rD);
   13922          }
   13923          goto decode_success_vfp;
   13924       }
   13925       /* fall through */
   13926    }
   13927 
   13928    /* --------------------- f{ld,st}s --------------------- */
   13929    // FLDS, FSTS
   13930    if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
   13931        && BITS4(1,0,1,0) == INSN(11,8)) {
   13932       UInt bD     = (insn28 >> 22) & 1;
   13933       UInt fD     = (INSN(15,12) << 1) | bD;
   13934       UInt rN     = INSN(19,16);
   13935       UInt offset = (insn28 & 0xFF) << 2;
   13936       UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
   13937       UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
   13938       /* make unconditional */
   13939       if (condT != IRTemp_INVALID) {
   13940          if (isT)
   13941             mk_skip_over_T32_if_cond_is_false( condT );
   13942          else
   13943             mk_skip_over_A32_if_cond_is_false( condT );
   13944          condT = IRTemp_INVALID;
   13945       }
   13946       IRTemp ea = newTemp(Ity_I32);
   13947       assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
   13948                        align4if(isT ? getIRegT(rN) : getIRegA(rN),
   13949                                 rN == 15),
   13950                        mkU32(offset)));
   13951       if (bL) {
   13952          putFReg(fD, loadLE(Ity_F32,mkexpr(ea)), IRTemp_INVALID);
   13953       } else {
   13954          storeLE(mkexpr(ea), getFReg(fD));
   13955       }
   13956       DIP("f%ss%s s%u, [r%u, %c#%u]\n",
   13957           bL ? "ld" : "st", nCC(conq), fD, rN,
   13958           bU ? '+' : '-', offset);
   13959       goto decode_success_vfp;
   13960    }
   13961 
   13962    /* --------------------- dp insns (F) --------------------- */
   13963    if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
   13964        && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
   13965        && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
   13966       UInt    bM  = (insn28 >> 5) & 1;
   13967       UInt    bD  = (insn28 >> 22) & 1;
   13968       UInt    bN  = (insn28 >> 7) & 1;
   13969       UInt    fM  = (INSN(3,0) << 1) | bM;   /* argR */
   13970       UInt    fD  = (INSN(15,12) << 1) | bD; /* dst/acc */
   13971       UInt    fN  = (INSN(19,16) << 1) | bN; /* argL */
   13972       UInt    bP  = (insn28 >> 23) & 1;
   13973       UInt    bQ  = (insn28 >> 21) & 1;
   13974       UInt    bR  = (insn28 >> 20) & 1;
   13975       UInt    bS  = (insn28 >> 6) & 1;
   13976       UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
   13977       IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   13978       switch (opc) {
   13979          case BITS4(0,0,0,0): /* MAC: d + n * m */
   13980             putFReg(fD, triop(Iop_AddF32, rm,
   13981                               getFReg(fD),
   13982                               triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
   13983                         condT);
   13984             DIP("fmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   13985             goto decode_success_vfp;
   13986          case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
   13987             putFReg(fD, triop(Iop_AddF32, rm,
   13988                               getFReg(fD),
   13989                               unop(Iop_NegF32,
   13990                                    triop(Iop_MulF32, rm, getFReg(fN),
   13991                                                          getFReg(fM)))),
   13992                         condT);
   13993             DIP("fnmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   13994             goto decode_success_vfp;
   13995          case BITS4(0,0,1,0): /* MSC: - d + n * m */
   13996             putFReg(fD, triop(Iop_AddF32, rm,
   13997                               unop(Iop_NegF32, getFReg(fD)),
   13998                               triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
   13999                         condT);
   14000             DIP("fmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14001             goto decode_success_vfp;
   14002          case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
   14003             putFReg(fD, triop(Iop_AddF32, rm,
   14004                               unop(Iop_NegF32, getFReg(fD)),
   14005                               unop(Iop_NegF32,
   14006                                    triop(Iop_MulF32, rm,
   14007                                                      getFReg(fN),
   14008                                                     getFReg(fM)))),
   14009                         condT);
   14010             DIP("fnmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14011             goto decode_success_vfp;
   14012          case BITS4(0,1,0,0): /* MUL: n * m */
   14013             putFReg(fD, triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM)),
   14014                         condT);
   14015             DIP("fmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14016             goto decode_success_vfp;
   14017          case BITS4(0,1,0,1): /* NMUL: - n * m */
   14018             putFReg(fD, unop(Iop_NegF32,
   14019                              triop(Iop_MulF32, rm, getFReg(fN),
   14020                                                    getFReg(fM))),
   14021                     condT);
   14022             DIP("fnmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14023             goto decode_success_vfp;
   14024          case BITS4(0,1,1,0): /* ADD: n + m */
   14025             putFReg(fD, triop(Iop_AddF32, rm, getFReg(fN), getFReg(fM)),
   14026                         condT);
   14027             DIP("fadds%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14028             goto decode_success_vfp;
   14029          case BITS4(0,1,1,1): /* SUB: n - m */
   14030             putFReg(fD, triop(Iop_SubF32, rm, getFReg(fN), getFReg(fM)),
   14031                         condT);
   14032             DIP("fsubs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14033             goto decode_success_vfp;
   14034          case BITS4(1,0,0,0): /* DIV: n / m */
   14035             putFReg(fD, triop(Iop_DivF32, rm, getFReg(fN), getFReg(fM)),
   14036                         condT);
   14037             DIP("fdivs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14038             goto decode_success_vfp;
   14039          case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
   14040             /* XXXROUNDINGFIXME look up ARM reference for fused
   14041                multiply-add rounding */
   14042             putFReg(fD, triop(Iop_AddF32, rm,
   14043                               unop(Iop_NegF32, getFReg(fD)),
   14044                               triop(Iop_MulF32, rm,
   14045                                                 getFReg(fN),
   14046                                                 getFReg(fM))),
   14047                         condT);
   14048             DIP("vfnmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14049             goto decode_success_vfp;
   14050          case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
   14051             /* XXXROUNDINGFIXME look up ARM reference for fused
   14052                multiply-add rounding */
   14053             putFReg(fD, triop(Iop_AddF32, rm,
   14054                               unop(Iop_NegF32, getFReg(fD)),
   14055                               triop(Iop_MulF32, rm,
   14056                                                 unop(Iop_NegF32, getFReg(fN)),
   14057                                                 getFReg(fM))),
   14058                         condT);
   14059             DIP("vfnmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14060             goto decode_success_vfp;
   14061          case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
   14062             /* XXXROUNDINGFIXME look up ARM reference for fused
   14063                multiply-add rounding */
   14064             putFReg(fD, triop(Iop_AddF32, rm,
   14065                               getFReg(fD),
   14066                               triop(Iop_MulF32, rm, getFReg(fN),
   14067                                                     getFReg(fM))),
   14068                         condT);
   14069             DIP("vfmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14070             goto decode_success_vfp;
   14071          case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
   14072             /* XXXROUNDINGFIXME look up ARM reference for fused
   14073                multiply-add rounding */
   14074             putFReg(fD, triop(Iop_AddF32, rm,
   14075                               getFReg(fD),
   14076                               triop(Iop_MulF32, rm,
   14077                                     unop(Iop_NegF32, getFReg(fN)),
   14078                                     getFReg(fM))),
   14079                         condT);
   14080             DIP("vfmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   14081             goto decode_success_vfp;
   14082          default:
   14083             break;
   14084       }
   14085    }
   14086 
   14087    /* --------------------- compares (S) --------------------- */
   14088    /*          31   27   23   19   15 11   7    3
   14089                  28   24   20   16 12    8    4    0
   14090       FCMPS    cond 1110 1D11 0100 Fd 1010 01M0 Fm
   14091       FCMPES   cond 1110 1D11 0100 Fd 1010 11M0 Fm
   14092       FCMPZS   cond 1110 1D11 0101 Fd 1010 0100 0000
   14093       FCMPZES  cond 1110 1D11 0101 Fd 1010 1100 0000
   14094                                  Z         N
   14095 
   14096       Z=0 Compare Fd:D vs Fm:M     and set FPSCR 31:28 accordingly
   14097       Z=1 Compare Fd:D vs zero
   14098 
   14099       N=1 generates Invalid Operation exn if either arg is any kind of NaN
   14100       N=0 generates Invalid Operation exn if either arg is a signalling NaN
   14101       (Not that we pay any attention to N here)
   14102    */
   14103    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14104        && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   14105        && BITS4(1,0,1,0) == INSN(11,8)
   14106        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   14107       UInt bZ = (insn28 >> 16) & 1;
   14108       UInt bN = (insn28 >> 7) & 1;
   14109       UInt bD = (insn28 >> 22) & 1;
   14110       UInt bM = (insn28 >> 5) & 1;
   14111       UInt fD = (INSN(15,12) << 1) | bD;
   14112       UInt fM = (INSN(3,0) << 1) | bM;
   14113       if (bZ && (INSN(3,0) != 0 || (INSN(7,4) & 3) != 0)) {
   14114          /* does not decode; fall through */
   14115       } else {
   14116          IRTemp argL = newTemp(Ity_F64);
   14117          IRTemp argR = newTemp(Ity_F64);
   14118          IRTemp irRes = newTemp(Ity_I32);
   14119 
   14120          assign(argL, unop(Iop_F32toF64, getFReg(fD)));
   14121          assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0))
   14122                          : unop(Iop_F32toF64, getFReg(fM)));
   14123          assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
   14124 
   14125          IRTemp nzcv     = IRTemp_INVALID;
   14126          IRTemp oldFPSCR = newTemp(Ity_I32);
   14127          IRTemp newFPSCR = newTemp(Ity_I32);
   14128 
   14129          /* This is where the fun starts.  We have to convert 'irRes'
   14130             from an IR-convention return result (IRCmpF64Result) to an
   14131             ARM-encoded (N,Z,C,V) group.  The final result is in the
   14132             bottom 4 bits of 'nzcv'. */
   14133          /* Map compare result from IR to ARM(nzcv) */
   14134          /*
   14135             FP cmp result | IR   | ARM(nzcv)
   14136             --------------------------------
   14137             UN              0x45   0011
   14138             LT              0x01   1000
   14139             GT              0x00   0010
   14140             EQ              0x40   0110
   14141          */
   14142          nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
   14143 
   14144          /* And update FPSCR accordingly */
   14145          assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
   14146          assign(newFPSCR,
   14147                 binop(Iop_Or32,
   14148                       binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
   14149                       binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
   14150 
   14151          putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
   14152 
   14153          if (bZ) {
   14154             DIP("fcmpz%ss%s s%u\n", bN ? "e" : "", nCC(conq), fD);
   14155          } else {
   14156             DIP("fcmp%ss%s s%u, s%u\n", bN ? "e" : "",
   14157                 nCC(conq), fD, fM);
   14158          }
   14159          goto decode_success_vfp;
   14160       }
   14161       /* fall through */
   14162    }
   14163 
   14164    /* --------------------- unary (S) --------------------- */
   14165    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14166        && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   14167        && BITS4(1,0,1,0) == INSN(11,8)
   14168        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   14169       UInt bD = (insn28 >> 22) & 1;
   14170       UInt bM = (insn28 >> 5) & 1;
   14171       UInt fD  = (INSN(15,12) << 1) | bD;
   14172       UInt fM  = (INSN(3,0) << 1) | bM;
   14173       UInt b16 = (insn28 >> 16) & 1;
   14174       UInt b7  = (insn28 >> 7) & 1;
   14175       /**/ if (b16 == 0 && b7 == 0) {
   14176          // FCPYS
   14177          putFReg(fD, getFReg(fM), condT);
   14178          DIP("fcpys%s s%u, s%u\n", nCC(conq), fD, fM);
   14179          goto decode_success_vfp;
   14180       }
   14181       else if (b16 == 0 && b7 == 1) {
   14182          // FABSS
   14183          putFReg(fD, unop(Iop_AbsF32, getFReg(fM)), condT);
   14184          DIP("fabss%s s%u, s%u\n", nCC(conq), fD, fM);
   14185          goto decode_success_vfp;
   14186       }
   14187       else if (b16 == 1 && b7 == 0) {
   14188          // FNEGS
   14189          putFReg(fD, unop(Iop_NegF32, getFReg(fM)), condT);
   14190          DIP("fnegs%s s%u, s%u\n", nCC(conq), fD, fM);
   14191          goto decode_success_vfp;
   14192       }
   14193       else if (b16 == 1 && b7 == 1) {
   14194          // FSQRTS
   14195          IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   14196          putFReg(fD, binop(Iop_SqrtF32, rm, getFReg(fM)), condT);
   14197          DIP("fsqrts%s s%u, s%u\n", nCC(conq), fD, fM);
   14198          goto decode_success_vfp;
   14199       }
   14200       else
   14201          vassert(0);
   14202 
   14203       /* fall through */
   14204    }
   14205 
   14206    /* ----------------- I <-> S conversions ----------------- */
   14207 
   14208    // F{S,U}ITOS fD, fM
   14209    /* These are more complex than FSITOD/FUITOD.  In the D cases, a 32
   14210       bit int will always fit within the 53 bit mantissa, so there's
   14211       no possibility of a loss of precision, but that's obviously not
   14212       the case here.  Hence this case possibly requires rounding, and
   14213       so it drags in the current rounding mode. */
   14214    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14215        && BITS4(1,0,0,0) == INSN(19,16)
   14216        && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
   14217        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   14218       UInt bM    = (insn28 >> 5) & 1;
   14219       UInt bD    = (insn28 >> 22) & 1;
   14220       UInt fM    = (INSN(3,0) << 1) | bM;
   14221       UInt fD    = (INSN(15,12) << 1) | bD;
   14222       UInt syned = (insn28 >> 7) & 1;
   14223       IRTemp rmode = newTemp(Ity_I32);
   14224       assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
   14225       if (syned) {
   14226          // FSITOS
   14227          putFReg(fD, binop(Iop_F64toF32,
   14228                            mkexpr(rmode),
   14229                            unop(Iop_I32StoF64,
   14230                                 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
   14231                  condT);
   14232          DIP("fsitos%s s%u, s%u\n", nCC(conq), fD, fM);
   14233       } else {
   14234          // FUITOS
   14235          putFReg(fD, binop(Iop_F64toF32,
   14236                            mkexpr(rmode),
   14237                            unop(Iop_I32UtoF64,
   14238                                 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
   14239                  condT);
   14240          DIP("fuitos%s s%u, s%u\n", nCC(conq), fD, fM);
   14241       }
   14242       goto decode_success_vfp;
   14243    }
   14244 
   14245    // FTO{S,U}IS fD, fM
   14246    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14247        && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   14248        && BITS4(1,0,1,0) == INSN(11,8)
   14249        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   14250       UInt   bM    = (insn28 >> 5) & 1;
   14251       UInt   bD    = (insn28 >> 22) & 1;
   14252       UInt   fD    = (INSN(15,12) << 1) | bD;
   14253       UInt   fM    = (INSN(3,0) << 1) | bM;
   14254       UInt   bZ    = (insn28 >> 7) & 1;
   14255       UInt   syned = (insn28 >> 16) & 1;
   14256       IRTemp rmode = newTemp(Ity_I32);
   14257       assign(rmode, bZ ? mkU32(Irrm_ZERO)
   14258                        : mkexpr(mk_get_IR_rounding_mode()));
   14259       if (syned) {
   14260          // FTOSIS
   14261          putFReg(fD, unop(Iop_ReinterpI32asF32,
   14262                           binop(Iop_F64toI32S, mkexpr(rmode),
   14263                                 unop(Iop_F32toF64, getFReg(fM)))),
   14264                  condT);
   14265          DIP("ftosi%ss%s s%u, d%u\n", bZ ? "z" : "",
   14266              nCC(conq), fD, fM);
   14267          goto decode_success_vfp;
   14268       } else {
   14269          // FTOUIS
   14270          putFReg(fD, unop(Iop_ReinterpI32asF32,
   14271                           binop(Iop_F64toI32U, mkexpr(rmode),
   14272                                 unop(Iop_F32toF64, getFReg(fM)))),
   14273                  condT);
   14274          DIP("ftoui%ss%s s%u, d%u\n", bZ ? "z" : "",
   14275              nCC(conq), fD, fM);
   14276          goto decode_success_vfp;
   14277       }
   14278    }
   14279 
   14280    /* ----------------- S <-> D conversions ----------------- */
   14281 
   14282    // FCVTDS
   14283    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14284        && BITS4(0,1,1,1) == INSN(19,16)
   14285        && BITS4(1,0,1,0) == INSN(11,8)
   14286        && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
   14287       UInt dD = INSN(15,12) | (INSN(22,22) << 4);
   14288       UInt bM = (insn28 >> 5) & 1;
   14289       UInt fM = (INSN(3,0) << 1) | bM;
   14290       putDReg(dD, unop(Iop_F32toF64, getFReg(fM)), condT);
   14291       DIP("fcvtds%s d%u, s%u\n", nCC(conq), dD, fM);
   14292       goto decode_success_vfp;
   14293    }
   14294 
   14295    // FCVTSD
   14296    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14297        && BITS4(0,1,1,1) == INSN(19,16)
   14298        && BITS4(1,0,1,1) == INSN(11,8)
   14299        && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
   14300       UInt   bD    = (insn28 >> 22) & 1;
   14301       UInt   fD    = (INSN(15,12) << 1) | bD;
   14302       UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
   14303       IRTemp rmode = newTemp(Ity_I32);
   14304       assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
   14305       putFReg(fD, binop(Iop_F64toF32, mkexpr(rmode), getDReg(dM)),
   14306                   condT);
   14307       DIP("fcvtsd%s s%u, d%u\n", nCC(conq), fD, dM);
   14308       goto decode_success_vfp;
   14309    }
   14310 
   14311    /* --------------- VCVT fixed<->floating, VFP --------------- */
   14312    /*          31   27   23   19   15 11   7    3
   14313                  28   24   20   16 12    8    4    0
   14314 
   14315                cond 1110 1D11 1p1U Vd 101f x1i0 imm4
   14316 
   14317       VCVT<c>.<Td>.F64 <Dd>, <Dd>, #fbits
   14318       VCVT<c>.<Td>.F32 <Sd>, <Sd>, #fbits
   14319       VCVT<c>.F64.<Td> <Dd>, <Dd>, #fbits
   14320       VCVT<c>.F32.<Td> <Sd>, <Sd>, #fbits
   14321       are of this form.  We only handle a subset of the cases though.
   14322    */
   14323    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14324        && BITS4(1,0,1,0) == (INSN(19,16) & BITS4(1,0,1,0))
   14325        && BITS3(1,0,1) == INSN(11,9)
   14326        && BITS3(1,0,0) == (INSN(6,4) & BITS3(1,0,1))) {
   14327       UInt bD        = INSN(22,22);
   14328       UInt bOP       = INSN(18,18);
   14329       UInt bU        = INSN(16,16);
   14330       UInt Vd        = INSN(15,12);
   14331       UInt bSF       = INSN(8,8);
   14332       UInt bSX       = INSN(7,7);
   14333       UInt bI        = INSN(5,5);
   14334       UInt imm4      = INSN(3,0);
   14335       Bool to_fixed  = bOP == 1;
   14336       Bool dp_op     = bSF == 1;
   14337       Bool unsyned   = bU == 1;
   14338       UInt size      = bSX == 0 ? 16 : 32;
   14339       Int  frac_bits = size - ((imm4 << 1) | bI);
   14340       UInt d         = dp_op  ? ((bD << 4) | Vd)  : ((Vd << 1) | bD);
   14341       if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && !dp_op
   14342                                             && size == 32) {
   14343          /* VCVT.F32.{S,U}32 S[d], S[d], #frac_bits */
   14344          /* This generates really horrible code.  We could potentially
   14345             do much better. */
   14346          IRTemp rmode = newTemp(Ity_I32);
   14347          assign(rmode, mkU32(Irrm_NEAREST)); // per the spec
   14348          IRTemp src32 = newTemp(Ity_I32);
   14349          assign(src32,  unop(Iop_ReinterpF32asI32, getFReg(d)));
   14350          IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
   14351                                 mkexpr(src32 ) );
   14352          IRTemp scale = newTemp(Ity_F64);
   14353          assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
   14354          IRExpr* rm     = mkU32(Irrm_NEAREST);
   14355          IRExpr* resF64 = triop(Iop_DivF64,
   14356                                 rm, as_F64,
   14357                                 triop(Iop_AddF64, rm, mkexpr(scale),
   14358                                                       mkexpr(scale)));
   14359          IRExpr* resF32 = binop(Iop_F64toF32, mkexpr(rmode), resF64);
   14360          putFReg(d, resF32, condT);
   14361          DIP("vcvt.f32.%c32, s%u, s%u, #%d\n",
   14362              unsyned ? 'u' : 's', d, d, frac_bits);
   14363          goto decode_success_vfp;
   14364       }
   14365       if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && dp_op
   14366                                             && size == 32) {
   14367          /* VCVT.F64.{S,U}32 D[d], D[d], #frac_bits */
   14368          /* This generates really horrible code.  We could potentially
   14369             do much better. */
   14370          IRTemp src32 = newTemp(Ity_I32);
   14371          assign(src32, unop(Iop_64to32, getDRegI64(d)));
   14372          IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
   14373                                 mkexpr(src32 ) );
   14374          IRTemp scale = newTemp(Ity_F64);
   14375          assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
   14376          IRExpr* rm     = mkU32(Irrm_NEAREST);
   14377          IRExpr* resF64 = triop(Iop_DivF64,
   14378                                 rm, as_F64,
   14379                                 triop(Iop_AddF64, rm, mkexpr(scale),
   14380                                                       mkexpr(scale)));
   14381          putDReg(d, resF64, condT);
   14382          DIP("vcvt.f64.%c32, d%u, d%u, #%d\n",
   14383              unsyned ? 'u' : 's', d, d, frac_bits);
   14384          goto decode_success_vfp;
   14385       }
   14386       if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && dp_op
   14387                                             && size == 32) {
   14388          /* VCVT.{S,U}32.F64 D[d], D[d], #frac_bits */
   14389          IRTemp srcF64 = newTemp(Ity_F64);
   14390          assign(srcF64, getDReg(d));
   14391          IRTemp scale = newTemp(Ity_F64);
   14392          assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
   14393          IRTemp scaledF64 = newTemp(Ity_F64);
   14394          IRExpr* rm = mkU32(Irrm_NEAREST);
   14395          assign(scaledF64, triop(Iop_MulF64,
   14396                                  rm, mkexpr(srcF64),
   14397                                  triop(Iop_AddF64, rm, mkexpr(scale),
   14398                                                        mkexpr(scale))));
   14399          IRTemp rmode = newTemp(Ity_I32);
   14400          assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
   14401          IRTemp asI32 = newTemp(Ity_I32);
   14402          assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
   14403                              mkexpr(rmode), mkexpr(scaledF64)));
   14404          putDRegI64(d, unop(unsyned ? Iop_32Uto64 : Iop_32Sto64,
   14405                             mkexpr(asI32)), condT);
   14406          goto decode_success_vfp;
   14407       }
   14408       /* fall through */
   14409    }
   14410 
   14411    /* FAILURE */
   14412    return False;
   14413 
   14414   decode_success_vfp:
   14415    /* Check that any accepted insn really is a CP10 or CP11 insn, iow,
   14416       assert that we aren't accepting, in this fn, insns that actually
   14417       should be handled somewhere else. */
   14418    vassert(INSN(11,9) == BITS3(1,0,1)); // 11:8 = 1010 or 1011
   14419    return True;
   14420 
   14421 #  undef INSN
   14422 }
   14423 
   14424 
   14425 /*------------------------------------------------------------*/
   14426 /*--- Instructions in NV (never) space                     ---*/
   14427 /*------------------------------------------------------------*/
   14428 
   14429 /* ARM only */
   14430 /* Translate an NV space instruction.  On success, returns True and
   14431    *dres may or may not be updated.  On failure, returns False and
   14432    neither changes *dres nor creates any IR.
   14433 
   14434    Note that all NEON instructions (in ARM mode) are handled through
   14435    here, since they are all in NV space.
   14436 */
   14437 static Bool decode_NV_instruction ( /*MOD*/DisResult* dres,
   14438                                     const VexArchInfo* archinfo,
   14439                                     UInt insn )
   14440 {
   14441 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   14442 #  define INSN_COND          SLICE_UInt(insn, 31, 28)
   14443 
   14444    HChar dis_buf[128];
   14445 
   14446    // Should only be called for NV instructions
   14447    vassert(BITS4(1,1,1,1) == INSN_COND);
   14448 
   14449    /* ------------------------ pld{w} ------------------------ */
   14450    if (BITS8(0,1,0,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
   14451        && BITS4(1,1,1,1) == INSN(15,12)) {
   14452       UInt rN    = INSN(19,16);
   14453       UInt imm12 = INSN(11,0);
   14454       UInt bU    = INSN(23,23);
   14455       UInt bR    = INSN(22,22);
   14456       DIP("pld%c [r%u, #%c%u]\n", bR ? ' ' : 'w', rN, bU ? '+' : '-', imm12);
   14457       return True;
   14458    }
   14459 
   14460    if (BITS8(0,1,1,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
   14461        && BITS4(1,1,1,1) == INSN(15,12)
   14462        && 0 == INSN(4,4)) {
   14463       UInt rN   = INSN(19,16);
   14464       UInt rM   = INSN(3,0);
   14465       UInt imm5 = INSN(11,7);
   14466       UInt sh2  = INSN(6,5);
   14467       UInt bU   = INSN(23,23);
   14468       UInt bR   = INSN(22,22);
   14469       if (rM != 15 && (rN != 15 || bR)) {
   14470          IRExpr* eaE = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   14471                                                        sh2, imm5, dis_buf);
   14472          IRTemp eaT = newTemp(Ity_I32);
   14473          /* Bind eaE to a temp merely for debugging-vex purposes, so we
   14474             can check it's a plausible decoding.  It will get removed
   14475             by iropt a little later on. */
   14476          vassert(eaE);
   14477          assign(eaT, eaE);
   14478          DIP("pld%c %s\n", bR ? ' ' : 'w', dis_buf);
   14479          return True;
   14480       }
   14481       /* fall through */
   14482    }
   14483 
   14484    /* ------------------------ pli ------------------------ */
   14485    if (BITS8(0,1,0,0, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
   14486        && BITS4(1,1,1,1) == INSN(15,12)) {
   14487       UInt rN    = INSN(19,16);
   14488       UInt imm12 = INSN(11,0);
   14489       UInt bU    = INSN(23,23);
   14490       DIP("pli [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
   14491       return True;
   14492    }
   14493 
   14494    /* --------------------- Interworking branches --------------------- */
   14495 
   14496    // BLX (1), viz, unconditional branch and link to R15+simm24
   14497    // and set CPSR.T = 1, that is, switch to Thumb mode
   14498    if (INSN(31,25) == BITS7(1,1,1,1,1,0,1)) {
   14499       UInt bitH   = INSN(24,24);
   14500       Int  uimm24 = INSN(23,0);
   14501       Int  simm24 = (((uimm24 << 8) >> 8) << 2) + (bitH << 1);
   14502       /* Now this is a bit tricky.  Since we're decoding an ARM insn,
   14503          it is implies that CPSR.T == 0.  Hence the current insn's
   14504          address is guaranteed to be of the form X--(30)--X00.  So, no
   14505          need to mask any bits off it.  But need to set the lowest bit
   14506          to 1 to denote we're in Thumb mode after this, since
   14507          guest_R15T has CPSR.T as the lowest bit.  And we can't chase
   14508          into the call, so end the block at this point. */
   14509       UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
   14510       putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
   14511                     IRTemp_INVALID/*because AL*/, Ijk_Boring );
   14512       llPutIReg(15, mkU32(dst));
   14513       dres->jk_StopHere = Ijk_Call;
   14514       dres->whatNext    = Dis_StopHere;
   14515       DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1);
   14516       return True;
   14517    }
   14518 
   14519    /* ------------------- v7 barrier insns ------------------- */
   14520    switch (insn) {
   14521       case 0xF57FF06F: /* ISB */
   14522          stmt( IRStmt_MBE(Imbe_Fence) );
   14523          DIP("ISB\n");
   14524          return True;
   14525       case 0xF57FF04F: /* DSB sy */
   14526       case 0xF57FF04E: /* DSB st */
   14527       case 0xF57FF04B: /* DSB ish */
   14528       case 0xF57FF04A: /* DSB ishst */
   14529       case 0xF57FF047: /* DSB nsh */
   14530       case 0xF57FF046: /* DSB nshst */
   14531       case 0xF57FF043: /* DSB osh */
   14532       case 0xF57FF042: /* DSB oshst */
   14533          stmt( IRStmt_MBE(Imbe_Fence) );
   14534          DIP("DSB\n");
   14535          return True;
   14536       case 0xF57FF05F: /* DMB sy */
   14537       case 0xF57FF05E: /* DMB st */
   14538       case 0xF57FF05B: /* DMB ish */
   14539       case 0xF57FF05A: /* DMB ishst */
   14540       case 0xF57FF057: /* DMB nsh */
   14541       case 0xF57FF056: /* DMB nshst */
   14542       case 0xF57FF053: /* DMB osh */
   14543       case 0xF57FF052: /* DMB oshst */
   14544          stmt( IRStmt_MBE(Imbe_Fence) );
   14545          DIP("DMB\n");
   14546          return True;
   14547       default:
   14548          break;
   14549    }
   14550 
   14551    /* ------------------- CLREX ------------------ */
   14552    if (insn == 0xF57FF01F) {
   14553       /* AFAICS, this simply cancels a (all?) reservations made by a
   14554          (any?) preceding LDREX(es).  Arrange to hand it through to
   14555          the back end. */
   14556       stmt( IRStmt_MBE(Imbe_CancelReservation) );
   14557       DIP("clrex\n");
   14558       return True;
   14559    }
   14560 
   14561    /* ------------------- NEON ------------------- */
   14562    if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
   14563       Bool ok_neon = decode_NEON_instruction(
   14564                         dres, insn, IRTemp_INVALID/*unconditional*/,
   14565                         False/*!isT*/
   14566                      );
   14567       if (ok_neon)
   14568          return True;
   14569    }
   14570 
   14571    // unrecognised
   14572    return False;
   14573 
   14574 #  undef INSN_COND
   14575 #  undef INSN
   14576 }
   14577 
   14578 
   14579 /*------------------------------------------------------------*/
   14580 /*--- Disassemble a single ARM instruction                 ---*/
   14581 /*------------------------------------------------------------*/
   14582 
   14583 /* Disassemble a single ARM instruction into IR.  The instruction is
   14584    located in host memory at guest_instr, and has (decoded) guest IP
   14585    of guest_R15_curr_instr_notENC, which will have been set before the
   14586    call here. */
   14587 
   14588 static
   14589 DisResult disInstr_ARM_WRK (
   14590              Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
   14591              Bool         resteerCisOk,
   14592              void*        callback_opaque,
   14593              const UChar* guest_instr,
   14594              const VexArchInfo* archinfo,
   14595              const VexAbiInfo*  abiinfo,
   14596              Bool         sigill_diag
   14597           )
   14598 {
   14599    // A macro to fish bits out of 'insn'.
   14600 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   14601 #  define INSN_COND          SLICE_UInt(insn, 31, 28)
   14602 
   14603    DisResult dres;
   14604    UInt      insn;
   14605    //Bool      allow_VFP = False;
   14606    //UInt      hwcaps = archinfo->hwcaps;
   14607    IRTemp    condT; /* :: Ity_I32 */
   14608    UInt      summary;
   14609    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
   14610 
   14611    /* What insn variants are we supporting today? */
   14612    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
   14613    // etc etc
   14614 
   14615    /* Set result defaults. */
   14616    dres.whatNext    = Dis_Continue;
   14617    dres.len         = 4;
   14618    dres.continueAt  = 0;
   14619    dres.jk_StopHere = Ijk_INVALID;
   14620 
   14621    /* Set default actions for post-insn handling of writes to r15, if
   14622       required. */
   14623    r15written = False;
   14624    r15guard   = IRTemp_INVALID; /* unconditional */
   14625    r15kind    = Ijk_Boring;
   14626 
   14627    /* At least this is simple on ARM: insns are all 4 bytes long, and
   14628       4-aligned.  So just fish the whole thing out of memory right now
   14629       and have done. */
   14630    insn = getUIntLittleEndianly( guest_instr );
   14631 
   14632    if (0) vex_printf("insn: 0x%x\n", insn);
   14633 
   14634    DIP("\t(arm) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
   14635 
   14636    vassert(0 == (guest_R15_curr_instr_notENC & 3));
   14637 
   14638    /* ----------------------------------------------------------- */
   14639 
   14640    /* Spot "Special" instructions (see comment at top of file). */
   14641    {
   14642       const UChar* code = guest_instr;
   14643       /* Spot the 16-byte preamble:
   14644 
   14645          e1a0c1ec  mov r12, r12, ROR #3
   14646          e1a0c6ec  mov r12, r12, ROR #13
   14647          e1a0ceec  mov r12, r12, ROR #29
   14648          e1a0c9ec  mov r12, r12, ROR #19
   14649       */
   14650       UInt word1 = 0xE1A0C1EC;
   14651       UInt word2 = 0xE1A0C6EC;
   14652       UInt word3 = 0xE1A0CEEC;
   14653       UInt word4 = 0xE1A0C9EC;
   14654       if (getUIntLittleEndianly(code+ 0) == word1 &&
   14655           getUIntLittleEndianly(code+ 4) == word2 &&
   14656           getUIntLittleEndianly(code+ 8) == word3 &&
   14657           getUIntLittleEndianly(code+12) == word4) {
   14658          /* Got a "Special" instruction preamble.  Which one is it? */
   14659          if (getUIntLittleEndianly(code+16) == 0xE18AA00A
   14660                                                /* orr r10,r10,r10 */) {
   14661             /* R3 = client_request ( R4 ) */
   14662             DIP("r3 = client_request ( %%r4 )\n");
   14663             llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
   14664             dres.jk_StopHere = Ijk_ClientReq;
   14665             dres.whatNext    = Dis_StopHere;
   14666             goto decode_success;
   14667          }
   14668          else
   14669          if (getUIntLittleEndianly(code+16) == 0xE18BB00B
   14670                                                /* orr r11,r11,r11 */) {
   14671             /* R3 = guest_NRADDR */
   14672             DIP("r3 = guest_NRADDR\n");
   14673             dres.len = 20;
   14674             llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
   14675             goto decode_success;
   14676          }
   14677          else
   14678          if (getUIntLittleEndianly(code+16) == 0xE18CC00C
   14679                                                /* orr r12,r12,r12 */) {
   14680             /*  branch-and-link-to-noredir R4 */
   14681             DIP("branch-and-link-to-noredir r4\n");
   14682             llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
   14683             llPutIReg(15, llGetIReg(4));
   14684             dres.jk_StopHere = Ijk_NoRedir;
   14685             dres.whatNext    = Dis_StopHere;
   14686             goto decode_success;
   14687          }
   14688          else
   14689          if (getUIntLittleEndianly(code+16) == 0xE1899009
   14690                                                /* orr r9,r9,r9 */) {
   14691             /* IR injection */
   14692             DIP("IR injection\n");
   14693             vex_inject_ir(irsb, Iend_LE);
   14694             // Invalidate the current insn. The reason is that the IRop we're
   14695             // injecting here can change. In which case the translation has to
   14696             // be redone. For ease of handling, we simply invalidate all the
   14697             // time.
   14698             stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
   14699             stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
   14700             llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
   14701             dres.whatNext    = Dis_StopHere;
   14702             dres.jk_StopHere = Ijk_InvalICache;
   14703             goto decode_success;
   14704          }
   14705          /* We don't know what it is.  Set opc1/opc2 so decode_failure
   14706             can print the insn following the Special-insn preamble. */
   14707          insn = getUIntLittleEndianly(code+16);
   14708          goto decode_failure;
   14709          /*NOTREACHED*/
   14710       }
   14711 
   14712    }
   14713 
   14714    /* ----------------------------------------------------------- */
   14715 
   14716    /* Main ARM instruction decoder starts here. */
   14717 
   14718    /* Deal with the condition.  Strategy is to merely generate a
   14719       condition temporary at this point (or IRTemp_INVALID, meaning
   14720       unconditional).  We leave it to lower-level instruction decoders
   14721       to decide whether they can generate straight-line code, or
   14722       whether they must generate a side exit before the instruction.
   14723       condT :: Ity_I32 and is always either zero or one. */
   14724    condT = IRTemp_INVALID;
   14725    switch ( (ARMCondcode)INSN_COND ) {
   14726       case ARMCondNV: {
   14727          // Illegal instruction prior to v5 (see ARM ARM A3-5), but
   14728          // some cases are acceptable
   14729          Bool ok = decode_NV_instruction(&dres, archinfo, insn);
   14730          if (ok)
   14731             goto decode_success;
   14732          else
   14733             goto decode_failure;
   14734       }
   14735       case ARMCondAL: // Always executed
   14736          break;
   14737       case ARMCondEQ: case ARMCondNE: case ARMCondHS: case ARMCondLO:
   14738       case ARMCondMI: case ARMCondPL: case ARMCondVS: case ARMCondVC:
   14739       case ARMCondHI: case ARMCondLS: case ARMCondGE: case ARMCondLT:
   14740       case ARMCondGT: case ARMCondLE:
   14741          condT = newTemp(Ity_I32);
   14742          assign( condT, mk_armg_calculate_condition( INSN_COND ));
   14743          break;
   14744    }
   14745 
   14746    /* ----------------------------------------------------------- */
   14747    /* -- ARMv5 integer instructions                            -- */
   14748    /* ----------------------------------------------------------- */
   14749 
   14750    /* ---------------- Data processing ops ------------------- */
   14751 
   14752    if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0))
   14753        && !(INSN(25,25) == 0 && INSN(7,7) == 1 && INSN(4,4) == 1)) {
   14754       IRTemp  shop = IRTemp_INVALID; /* shifter operand */
   14755       IRTemp  shco = IRTemp_INVALID; /* shifter carry out */
   14756       UInt    rD   = (insn >> 12) & 0xF; /* 15:12 */
   14757       UInt    rN   = (insn >> 16) & 0xF; /* 19:16 */
   14758       UInt    bitS = (insn >> 20) & 1; /* 20:20 */
   14759       IRTemp  rNt  = IRTemp_INVALID;
   14760       IRTemp  res  = IRTemp_INVALID;
   14761       IRTemp  oldV = IRTemp_INVALID;
   14762       IRTemp  oldC = IRTemp_INVALID;
   14763       const HChar*  name = NULL;
   14764       IROp    op   = Iop_INVALID;
   14765       Bool    ok;
   14766 
   14767       switch (INSN(24,21)) {
   14768 
   14769          /* --------- ADD, SUB, AND, OR --------- */
   14770          case BITS4(0,1,0,0): /* ADD:  Rd = Rn + shifter_operand */
   14771             name = "add"; op = Iop_Add32; goto rd_eq_rn_op_SO;
   14772          case BITS4(0,0,1,0): /* SUB:  Rd = Rn - shifter_operand */
   14773             name = "sub"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
   14774          case BITS4(0,0,1,1): /* RSB:  Rd = shifter_operand - Rn */
   14775             name = "rsb"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
   14776          case BITS4(0,0,0,0): /* AND:  Rd = Rn & shifter_operand */
   14777             name = "and"; op = Iop_And32; goto rd_eq_rn_op_SO;
   14778          case BITS4(1,1,0,0): /* OR:   Rd = Rn | shifter_operand */
   14779             name = "orr"; op = Iop_Or32; goto rd_eq_rn_op_SO;
   14780          case BITS4(0,0,0,1): /* EOR:  Rd = Rn ^ shifter_operand */
   14781             name = "eor"; op = Iop_Xor32; goto rd_eq_rn_op_SO;
   14782          case BITS4(1,1,1,0): /* BIC:  Rd = Rn & ~shifter_operand */
   14783             name = "bic"; op = Iop_And32; goto rd_eq_rn_op_SO;
   14784          rd_eq_rn_op_SO: {
   14785             Bool isRSB = False;
   14786             Bool isBIC = False;
   14787             switch (INSN(24,21)) {
   14788                case BITS4(0,0,1,1):
   14789                   vassert(op == Iop_Sub32); isRSB = True; break;
   14790                case BITS4(1,1,1,0):
   14791                   vassert(op == Iop_And32); isBIC = True; break;
   14792                default:
   14793                   break;
   14794             }
   14795             rNt = newTemp(Ity_I32);
   14796             assign(rNt, getIRegA(rN));
   14797             ok = mk_shifter_operand(
   14798                     INSN(25,25), INSN(11,0),
   14799                     &shop, bitS ? &shco : NULL, dis_buf
   14800                  );
   14801             if (!ok)
   14802                break;
   14803             res = newTemp(Ity_I32);
   14804             // compute the main result
   14805             if (isRSB) {
   14806                // reverse-subtract: shifter_operand - Rn
   14807                vassert(op == Iop_Sub32);
   14808                assign(res, binop(op, mkexpr(shop), mkexpr(rNt)) );
   14809             } else if (isBIC) {
   14810                // andn: Rn & ~shifter_operand
   14811                vassert(op == Iop_And32);
   14812                assign(res, binop(op, mkexpr(rNt),
   14813                                      unop(Iop_Not32, mkexpr(shop))) );
   14814             } else {
   14815                // normal: Rn op shifter_operand
   14816                assign(res, binop(op, mkexpr(rNt), mkexpr(shop)) );
   14817             }
   14818             // but don't commit it until after we've finished
   14819             // all necessary reads from the guest state
   14820             if (bitS
   14821                 && (op == Iop_And32 || op == Iop_Or32 || op == Iop_Xor32)) {
   14822                oldV = newTemp(Ity_I32);
   14823                assign( oldV, mk_armg_calculate_flag_v() );
   14824             }
   14825             // can't safely read guest state after here
   14826             // now safe to put the main result
   14827             putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   14828             // XXXX!! not safe to read any guest state after
   14829             // this point (I think the code below doesn't do that).
   14830             if (!bitS)
   14831                vassert(shco == IRTemp_INVALID);
   14832             /* Update the flags thunk if necessary */
   14833             if (bitS) {
   14834                vassert(shco != IRTemp_INVALID);
   14835                switch (op) {
   14836                   case Iop_Add32:
   14837                      setFlags_D1_D2( ARMG_CC_OP_ADD, rNt, shop, condT );
   14838                      break;
   14839                   case Iop_Sub32:
   14840                      if (isRSB) {
   14841                         setFlags_D1_D2( ARMG_CC_OP_SUB, shop, rNt, condT );
   14842                      } else {
   14843                         setFlags_D1_D2( ARMG_CC_OP_SUB, rNt, shop, condT );
   14844                      }
   14845                      break;
   14846                   case Iop_And32: /* BIC and AND set the flags the same */
   14847                   case Iop_Or32:
   14848                   case Iop_Xor32:
   14849                      // oldV has been read just above
   14850                      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
   14851                                         res, shco, oldV, condT );
   14852                      break;
   14853                   default:
   14854                      vassert(0);
   14855                }
   14856             }
   14857             DIP("%s%s%s r%u, r%u, %s\n",
   14858                 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
   14859             goto decode_success;
   14860          }
   14861 
   14862          /* --------- MOV, MVN --------- */
   14863          case BITS4(1,1,0,1):   /* MOV: Rd = shifter_operand */
   14864          case BITS4(1,1,1,1): { /* MVN: Rd = not(shifter_operand) */
   14865             Bool isMVN = INSN(24,21) == BITS4(1,1,1,1);
   14866             IRTemp jk = Ijk_Boring;
   14867             if (rN != 0)
   14868                break; /* rN must be zero */
   14869             ok = mk_shifter_operand(
   14870                     INSN(25,25), INSN(11,0),
   14871                     &shop, bitS ? &shco : NULL, dis_buf
   14872                  );
   14873             if (!ok)
   14874                break;
   14875             res = newTemp(Ity_I32);
   14876             assign( res, isMVN ? unop(Iop_Not32, mkexpr(shop))
   14877                                : mkexpr(shop) );
   14878             if (bitS) {
   14879                vassert(shco != IRTemp_INVALID);
   14880                oldV = newTemp(Ity_I32);
   14881                assign( oldV, mk_armg_calculate_flag_v() );
   14882             } else {
   14883                vassert(shco == IRTemp_INVALID);
   14884             }
   14885             /* According to the Cortex A8 TRM Sec. 5.2.1, MOV PC, r14 is a
   14886                 return for purposes of branch prediction. */
   14887             if (!isMVN && INSN(11,0) == 14) {
   14888               jk = Ijk_Ret;
   14889             }
   14890             // can't safely read guest state after here
   14891             putIRegA( rD, mkexpr(res), condT, jk );
   14892             /* Update the flags thunk if necessary */
   14893             if (bitS) {
   14894                setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
   14895                                   res, shco, oldV, condT );
   14896             }
   14897             DIP("%s%s%s r%u, %s\n",
   14898                 isMVN ? "mvn" : "mov",
   14899                 nCC(INSN_COND), bitS ? "s" : "", rD, dis_buf );
   14900             goto decode_success;
   14901          }
   14902 
   14903          /* --------- CMP --------- */
   14904          case BITS4(1,0,1,0):   /* CMP:  (void) Rn - shifter_operand */
   14905          case BITS4(1,0,1,1): { /* CMN:  (void) Rn + shifter_operand */
   14906             Bool isCMN = INSN(24,21) == BITS4(1,0,1,1);
   14907             if (rD != 0)
   14908                break; /* rD must be zero */
   14909             if (bitS == 0)
   14910                break; /* if S (bit 20) is not set, it's not CMP/CMN */
   14911             rNt = newTemp(Ity_I32);
   14912             assign(rNt, getIRegA(rN));
   14913             ok = mk_shifter_operand(
   14914                     INSN(25,25), INSN(11,0),
   14915                     &shop, NULL, dis_buf
   14916                  );
   14917             if (!ok)
   14918                break;
   14919             // can't safely read guest state after here
   14920             /* Update the flags thunk. */
   14921             setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   14922                             rNt, shop, condT );
   14923             DIP("%s%s r%u, %s\n",
   14924                 isCMN ? "cmn" : "cmp",
   14925                 nCC(INSN_COND), rN, dis_buf );
   14926             goto decode_success;
   14927          }
   14928 
   14929          /* --------- TST --------- */
   14930          case BITS4(1,0,0,0):   /* TST:  (void) Rn & shifter_operand */
   14931          case BITS4(1,0,0,1): { /* TEQ:  (void) Rn ^ shifter_operand */
   14932             Bool isTEQ = INSN(24,21) == BITS4(1,0,0,1);
   14933             if (rD != 0)
   14934                break; /* rD must be zero */
   14935             if (bitS == 0)
   14936                break; /* if S (bit 20) is not set, it's not TST/TEQ */
   14937             rNt = newTemp(Ity_I32);
   14938             assign(rNt, getIRegA(rN));
   14939             ok = mk_shifter_operand(
   14940                     INSN(25,25), INSN(11,0),
   14941                     &shop, &shco, dis_buf
   14942                  );
   14943             if (!ok)
   14944                break;
   14945             /* Update the flags thunk. */
   14946             res = newTemp(Ity_I32);
   14947             assign( res, binop(isTEQ ? Iop_Xor32 : Iop_And32,
   14948                                mkexpr(rNt), mkexpr(shop)) );
   14949             oldV = newTemp(Ity_I32);
   14950             assign( oldV, mk_armg_calculate_flag_v() );
   14951             // can't safely read guest state after here
   14952             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
   14953                                res, shco, oldV, condT );
   14954             DIP("%s%s r%u, %s\n",
   14955                 isTEQ ? "teq" : "tst",
   14956                 nCC(INSN_COND), rN, dis_buf );
   14957             goto decode_success;
   14958          }
   14959 
   14960          /* --------- ADC, SBC, RSC --------- */
   14961          case BITS4(0,1,0,1): /* ADC:  Rd = Rn + shifter_operand + oldC */
   14962             name = "adc"; goto rd_eq_rn_op_SO_op_oldC;
   14963          case BITS4(0,1,1,0): /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
   14964             name = "sbc"; goto rd_eq_rn_op_SO_op_oldC;
   14965          case BITS4(0,1,1,1): /* RSC:  Rd = shifter_operand - Rn - (oldC ^ 1) */
   14966             name = "rsc"; goto rd_eq_rn_op_SO_op_oldC;
   14967          rd_eq_rn_op_SO_op_oldC: {
   14968             // FIXME: shco isn't used for anything.  Get rid of it.
   14969             rNt = newTemp(Ity_I32);
   14970             assign(rNt, getIRegA(rN));
   14971             ok = mk_shifter_operand(
   14972                     INSN(25,25), INSN(11,0),
   14973                     &shop, bitS ? &shco : NULL, dis_buf
   14974                  );
   14975             if (!ok)
   14976                break;
   14977             oldC = newTemp(Ity_I32);
   14978             assign( oldC, mk_armg_calculate_flag_c() );
   14979             res = newTemp(Ity_I32);
   14980             // compute the main result
   14981             switch (INSN(24,21)) {
   14982                case BITS4(0,1,0,1): /* ADC */
   14983                   assign(res,
   14984                          binop(Iop_Add32,
   14985                                binop(Iop_Add32, mkexpr(rNt), mkexpr(shop)),
   14986                                mkexpr(oldC) ));
   14987                   break;
   14988                case BITS4(0,1,1,0): /* SBC */
   14989                   assign(res,
   14990                          binop(Iop_Sub32,
   14991                                binop(Iop_Sub32, mkexpr(rNt), mkexpr(shop)),
   14992                                binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   14993                   break;
   14994                case BITS4(0,1,1,1): /* RSC */
   14995                   assign(res,
   14996                          binop(Iop_Sub32,
   14997                                binop(Iop_Sub32, mkexpr(shop), mkexpr(rNt)),
   14998                                binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   14999                   break;
   15000                default:
   15001                   vassert(0);
   15002             }
   15003             // but don't commit it until after we've finished
   15004             // all necessary reads from the guest state
   15005             // now safe to put the main result
   15006             putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   15007             // XXXX!! not safe to read any guest state after
   15008             // this point (I think the code below doesn't do that).
   15009             if (!bitS)
   15010                vassert(shco == IRTemp_INVALID);
   15011             /* Update the flags thunk if necessary */
   15012             if (bitS) {
   15013                vassert(shco != IRTemp_INVALID);
   15014                switch (INSN(24,21)) {
   15015                   case BITS4(0,1,0,1): /* ADC */
   15016                      setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
   15017                                         rNt, shop, oldC, condT );
   15018                      break;
   15019                   case BITS4(0,1,1,0): /* SBC */
   15020                      setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   15021                                         rNt, shop, oldC, condT );
   15022                      break;
   15023                   case BITS4(0,1,1,1): /* RSC */
   15024                      setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   15025                                         shop, rNt, oldC, condT );
   15026                      break;
   15027                   default:
   15028                      vassert(0);
   15029                }
   15030             }
   15031             DIP("%s%s%s r%u, r%u, %s\n",
   15032                 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
   15033             goto decode_success;
   15034          }
   15035 
   15036          default:
   15037             vassert(0);
   15038       }
   15039    } /* if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0)) */
   15040 
   15041    /* --------------------- Load/store (ubyte & word) -------- */
   15042    // LDR STR LDRB STRB
   15043    /*                 31   27   23   19 15 11    6   4 3  # highest bit
   15044                         28   24   20 16 12
   15045       A5-20   1 | 16  cond 0101 UB0L Rn Rd imm12
   15046       A5-22   1 | 32  cond 0111 UBOL Rn Rd imm5  sh2 0 Rm
   15047       A5-24   2 | 16  cond 0101 UB1L Rn Rd imm12
   15048       A5-26   2 | 32  cond 0111 UB1L Rn Rd imm5  sh2 0 Rm
   15049       A5-28   3 | 16  cond 0100 UB0L Rn Rd imm12
   15050       A5-32   3 | 32  cond 0110 UB0L Rn Rd imm5  sh2 0 Rm
   15051    */
   15052    /* case coding:
   15053              1   at-ea               (access at ea)
   15054              2   at-ea-then-upd      (access at ea, then Rn = ea)
   15055              3   at-Rn-then-upd      (access at Rn, then Rn = ea)
   15056       ea coding
   15057              16  Rn +/- imm12
   15058              32  Rn +/- Rm sh2 imm5
   15059    */
   15060    /* Quickly skip over all of this for hopefully most instructions */
   15061    if ((INSN(27,24) & BITS4(1,1,0,0)) != BITS4(0,1,0,0))
   15062       goto after_load_store_ubyte_or_word;
   15063 
   15064    summary = 0;
   15065 
   15066    /**/ if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 0) {
   15067       summary = 1 | 16;
   15068    }
   15069    else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 0
   15070                                           && INSN(4,4) == 0) {
   15071       summary = 1 | 32;
   15072    }
   15073    else if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 1) {
   15074       summary = 2 | 16;
   15075    }
   15076    else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 1
   15077                                           && INSN(4,4) == 0) {
   15078       summary = 2 | 32;
   15079    }
   15080    else if (INSN(27,24) == BITS4(0,1,0,0) && INSN(21,21) == 0) {
   15081       summary = 3 | 16;
   15082    }
   15083    else if (INSN(27,24) == BITS4(0,1,1,0) && INSN(21,21) == 0
   15084                                           && INSN(4,4) == 0) {
   15085       summary = 3 | 32;
   15086    }
   15087    else goto after_load_store_ubyte_or_word;
   15088 
   15089    { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
   15090      UInt rD = (insn >> 12) & 0xF; /* 15:12 */
   15091      UInt rM = (insn >> 0)  & 0xF; /*  3:0  */
   15092      UInt bU = (insn >> 23) & 1;      /* 23 */
   15093      UInt bB = (insn >> 22) & 1;      /* 22 */
   15094      UInt bL = (insn >> 20) & 1;      /* 20 */
   15095      UInt imm12 = (insn >> 0) & 0xFFF; /* 11:0 */
   15096      UInt imm5  = (insn >> 7) & 0x1F;  /* 11:7 */
   15097      UInt sh2   = (insn >> 5) & 3;     /* 6:5 */
   15098 
   15099      /* Skip some invalid cases, which would lead to two competing
   15100         updates to the same register, or which are otherwise
   15101         disallowed by the spec. */
   15102      switch (summary) {
   15103         case 1 | 16:
   15104            break;
   15105         case 1 | 32:
   15106            if (rM == 15) goto after_load_store_ubyte_or_word;
   15107            break;
   15108         case 2 | 16: case 3 | 16:
   15109            if (rN == 15) goto after_load_store_ubyte_or_word;
   15110            if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
   15111            break;
   15112         case 2 | 32: case 3 | 32:
   15113            if (rM == 15) goto after_load_store_ubyte_or_word;
   15114            if (rN == 15) goto after_load_store_ubyte_or_word;
   15115            if (rN == rM) goto after_load_store_ubyte_or_word;
   15116            if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
   15117            break;
   15118         default:
   15119            vassert(0);
   15120      }
   15121 
   15122      /* compute the effective address.  Bind it to a tmp since we
   15123         may need to use it twice. */
   15124      IRExpr* eaE = NULL;
   15125      switch (summary & 0xF0) {
   15126         case 16:
   15127            eaE = mk_EA_reg_plusminus_imm12( rN, bU, imm12, dis_buf );
   15128            break;
   15129         case 32:
   15130            eaE = mk_EA_reg_plusminus_shifted_reg( rN, bU, rM, sh2, imm5,
   15131                                                   dis_buf );
   15132            break;
   15133      }
   15134      vassert(eaE);
   15135      IRTemp eaT = newTemp(Ity_I32);
   15136      assign(eaT, eaE);
   15137 
   15138      /* get the old Rn value */
   15139      IRTemp rnT = newTemp(Ity_I32);
   15140      assign(rnT, getIRegA(rN));
   15141 
   15142      /* decide on the transfer address */
   15143      IRTemp taT = IRTemp_INVALID;
   15144      switch (summary & 0x0F) {
   15145         case 1: case 2: taT = eaT; break;
   15146         case 3:         taT = rnT; break;
   15147      }
   15148      vassert(taT != IRTemp_INVALID);
   15149 
   15150      if (bL == 0) {
   15151        /* Store.  If necessary, update the base register before the
   15152           store itself, so that the common idiom of "str rX, [sp,
   15153           #-4]!" (store rX at sp-4, then do new sp = sp-4, a.k.a "push
   15154           rX") doesn't cause Memcheck to complain that the access is
   15155           below the stack pointer.  Also, not updating sp before the
   15156           store confuses Valgrind's dynamic stack-extending logic.  So
   15157           do it before the store.  Hence we need to snarf the store
   15158           data before doing the basereg update. */
   15159 
   15160         /* get hold of the data to be stored */
   15161         IRTemp rDt = newTemp(Ity_I32);
   15162         assign(rDt, getIRegA(rD));
   15163 
   15164         /* Update Rn if necessary. */
   15165         switch (summary & 0x0F) {
   15166            case 2: case 3:
   15167               putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   15168               break;
   15169         }
   15170 
   15171         /* generate the transfer */
   15172         if (bB == 0) { // word store
   15173            storeGuardedLE( mkexpr(taT), mkexpr(rDt), condT );
   15174         } else { // byte store
   15175            vassert(bB == 1);
   15176            storeGuardedLE( mkexpr(taT), unop(Iop_32to8, mkexpr(rDt)), condT );
   15177         }
   15178 
   15179      } else {
   15180         /* Load */
   15181         vassert(bL == 1);
   15182 
   15183         /* generate the transfer */
   15184         if (bB == 0) { // word load
   15185            IRTemp jk = Ijk_Boring;
   15186            /* According to the Cortex A8 TRM Sec. 5.2.1, LDR(1) with r13 as the
   15187                base register and PC as the destination register is a return for
   15188                purposes of branch prediction.
   15189               The ARM ARM Sec. C9.10.1 further specifies that it must use a
   15190                post-increment by immediate addressing mode to be counted in
   15191                event 0x0E (Procedure return).*/
   15192            if (rN == 13 && summary == (3 | 16) && bB == 0) {
   15193               jk = Ijk_Ret;
   15194            }
   15195            IRTemp tD = newTemp(Ity_I32);
   15196            loadGuardedLE( tD, ILGop_Ident32,
   15197                           mkexpr(taT), llGetIReg(rD), condT );
   15198            /* "rD == 15 ? condT : IRTemp_INVALID": simply
   15199               IRTemp_INVALID would be correct in all cases here, and
   15200               for the non-r15 case it generates better code, by
   15201               avoiding two tests of the cond (since it is already
   15202               tested by loadGuardedLE).  However, the logic at the end
   15203               of this function, that deals with writes to r15, has an
   15204               optimisation which depends on seeing whether or not the
   15205               write is conditional.  Hence in this particular case we
   15206               let it "see" the guard condition. */
   15207            putIRegA( rD, mkexpr(tD),
   15208                      rD == 15 ? condT : IRTemp_INVALID, jk );
   15209         } else { // byte load
   15210            vassert(bB == 1);
   15211            IRTemp tD = newTemp(Ity_I32);
   15212            loadGuardedLE( tD, ILGop_8Uto32, mkexpr(taT), llGetIReg(rD), condT );
   15213            /* No point in similar 3rd arg complexity here, since we
   15214               can't sanely write anything to r15 like this. */
   15215            putIRegA( rD, mkexpr(tD), IRTemp_INVALID, Ijk_Boring );
   15216         }
   15217 
   15218         /* Update Rn if necessary. */
   15219         switch (summary & 0x0F) {
   15220            case 2: case 3:
   15221               // should be assured by logic above:
   15222               if (bL == 1)
   15223                  vassert(rD != rN); /* since we just wrote rD */
   15224               putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   15225               break;
   15226         }
   15227      }
   15228 
   15229      switch (summary & 0x0F) {
   15230         case 1:  DIP("%sr%s%s r%u, %s\n",
   15231                      bL == 0 ? "st" : "ld",
   15232                      bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
   15233                  break;
   15234         case 2:  DIP("%sr%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
   15235                      bL == 0 ? "st" : "ld",
   15236                      bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
   15237                  break;
   15238         case 3:  DIP("%sr%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
   15239                      bL == 0 ? "st" : "ld",
   15240                      bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
   15241                  break;
   15242         default: vassert(0);
   15243      }
   15244 
   15245      /* XXX deal with alignment constraints */
   15246 
   15247      goto decode_success;
   15248 
   15249      /* Complications:
   15250 
   15251         For all loads: if the Amode specifies base register
   15252         writeback, and the same register is specified for Rd and Rn,
   15253         the results are UNPREDICTABLE.
   15254 
   15255         For all loads and stores: if R15 is written, branch to
   15256         that address afterwards.
   15257 
   15258         STRB: straightforward
   15259         LDRB: loaded data is zero extended
   15260         STR:  lowest 2 bits of address are ignored
   15261         LDR:  if the lowest 2 bits of the address are nonzero
   15262               then the loaded value is rotated right by 8 * the lowest 2 bits
   15263      */
   15264    }
   15265 
   15266   after_load_store_ubyte_or_word:
   15267 
   15268    /* --------------------- Load/store (sbyte & hword) -------- */
   15269    // LDRH LDRSH STRH LDRSB
   15270    /*                 31   27   23   19 15 11   7    3     # highest bit
   15271                         28   24   20 16 12    8    4    0
   15272       A5-36   1 | 16  cond 0001 U10L Rn Rd im4h 1SH1 im4l
   15273       A5-38   1 | 32  cond 0001 U00L Rn Rd 0000 1SH1 Rm
   15274       A5-40   2 | 16  cond 0001 U11L Rn Rd im4h 1SH1 im4l
   15275       A5-42   2 | 32  cond 0001 U01L Rn Rd 0000 1SH1 Rm
   15276       A5-44   3 | 16  cond 0000 U10L Rn Rd im4h 1SH1 im4l
   15277       A5-46   3 | 32  cond 0000 U00L Rn Rd 0000 1SH1 Rm
   15278    */
   15279    /* case coding:
   15280              1   at-ea               (access at ea)
   15281              2   at-ea-then-upd      (access at ea, then Rn = ea)
   15282              3   at-Rn-then-upd      (access at Rn, then Rn = ea)
   15283       ea coding
   15284              16  Rn +/- imm8
   15285              32  Rn +/- Rm
   15286    */
   15287    /* Quickly skip over all of this for hopefully most instructions */
   15288    if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
   15289       goto after_load_store_sbyte_or_hword;
   15290 
   15291    /* Check the "1SH1" thing. */
   15292    if ((INSN(7,4) & BITS4(1,0,0,1)) != BITS4(1,0,0,1))
   15293       goto after_load_store_sbyte_or_hword;
   15294 
   15295    summary = 0;
   15296 
   15297    /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,0)) {
   15298       summary = 1 | 16;
   15299    }
   15300    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,0)) {
   15301       summary = 1 | 32;
   15302    }
   15303    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,1)) {
   15304       summary = 2 | 16;
   15305    }
   15306    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,1)) {
   15307       summary = 2 | 32;
   15308    }
   15309    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(1,0)) {
   15310       summary = 3 | 16;
   15311    }
   15312    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(0,0)) {
   15313       summary = 3 | 32;
   15314    }
   15315    else goto after_load_store_sbyte_or_hword;
   15316 
   15317    { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
   15318      UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
   15319      UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
   15320      UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
   15321      UInt bL   = (insn >> 20) & 1;   /* 20 L=1 load, L=0 store */
   15322      UInt bH   = (insn >> 5) & 1;    /* H=1 halfword, H=0 byte */
   15323      UInt bS   = (insn >> 6) & 1;    /* S=1 signed, S=0 unsigned */
   15324      UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
   15325 
   15326      /* Skip combinations that are either meaningless or already
   15327         handled by main word-or-unsigned-byte load-store
   15328         instructions. */
   15329      if (bS == 0 && bH == 0) /* "unsigned byte" */
   15330         goto after_load_store_sbyte_or_hword;
   15331      if (bS == 1 && bL == 0) /* "signed store" */
   15332         goto after_load_store_sbyte_or_hword;
   15333 
   15334      /* Require 11:8 == 0 for Rn +/- Rm cases */
   15335      if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
   15336         goto after_load_store_sbyte_or_hword;
   15337 
   15338      /* Skip some invalid cases, which would lead to two competing
   15339         updates to the same register, or which are otherwise
   15340         disallowed by the spec. */
   15341      switch (summary) {
   15342         case 1 | 16:
   15343            break;
   15344         case 1 | 32:
   15345            if (rM == 15) goto after_load_store_sbyte_or_hword;
   15346            break;
   15347         case 2 | 16: case 3 | 16:
   15348            if (rN == 15) goto after_load_store_sbyte_or_hword;
   15349            if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
   15350            break;
   15351         case 2 | 32: case 3 | 32:
   15352            if (rM == 15) goto after_load_store_sbyte_or_hword;
   15353            if (rN == 15) goto after_load_store_sbyte_or_hword;
   15354            if (rN == rM) goto after_load_store_sbyte_or_hword;
   15355            if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
   15356            break;
   15357         default:
   15358            vassert(0);
   15359      }
   15360 
   15361      /* If this is a branch, make it unconditional at this point.
   15362         Doing conditional branches in-line is too complex (for now).
   15363         Note that you'd have to be insane to use any of these loads to
   15364         do a branch, since they only load 16 bits at most, but we
   15365         handle it just in case. */
   15366      if (bL == 1 && rD == 15 && condT != IRTemp_INVALID) {
   15367         // go uncond
   15368         mk_skip_over_A32_if_cond_is_false( condT );
   15369         condT = IRTemp_INVALID;
   15370         // now uncond
   15371      }
   15372 
   15373      /* compute the effective address.  Bind it to a tmp since we
   15374         may need to use it twice. */
   15375      IRExpr* eaE = NULL;
   15376      switch (summary & 0xF0) {
   15377         case 16:
   15378            eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
   15379            break;
   15380         case 32:
   15381            eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
   15382            break;
   15383      }
   15384      vassert(eaE);
   15385      IRTemp eaT = newTemp(Ity_I32);
   15386      assign(eaT, eaE);
   15387 
   15388      /* get the old Rn value */
   15389      IRTemp rnT = newTemp(Ity_I32);
   15390      assign(rnT, getIRegA(rN));
   15391 
   15392      /* decide on the transfer address */
   15393      IRTemp taT = IRTemp_INVALID;
   15394      switch (summary & 0x0F) {
   15395         case 1: case 2: taT = eaT; break;
   15396         case 3:         taT = rnT; break;
   15397      }
   15398      vassert(taT != IRTemp_INVALID);
   15399 
   15400      /* ll previous value of rD, for dealing with conditional loads */
   15401      IRTemp llOldRd = newTemp(Ity_I32);
   15402      assign(llOldRd, llGetIReg(rD));
   15403 
   15404      /* halfword store  H 1  L 0  S 0
   15405         uhalf load      H 1  L 1  S 0
   15406         shalf load      H 1  L 1  S 1
   15407         sbyte load      H 0  L 1  S 1
   15408      */
   15409      const HChar* name = NULL;
   15410      /* generate the transfer */
   15411      /**/ if (bH == 1 && bL == 0 && bS == 0) { // halfword store
   15412         storeGuardedLE( mkexpr(taT),
   15413                         unop(Iop_32to16, getIRegA(rD)), condT );
   15414         name = "strh";
   15415      }
   15416      else if (bH == 1 && bL == 1 && bS == 0) { // uhalf load
   15417         IRTemp newRd = newTemp(Ity_I32);
   15418         loadGuardedLE( newRd, ILGop_16Uto32,
   15419                        mkexpr(taT), mkexpr(llOldRd), condT );
   15420         putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
   15421         name = "ldrh";
   15422      }
   15423      else if (bH == 1 && bL == 1 && bS == 1) { // shalf load
   15424         IRTemp newRd = newTemp(Ity_I32);
   15425         loadGuardedLE( newRd, ILGop_16Sto32,
   15426                        mkexpr(taT), mkexpr(llOldRd), condT );
   15427         putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
   15428         name = "ldrsh";
   15429      }
   15430      else if (bH == 0 && bL == 1 && bS == 1) { // sbyte load
   15431         IRTemp newRd = newTemp(Ity_I32);
   15432         loadGuardedLE( newRd, ILGop_8Sto32,
   15433                        mkexpr(taT), mkexpr(llOldRd), condT );
   15434         putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
   15435         name = "ldrsb";
   15436      }
   15437      else
   15438         vassert(0); // should be assured by logic above
   15439 
   15440      /* Update Rn if necessary. */
   15441      switch (summary & 0x0F) {
   15442         case 2: case 3:
   15443            // should be assured by logic above:
   15444            if (bL == 1)
   15445               vassert(rD != rN); /* since we just wrote rD */
   15446            putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   15447            break;
   15448      }
   15449 
   15450      switch (summary & 0x0F) {
   15451         case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
   15452                  break;
   15453         case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
   15454                      name, nCC(INSN_COND), rD, dis_buf);
   15455                  break;
   15456         case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
   15457                      name, nCC(INSN_COND), rD, dis_buf);
   15458                  break;
   15459         default: vassert(0);
   15460      }
   15461 
   15462      /* XXX deal with alignment constraints */
   15463 
   15464      goto decode_success;
   15465 
   15466      /* Complications:
   15467 
   15468         For all loads: if the Amode specifies base register
   15469         writeback, and the same register is specified for Rd and Rn,
   15470         the results are UNPREDICTABLE.
   15471 
   15472         For all loads and stores: if R15 is written, branch to
   15473         that address afterwards.
   15474 
   15475         Misaligned halfword stores => Unpredictable
   15476         Misaligned halfword loads  => Unpredictable
   15477      */
   15478    }
   15479 
   15480   after_load_store_sbyte_or_hword:
   15481 
   15482    /* --------------------- Load/store multiple -------------- */
   15483    // LD/STMIA LD/STMIB LD/STMDA LD/STMDB
   15484    // Remarkably complex and difficult to get right
   15485    // match 27:20 as 100XX0WL
   15486    if (BITS8(1,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,1,0,0))) {
   15487       // A5-50 LD/STMIA  cond 1000 10WL Rn RegList
   15488       // A5-51 LD/STMIB  cond 1001 10WL Rn RegList
   15489       // A5-53 LD/STMDA  cond 1000 00WL Rn RegList
   15490       // A5-53 LD/STMDB  cond 1001 00WL Rn RegList
   15491       //                   28   24   20 16       0
   15492 
   15493       UInt bINC    = (insn >> 23) & 1;
   15494       UInt bBEFORE = (insn >> 24) & 1;
   15495 
   15496       UInt bL      = (insn >> 20) & 1;  /* load=1, store=0 */
   15497       UInt bW      = (insn >> 21) & 1;  /* Rn wback=1, no wback=0 */
   15498       UInt rN      = (insn >> 16) & 0xF;
   15499       UInt regList = insn & 0xFFFF;
   15500       /* Skip some invalid cases, which would lead to two competing
   15501          updates to the same register, or which are otherwise
   15502          disallowed by the spec.  Note the test above has required
   15503          that S == 0, since that looks like a kernel-mode only thing.
   15504          Done by forcing the real pattern, viz 100XXSWL to actually be
   15505          100XX0WL. */
   15506       if (rN == 15) goto after_load_store_multiple;
   15507       // reglist can't be empty
   15508       if (regList == 0) goto after_load_store_multiple;
   15509       // if requested to writeback Rn, and this is a load instruction,
   15510       // then Rn can't appear in RegList, since we'd have two competing
   15511       // new values for Rn.  We do however accept this case for store
   15512       // instructions.
   15513       if (bW == 1 && bL == 1 && ((1 << rN) & regList) > 0)
   15514          goto after_load_store_multiple;
   15515 
   15516       /* Now, we can't do a conditional load or store, since that very
   15517          likely will generate an exception.  So we have to take a side
   15518          exit at this point if the condition is false. */
   15519       if (condT != IRTemp_INVALID) {
   15520          mk_skip_over_A32_if_cond_is_false( condT );
   15521          condT = IRTemp_INVALID;
   15522       }
   15523 
   15524       /* Ok, now we're unconditional.  Generate the IR. */
   15525       mk_ldm_stm( True/*arm*/, rN, bINC, bBEFORE, bW, bL, regList );
   15526 
   15527       DIP("%sm%c%c%s r%u%s, {0x%04x}\n",
   15528           bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
   15529           nCC(INSN_COND),
   15530           rN, bW ? "!" : "", regList);
   15531 
   15532       goto decode_success;
   15533    }
   15534 
   15535   after_load_store_multiple:
   15536 
   15537    /* --------------------- Control flow --------------------- */
   15538    // B, BL (Branch, or Branch-and-Link, to immediate offset)
   15539    //
   15540    if (BITS8(1,0,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))) {
   15541       UInt link   = (insn >> 24) & 1;
   15542       UInt uimm24 = insn & ((1<<24)-1);
   15543       Int  simm24 = (Int)uimm24;
   15544       UInt dst    = guest_R15_curr_instr_notENC + 8
   15545                     + (((simm24 << 8) >> 8) << 2);
   15546       IRJumpKind jk = link ? Ijk_Call : Ijk_Boring;
   15547       if (link) {
   15548          putIRegA(14, mkU32(guest_R15_curr_instr_notENC + 4),
   15549                       condT, Ijk_Boring);
   15550       }
   15551       if (condT == IRTemp_INVALID) {
   15552          /* unconditional transfer to 'dst'.  See if we can simply
   15553             continue tracing at the destination. */
   15554          if (resteerOkFn( callback_opaque, dst )) {
   15555             /* yes */
   15556             dres.whatNext   = Dis_ResteerU;
   15557             dres.continueAt = dst;
   15558          } else {
   15559             /* no; terminate the SB at this point. */
   15560             llPutIReg(15, mkU32(dst));
   15561             dres.jk_StopHere = jk;
   15562             dres.whatNext    = Dis_StopHere;
   15563          }
   15564          DIP("b%s 0x%x\n", link ? "l" : "", dst);
   15565       } else {
   15566          /* conditional transfer to 'dst' */
   15567          const HChar* comment = "";
   15568 
   15569          /* First see if we can do some speculative chasing into one
   15570             arm or the other.  Be conservative and only chase if
   15571             !link, that is, this is a normal conditional branch to a
   15572             known destination. */
   15573          if (!link
   15574              && resteerCisOk
   15575              && vex_control.guest_chase_cond
   15576              && dst < guest_R15_curr_instr_notENC
   15577              && resteerOkFn( callback_opaque, dst) ) {
   15578             /* Speculation: assume this backward branch is taken.  So
   15579                we need to emit a side-exit to the insn following this
   15580                one, on the negation of the condition, and continue at
   15581                the branch target address (dst). */
   15582             stmt( IRStmt_Exit( unop(Iop_Not1,
   15583                                     unop(Iop_32to1, mkexpr(condT))),
   15584                                Ijk_Boring,
   15585                                IRConst_U32(guest_R15_curr_instr_notENC+4),
   15586                                OFFB_R15T ));
   15587             dres.whatNext   = Dis_ResteerC;
   15588             dres.continueAt = (Addr32)dst;
   15589             comment = "(assumed taken)";
   15590          }
   15591          else
   15592          if (!link
   15593              && resteerCisOk
   15594              && vex_control.guest_chase_cond
   15595              && dst >= guest_R15_curr_instr_notENC
   15596              && resteerOkFn( callback_opaque,
   15597                              guest_R15_curr_instr_notENC+4) ) {
   15598             /* Speculation: assume this forward branch is not taken.
   15599                So we need to emit a side-exit to dst (the dest) and
   15600                continue disassembling at the insn immediately
   15601                following this one. */
   15602             stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
   15603                                Ijk_Boring,
   15604                                IRConst_U32(dst),
   15605                                OFFB_R15T ));
   15606             dres.whatNext   = Dis_ResteerC;
   15607             dres.continueAt = guest_R15_curr_instr_notENC+4;
   15608             comment = "(assumed not taken)";
   15609          }
   15610          else {
   15611             /* Conservative default translation - end the block at
   15612                this point. */
   15613             stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
   15614                                jk, IRConst_U32(dst), OFFB_R15T ));
   15615             llPutIReg(15, mkU32(guest_R15_curr_instr_notENC + 4));
   15616             dres.jk_StopHere = Ijk_Boring;
   15617             dres.whatNext    = Dis_StopHere;
   15618          }
   15619          DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND),
   15620              dst, comment);
   15621       }
   15622       goto decode_success;
   15623    }
   15624 
   // BX, BLX (Branch, or Branch-and-Link, to an address held in a
   // register).  Printed below as "bx" / "blx".
   // NB: interworking branch
   if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
       && INSN(19,12) == BITS8(1,1,1,1,1,1,1,1)
       && (INSN(11,4) == BITS8(1,1,1,1,0,0,1,1)
           || INSN(11,4) == BITS8(1,1,1,1,0,0,0,1))) {
      IRTemp  dst = newTemp(Ity_I32);
      // Of the two accepted 11:4 patterns, only ...0011 has bit 1 set
      // (insn bit 5); that is the linking form.
      UInt    link = (INSN(11,4) >> 1) & 1;
      UInt    rM   = INSN(3,0);
      // we don't decode the case (link && rM == 15), as that's
      // Unpredictable.
      if (!(link && rM == 15)) {
         // If conditional, skip over this insn when the condition is
         // false.  condT itself is left unchanged; the register
         // writes below are made explicitly unconditional.
         if (condT != IRTemp_INVALID) {
            mk_skip_over_A32_if_cond_is_false( condT );
         }
         // rM contains an interworking address exactly as we require
         // (with continuation CPSR.T in bit 0), so we can use it
         // as-is, with no masking.
         assign( dst, getIRegA(rM) );
         if (link) {
            // Return address: the insn following this one.
            putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
                      IRTemp_INVALID/*because AL*/, Ijk_Boring );
         }
         llPutIReg(15, mkexpr(dst));
         // Jump kind: a call for the linking form; otherwise a return
         // if the target register is r14, else a plain jump.
         dres.jk_StopHere = link ? Ijk_Call
                                 : (rM == 14 ? Ijk_Ret : Ijk_Boring);
         dres.whatNext    = Dis_StopHere;
         if (condT == IRTemp_INVALID) {
            DIP("b%sx r%u\n", link ? "l" : "", rM);
         } else {
            DIP("b%sx%s r%u\n", link ? "l" : "", nCC(INSN_COND), rM);
         }
         goto decode_success;
      }
      /* else: (link && rM == 15): just fall through */
   }
   15661 
   15662    /* --- NB: ARM interworking branches are in NV space, hence
   15663       are handled elsewhere by decode_NV_instruction.
   15664       ---
   15665    */
   15666 
   15667    /* --------------------- Clz --------------------- */
   15668    // CLZ
   15669    if (INSN(27,20) == BITS8(0,0,0,1,0,1,1,0)
   15670        && INSN(19,16) == BITS4(1,1,1,1)
   15671        && INSN(11,4) == BITS8(1,1,1,1,0,0,0,1)) {
   15672       UInt rD = INSN(15,12);
   15673       UInt rM = INSN(3,0);
   15674       IRTemp arg = newTemp(Ity_I32);
   15675       IRTemp res = newTemp(Ity_I32);
   15676       assign(arg, getIRegA(rM));
   15677       assign(res, IRExpr_ITE(
   15678                      binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
   15679                      mkU32(32),
   15680                      unop(Iop_Clz32, mkexpr(arg))
   15681             ));
   15682       putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   15683       DIP("clz%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
   15684       goto decode_success;
   15685    }
   15686 
   15687    /* --------------------- Mul etc --------------------- */
   15688    // MUL
   15689    if (BITS8(0,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
   15690        && INSN(15,12) == BITS4(0,0,0,0)
   15691        && INSN(7,4) == BITS4(1,0,0,1)) {
   15692       UInt bitS = (insn >> 20) & 1; /* 20:20 */
   15693       UInt rD = INSN(19,16);
   15694       UInt rS = INSN(11,8);
   15695       UInt rM = INSN(3,0);
   15696       if (rD == 15 || rM == 15 || rS == 15) {
   15697          /* Unpredictable; don't decode; fall through */
   15698       } else {
   15699          IRTemp argL = newTemp(Ity_I32);
   15700          IRTemp argR = newTemp(Ity_I32);
   15701          IRTemp res  = newTemp(Ity_I32);
   15702          IRTemp oldC = IRTemp_INVALID;
   15703          IRTemp oldV = IRTemp_INVALID;
   15704          assign( argL, getIRegA(rM));
   15705          assign( argR, getIRegA(rS));
   15706          assign( res, binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) );
   15707          if (bitS) {
   15708             oldC = newTemp(Ity_I32);
   15709             assign(oldC, mk_armg_calculate_flag_c());
   15710             oldV = newTemp(Ity_I32);
   15711             assign(oldV, mk_armg_calculate_flag_v());
   15712          }
   15713          // now update guest state
   15714          putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   15715          if (bitS) {
   15716             IRTemp pair = newTemp(Ity_I32);
   15717             assign( pair, binop(Iop_Or32,
   15718                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   15719                                 mkexpr(oldV)) );
   15720             setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
   15721          }
   15722          DIP("mul%c%s r%u, r%u, r%u\n",
   15723              bitS ? 's' : ' ', nCC(INSN_COND), rD, rM, rS);
   15724          goto decode_success;
   15725       }
   15726       /* fall through */
   15727    }
   15728 
   15729    /* --------------------- Integer Divides --------------------- */
   15730    // SDIV
   15731    if (BITS8(0,1,1,1,0,0,0,1) == INSN(27,20)
   15732        && INSN(15,12) == BITS4(1,1,1,1)
   15733        && INSN(7,4) == BITS4(0,0,0,1)) {
   15734       UInt rD = INSN(19,16);
   15735       UInt rM = INSN(11,8);
   15736       UInt rN = INSN(3,0);
   15737       if (rD == 15 || rM == 15 || rN == 15) {
   15738          /* Unpredictable; don't decode; fall through */
   15739       } else {
   15740          IRTemp res  = newTemp(Ity_I32);
   15741          IRTemp argL = newTemp(Ity_I32);
   15742          IRTemp argR = newTemp(Ity_I32);
   15743          assign(argL, getIRegA(rN));
   15744          assign(argR, getIRegA(rM));
   15745          assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
   15746          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   15747          DIP("sdiv r%u, r%u, r%u\n", rD, rN, rM);
   15748          goto decode_success;
   15749       }
   15750     }
   15751 
   15752    // UDIV
   15753    if (BITS8(0,1,1,1,0,0,1,1) == INSN(27,20)
   15754        && INSN(15,12) == BITS4(1,1,1,1)
   15755        && INSN(7,4) == BITS4(0,0,0,1)) {
   15756       UInt rD = INSN(19,16);
   15757       UInt rM = INSN(11,8);
   15758       UInt rN = INSN(3,0);
   15759       if (rD == 15 || rM == 15 || rN == 15) {
   15760          /* Unpredictable; don't decode; fall through */
   15761       } else {
   15762          IRTemp res  = newTemp(Ity_I32);
   15763          IRTemp argL = newTemp(Ity_I32);
   15764          IRTemp argR = newTemp(Ity_I32);
   15765          assign(argL, getIRegA(rN));
   15766          assign(argR, getIRegA(rM));
   15767          assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
   15768          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   15769          DIP("udiv r%u, r%u, r%u\n", rD, rN, rM);
   15770          goto decode_success;
   15771       }
   15772    }
   15773 
   15774    // MLA, MLS: rD = rN + rM*rS (MLA) or rN - rM*rS (MLS); only MLA may set flags
   15775    if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   15776        && INSN(7,4) == BITS4(1,0,0,1)) {
   15777       UInt bitS  = (insn >> 20) & 1; /* 20:20, 1 = update flags */
   15778       UInt isMLS = (insn >> 22) & 1; /* 22:22, 1 = subtract the product */
   15779       UInt rD = INSN(19,16);
   15780       UInt rN = INSN(15,12);
   15781       UInt rS = INSN(11,8);
   15782       UInt rM = INSN(3,0);
   15783       if (bitS == 1 && isMLS == 1) {
   15784          /* This isn't allowed (MLS that sets flags).  don't decode;
   15785             fall through */
   15786       }
   15787       else
   15788       if (rD == 15 || rM == 15 || rS == 15 || rN == 15) {
   15789          /* Unpredictable (r15 used); don't decode; fall through */
   15790       } else {
   15791          IRTemp argL = newTemp(Ity_I32);
   15792          IRTemp argR = newTemp(Ity_I32);
   15793          IRTemp argP = newTemp(Ity_I32);
   15794          IRTemp res  = newTemp(Ity_I32);
   15795          IRTemp oldC = IRTemp_INVALID;
   15796          IRTemp oldV = IRTemp_INVALID;
   15797          assign( argL, getIRegA(rM));
   15798          assign( argR, getIRegA(rS));
   15799          assign( argP, getIRegA(rN));
   15800          assign( res, binop(isMLS ? Iop_Sub32 : Iop_Add32,
   15801                             mkexpr(argP),
   15802                             binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) ));
   15803          if (bitS) {
   15804             vassert(!isMLS); // guaranteed above
   15805             oldC = newTemp(Ity_I32);
   15806             assign(oldC, mk_armg_calculate_flag_c());
   15807             oldV = newTemp(Ity_I32);
   15808             assign(oldV, mk_armg_calculate_flag_v());
   15809          }
   15810          // now update guest state (oldC/oldV were computed above, before rD is written)
   15811          putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   15812          if (bitS) {
   15813             IRTemp pair = newTemp(Ity_I32); /* (oldC << 1) | oldV */
   15814             assign( pair, binop(Iop_Or32,
   15815                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   15816                                 mkexpr(oldV)) );
   15817             setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
   15818          }
   15819          DIP("ml%c%c%s r%u, r%u, r%u, r%u\n",
   15820              isMLS ? 's' : 'a', bitS ? 's' : ' ',
   15821              nCC(INSN_COND), rD, rM, rS, rN);
   15822          goto decode_success;
   15823       }
   15824       /* fall through */
   15825    }
   15826 
   15827    // SMULL, UMULL: rDhi:rDlo = rM * rS, full 64-bit product (signed if bit 22 set)
   15828    if (BITS8(0,0,0,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   15829        && INSN(7,4) == BITS4(1,0,0,1)) {
   15830       UInt bitS = (insn >> 20) & 1; /* 20:20, 1 = update flags */
   15831       UInt rDhi = INSN(19,16);
   15832       UInt rDlo = INSN(15,12);
   15833       UInt rS   = INSN(11,8);
   15834       UInt rM   = INSN(3,0);
   15835       UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22, 1 = signed multiply */
   15836       if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
   15837          /* Unpredictable (r15 used, or rDhi == rDlo); don't decode; fall through */
   15838       } else {
   15839          IRTemp argL  = newTemp(Ity_I32);
   15840          IRTemp argR  = newTemp(Ity_I32);
   15841          IRTemp res   = newTemp(Ity_I64);
   15842          IRTemp resHi = newTemp(Ity_I32);
   15843          IRTemp resLo = newTemp(Ity_I32);
   15844          IRTemp oldC  = IRTemp_INVALID;
   15845          IRTemp oldV  = IRTemp_INVALID;
   15846          IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
   15847          assign( argL, getIRegA(rM));
   15848          assign( argR, getIRegA(rS));
   15849          assign( res, binop(mulOp, mkexpr(argL), mkexpr(argR)) );
   15850          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   15851          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   15852          if (bitS) {
   15853             oldC = newTemp(Ity_I32);
   15854             assign(oldC, mk_armg_calculate_flag_c());
   15855             oldV = newTemp(Ity_I32);
   15856             assign(oldV, mk_armg_calculate_flag_v());
   15857          }
   15858          // now update guest state (oldC/oldV were computed above, before the writes)
   15859          putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   15860          putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   15861          if (bitS) {
   15862             IRTemp pair = newTemp(Ity_I32); /* (oldC << 1) | oldV */
   15863             assign( pair, binop(Iop_Or32,
   15864                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   15865                                 mkexpr(oldV)) );
   15866             setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
   15867          }
   15868          DIP("%cmull%c%s r%u, r%u, r%u, r%u\n",
   15869              isS ? 's' : 'u', bitS ? 's' : ' ',
   15870              nCC(INSN_COND), rDlo, rDhi, rM, rS);
   15871          goto decode_success;
   15872       }
   15873       /* fall through */
   15874    }
   15875 
   15876    // SMLAL, UMLAL: rDhi:rDlo += rM * rS, 64-bit accumulate (signed if bit 22 set)
   15877    if (BITS8(0,0,0,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   15878        && INSN(7,4) == BITS4(1,0,0,1)) {
   15879       UInt bitS = (insn >> 20) & 1; /* 20:20, 1 = update flags */
   15880       UInt rDhi = INSN(19,16);
   15881       UInt rDlo = INSN(15,12);
   15882       UInt rS   = INSN(11,8);
   15883       UInt rM   = INSN(3,0);
   15884       UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22, 1 = signed multiply */
   15885       if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
   15886          /* Unpredictable (r15 used, or rDhi == rDlo); don't decode; fall through */
   15887       } else {
   15888          IRTemp argL  = newTemp(Ity_I32);
   15889          IRTemp argR  = newTemp(Ity_I32);
   15890          IRTemp old   = newTemp(Ity_I64); /* previous rDhi:rDlo as one 64-bit value */
   15891          IRTemp res   = newTemp(Ity_I64);
   15892          IRTemp resHi = newTemp(Ity_I32);
   15893          IRTemp resLo = newTemp(Ity_I32);
   15894          IRTemp oldC  = IRTemp_INVALID;
   15895          IRTemp oldV  = IRTemp_INVALID;
   15896          IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
   15897          assign( argL, getIRegA(rM));
   15898          assign( argR, getIRegA(rS));
   15899          assign( old, binop(Iop_32HLto64, getIRegA(rDhi), getIRegA(rDlo)) );
   15900          assign( res, binop(Iop_Add64,
   15901                             mkexpr(old),
   15902                             binop(mulOp, mkexpr(argL), mkexpr(argR))) );
   15903          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   15904          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   15905          if (bitS) {
   15906             oldC = newTemp(Ity_I32);
   15907             assign(oldC, mk_armg_calculate_flag_c());
   15908             oldV = newTemp(Ity_I32);
   15909             assign(oldV, mk_armg_calculate_flag_v());
   15910          }
   15911          // now update guest state (oldC/oldV were computed above, before the writes)
   15912          putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   15913          putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   15914          if (bitS) {
   15915             IRTemp pair = newTemp(Ity_I32); /* (oldC << 1) | oldV */
   15916             assign( pair, binop(Iop_Or32,
   15917                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   15918                                 mkexpr(oldV)) );
   15919             setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
   15920          }
   15921          DIP("%cmlal%c%s r%u, r%u, r%u, r%u\n",
   15922              isS ? 's' : 'u', bitS ? 's' : ' ', nCC(INSN_COND),
   15923              rDlo, rDhi, rM, rS);
   15924          goto decode_success;
   15925       }
   15926       /* fall through */
   15927    }
   15928 
   15929    // UMAAL: rDhi:rDlo = rN * rM + rDhi + rDlo (unsigned, 64-bit result)
   15930    if (BITS8(0,0,0,0,0,1,0,0) == INSN(27,20) && INSN(7,4) == BITS4(1,0,0,1)) {
   15931       UInt rDhi = INSN(19,16);
   15932       UInt rDlo = INSN(15,12);
   15933       UInt rM   = INSN(11,8);
   15934       UInt rN   = INSN(3,0);
   15935       if (rDlo == 15 || rDhi == 15 || rN == 15 || rM == 15 || rDhi == rDlo)  {
   15936          /* Unpredictable (r15 used, or rDhi == rDlo); don't decode; fall through */
   15937       } else {
   15938          IRTemp argN   = newTemp(Ity_I32);
   15939          IRTemp argM   = newTemp(Ity_I32);
   15940          IRTemp argDhi = newTemp(Ity_I32);
   15941          IRTemp argDlo = newTemp(Ity_I32);
   15942          IRTemp res    = newTemp(Ity_I64);
   15943          IRTemp resHi  = newTemp(Ity_I32);
   15944          IRTemp resLo  = newTemp(Ity_I32);
   15945          assign( argN,   getIRegA(rN) );
   15946          assign( argM,   getIRegA(rM) );
   15947          assign( argDhi, getIRegA(rDhi) );
   15948          assign( argDlo, getIRegA(rDlo) );
   15949          assign( res,    /* both addends are zero-extended to 64 bits first */
   15950                  binop(Iop_Add64,
   15951                        binop(Iop_Add64,
   15952                              binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
   15953                              unop(Iop_32Uto64, mkexpr(argDhi))),
   15954                        unop(Iop_32Uto64, mkexpr(argDlo))) );
   15955          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   15956          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   15957          // now update guest state (no flag update is generated for this insn)
   15958          putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   15959          putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   15960          DIP("umaal %s r%u, r%u, r%u, r%u\n",
   15961              nCC(INSN_COND), rDlo, rDhi, rN, rM);
   15962          goto decode_success;
   15963       }
   15964       /* fall through */
   15965    }
   15966 
   15967    /* --------------------- Msr etc --------------------- */
   15968 
   15969    // MSR apsr, #imm: write selected APSR fields from a rotated 8-bit immediate
   15970    if (INSN(27,20) == BITS8(0,0,1,1,0,0,1,0)
   15971        && INSN(17,12) == BITS6(0,0,1,1,1,1)) {
   15972       UInt write_ge    = INSN(18,18);
   15973       UInt write_nzcvq = INSN(19,19);
   15974       if (write_nzcvq || write_ge) {
   15975          UInt   imm = (INSN(11,0) >> 0) & 0xFF;       /* 7:0, 8-bit value */
   15976          UInt   rot = 2 * ((INSN(11,0) >> 8) & 0xF);  /* 11:8, rotate-right / 2 */
   15977          IRTemp immT = newTemp(Ity_I32);
   15978          vassert(rot <= 30);
   15979          imm = ROR32(imm, rot);
   15980          assign(immT, mkU32(imm));
   15981          desynthesise_APSR( write_nzcvq, write_ge, immT, condT );
   15982          DIP("msr%s cpsr_%s%s, #0x%08x\n", nCC(INSN_COND),
   15983              write_nzcvq ? "f" : "", write_ge ? "g" : "", imm);
   15984          goto decode_success;
   15985       }
   15986       /* fall through: neither field selected, not decoded here */
   15987    }
   15988 
   15989    // MSR apsr, reg: write selected APSR fields (nzcvq and/or ge) from rN
   15990    if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
   15991        && INSN(17,12) == BITS6(0,0,1,1,1,1)
   15992        && INSN(11,4) == BITS8(0,0,0,0,0,0,0,0)) {
   15993       UInt rN          = INSN(3,0);
   15994       UInt write_ge    = INSN(18,18);
   15995       UInt write_nzcvq = INSN(19,19);
   15996       if (rN != 15 && (write_nzcvq || write_ge)) {
   15997          IRTemp rNt = newTemp(Ity_I32);
   15998          assign(rNt, getIRegA(rN));
   15999          desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
   16000          DIP("msr%s cpsr_%s%s, r%u\n", nCC(INSN_COND),
   16001              write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
   16002          goto decode_success;
   16003       }
   16004       /* fall through: rN == 15 or no field selected, not decoded here */
   16005    }
   16006 
   16007    // MRS rD, cpsr: rD = value built by synthesise_APSR()
   16008    if ((insn & 0x0FFF0FFF) == 0x010F0000) {
   16009       UInt rD   = INSN(15,12);
   16010       if (rD != 15) {
   16011          IRTemp apsr = synthesise_APSR();
   16012          putIRegA( rD, mkexpr(apsr), condT, Ijk_Boring );
   16013          DIP("mrs%s r%u, cpsr\n", nCC(INSN_COND), rD);
   16014          goto decode_success;
   16015       }
   16016       /* fall through: rD == 15 is not decoded here */
   16017    }
   16018 
   16019    /* --------------------- Svc (only imm24 == 0 is decoded) --------------------- */
   16020    if (BITS8(1,1,1,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))) {
   16021       UInt imm24 = (insn >> 0) & 0xFFFFFF;
   16022       if (imm24 == 0) {
   16023          /* A syscall.  We can't do this conditionally, hence: */
   16024          if (condT != IRTemp_INVALID) {
   16025             mk_skip_over_A32_if_cond_is_false( condT );
   16026          }
   16027          // AL (unconditional) after here
   16028          llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 4 )); /* next insn */
   16029          dres.jk_StopHere = Ijk_Sys_syscall;
   16030          dres.whatNext    = Dis_StopHere;
   16031          DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
   16032          goto decode_success;
   16033       }
   16034       /* fall through: nonzero imm24 is not decoded here */
   16035    }
   16036 
   16037    /* ------------------------ swp ------------------------ */
   16038 
   16039    // SWP, SWPB: store rM to [rN] (word, or byte for SWPB); old memory value goes to rD
   16040    if (BITS8(0,0,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   16041        && BITS4(0,0,0,0) == INSN(11,8)
   16042        && BITS4(1,0,0,1) == INSN(7,4)) {
   16043       UInt   rN   = INSN(19,16);
   16044       UInt   rD   = INSN(15,12);
   16045       UInt   rM   = INSN(3,0);
   16046       IRTemp tRn  = newTemp(Ity_I32);  /* address */
   16047       IRTemp tNew = newTemp(Ity_I32);  /* value to store */
   16048       IRTemp tOld = IRTemp_INVALID;    /* value loaded; type depends on isB */
   16049       IRTemp tSC1 = newTemp(Ity_I1);   /* result of the store-conditional */
   16050       UInt   isB  = (insn >> 22) & 1;  /* 22:22, 1 = byte variant */
   16051 
   16052       if (rD == 15 || rN == 15 || rM == 15 || rN == rM || rN == rD) {
   16053          /* undecodable; fall through */
   16054       } else {
   16055          /* make unconditional */
   16056          if (condT != IRTemp_INVALID) {
   16057             mk_skip_over_A32_if_cond_is_false( condT );
   16058             condT = IRTemp_INVALID;
   16059          }
   16060          /* Ok, now we're unconditional.  Generate a LL-SC loop. */
   16061          assign(tRn, getIRegA(rN));
   16062          assign(tNew, getIRegA(rM));
   16063          if (isB) {
   16064             /* swpb: 8-bit load-linked, then store-conditional of the low byte */
   16065             tOld = newTemp(Ity_I8);
   16066             stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
   16067                               NULL/*=>isLL*/) );
   16068             stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
   16069                               unop(Iop_32to8, mkexpr(tNew))) );
   16070          } else {
   16071             /* swp: 32-bit load-linked, then store-conditional of the word */
   16072             tOld = newTemp(Ity_I32);
   16073             stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
   16074                               NULL/*=>isLL*/) );
   16075             stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
   16076                               mkexpr(tNew)) );
   16077          }
   16078          stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
   16079                            /*Ijk_NoRedir*/Ijk_Boring,
   16080                            IRConst_U32(guest_R15_curr_instr_notENC),
   16081                            OFFB_R15T ));  /* SC failed: exit back to this insn */
   16082          putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
   16083                       IRTemp_INVALID, Ijk_Boring);
   16084          DIP("swp%s%s r%u, r%u, [r%u]\n",
   16085              isB ? "b" : "", nCC(INSN_COND), rD, rM, rN);
   16086          goto decode_success;
   16087       }
   16088       /* fall through */
   16089    }
   16090 
   16091    /* ----------------------------------------------------------- */
   16092    /* -- ARMv6 instructions                                    -- */
   16093    /* ----------------------------------------------------------- */
   16094 
   16095    /* ------------------- {ldr,str}ex{,b,h,d} ------------------- */
   16096 
   16097    // LDREXD, LDREX, LDREXH, LDREXB: load-linked from [rN] into rT (rT,rT+1 for D)
   16098    if (0x01900F9F == (insn & 0x0F900FFF)) {
   16099       UInt   rT    = INSN(15,12);
   16100       UInt   rN    = INSN(19,16);
   16101       IRType ty    = Ity_INVALID;
   16102       IROp   widen = Iop_INVALID;  /* stays INVALID when no widening is needed */
   16103       const HChar* nm = NULL;
   16104       Bool   valid = True;
   16105       switch (INSN(22,21)) {      /* access size selector */
   16106          case 0: nm = "";  ty = Ity_I32; break;
   16107          case 1: nm = "d"; ty = Ity_I64; break;
   16108          case 2: nm = "b"; ty = Ity_I8;  widen = Iop_8Uto32; break;
   16109          case 3: nm = "h"; ty = Ity_I16; widen = Iop_16Uto32; break;
   16110          default: vassert(0);
   16111       }
   16112       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
   16113          if (rT == 15 || rN == 15)
   16114             valid = False;
   16115       } else {
   16116          vassert(ty == Ity_I64);
   16117          if ((rT & 1) == 1 || rT == 14 || rN == 15) /* need even rT, and rT+1 != 15 */
   16118             valid = False;
   16119       }
   16120       if (valid) {
   16121          IRTemp res;
   16122          /* make unconditional */
   16123          if (condT != IRTemp_INVALID) {
   16124            mk_skip_over_A32_if_cond_is_false( condT );
   16125            condT = IRTemp_INVALID;
   16126          }
   16127          /* Ok, now we're unconditional.  Do the load. */
   16128          res = newTemp(ty);
   16129          // FIXME: assumes little-endian guest
   16130          stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
   16131                            NULL/*this is a load*/) );
   16132          if (ty == Ity_I64) {
   16133             // FIXME: assumes little-endian guest (low half to rT, high half to rT+1)
   16134             putIRegA(rT+0, unop(Iop_64to32, mkexpr(res)),
   16135                            IRTemp_INVALID, Ijk_Boring);
   16136             putIRegA(rT+1, unop(Iop_64HIto32, mkexpr(res)),
   16137                            IRTemp_INVALID, Ijk_Boring);
   16138             DIP("ldrex%s%s r%u, r%u, [r%u]\n",
   16139                 nm, nCC(INSN_COND), rT+0, rT+1, rN);
   16140          } else {
   16141             putIRegA(rT, widen == Iop_INVALID
   16142                             ? mkexpr(res) : unop(widen, mkexpr(res)),
   16143                      IRTemp_INVALID, Ijk_Boring);
   16144             DIP("ldrex%s%s r%u, [r%u]\n", nm, nCC(INSN_COND), rT, rN);
   16145          }
   16146          goto decode_success;
   16147       }
   16148       /* undecodable; fall through */
   16149    }
   16150 
   16151    // STREXD, STREX, STREXH, STREXB: store-conditional rT to [rN]; rD = 0 ok, 1 failed
   16152    if (0x01800F90 == (insn & 0x0F900FF0)) {
   16153       UInt   rT     = INSN(3,0);
   16154       UInt   rN     = INSN(19,16);
   16155       UInt   rD     = INSN(15,12);
   16156       IRType ty     = Ity_INVALID;
   16157       IROp   narrow = Iop_INVALID;  /* stays INVALID when no narrowing is needed */
   16158       const HChar* nm = NULL;
   16159       Bool   valid  = True;
   16160       switch (INSN(22,21)) {       /* access size selector */
   16161          case 0: nm = "";  ty = Ity_I32; break;
   16162          case 1: nm = "d"; ty = Ity_I64; break;
   16163          case 2: nm = "b"; ty = Ity_I8;  narrow = Iop_32to8; break;
   16164          case 3: nm = "h"; ty = Ity_I16; narrow = Iop_32to16; break;
   16165          default: vassert(0);
   16166       }
   16167       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
   16168          if (rD == 15 || rN == 15 || rT == 15
   16169              || rD == rN || rD == rT)
   16170             valid = False;
   16171       } else {
   16172          vassert(ty == Ity_I64);
   16173          if (rD == 15 || (rT & 1) == 1 || rT == 14 || rN == 15
   16174              || rD == rN || rD == rT || rD == rT+1)
   16175             valid = False;
   16176       }
   16177       if (valid) {
   16178          IRTemp resSC1, resSC32, data;
   16179          /* make unconditional */
   16180          if (condT != IRTemp_INVALID) {
   16181             mk_skip_over_A32_if_cond_is_false( condT );
   16182             condT = IRTemp_INVALID;
   16183          }
   16184          /* Ok, now we're unconditional.  Do the store. */
   16185          data = newTemp(ty);
   16186          assign(data,
   16187                 ty == Ity_I64
   16188                    // FIXME: assumes little-endian guest (rT+1 is the high half)
   16189                    ? binop(Iop_32HLto64, getIRegA(rT+1), getIRegA(rT+0))
   16190                    : narrow == Iop_INVALID
   16191                       ? getIRegA(rT)
   16192                       : unop(narrow, getIRegA(rT)));
   16193          resSC1 = newTemp(Ity_I1);
   16194          // FIXME: assumes little-endian guest
   16195          stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
   16196 
   16197          /* Set rD to 1 on failure, 0 on success.  Currently we have
   16198             resSC1 == 0 on failure, 1 on success, hence the Not1 below. */
   16199          resSC32 = newTemp(Ity_I32);
   16200          assign(resSC32,
   16201                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   16202 
   16203          putIRegA(rD, mkexpr(resSC32),
   16204                       IRTemp_INVALID, Ijk_Boring);
   16205          if (ty == Ity_I64) {
   16206             DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
   16207                 nm, nCC(INSN_COND), rD, rT, rT+1, rN);
   16208          } else {
   16209             DIP("strex%s%s r%u, r%u, [r%u]\n",
   16210                 nm, nCC(INSN_COND), rD, rT, rN);
   16211          }
   16212          goto decode_success;
   16213       }
   16214       /* fall through */
   16215    }
   16216 
   16217    /* --------------------- movw, movt --------------------- */
   16218    if (0x03000000 == (insn & 0x0FF00000)
   16219        || 0x03400000 == (insn & 0x0FF00000)) /* pray for CSE */ {
   16220       UInt rD    = INSN(15,12);
   16221       UInt imm16 = (insn & 0xFFF) | ((insn >> 4) & 0x0000F000); /* 19:16 ++ 11:0 */
   16222       UInt isT   = (insn >> 22) & 1;  /* 22:22, 1 = movt, 0 = movw */
   16223       if (rD == 15) {
   16224          /* forget it: rD == 15 is not decoded here */
   16225       } else {
   16226          if (isT) {
   16227             putIRegA(rD,   /* keep low 16 bits of rD, replace the top 16 with imm16 */
   16228                      binop(Iop_Or32,
   16229                            binop(Iop_And32, getIRegA(rD), mkU32(0xFFFF)),
   16230                            mkU32(imm16 << 16)),
   16231                      condT, Ijk_Boring);
   16232             DIP("movt%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
   16233             goto decode_success;
   16234          } else {
   16235             putIRegA(rD, mkU32(imm16), condT, Ijk_Boring); /* rD = zero-extended imm16 */
   16236             DIP("movw%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
   16237             goto decode_success;
   16238          }
   16239       }
   16240       /* fall through */
   16241    }
   16242 
   16243    /* ----------- uxtb, sxtb, uxth, sxth, uxtb16, sxtb16 ----------- */
   16244    /* FIXME: this is an exact duplicate of the Thumb version.  They
   16245       should be commoned up. */
   16246    if (BITS8(0,1,1,0,1, 0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,0))
   16247        && BITS4(1,1,1,1) == INSN(19,16)
   16248        && BITS4(0,1,1,1) == INSN(7,4)
   16249        && BITS4(0,0, 0,0) == (INSN(11,8) & BITS4(0,0,1,1))) {
   16250       UInt subopc = INSN(27,20) & BITS8(0,0,0,0,0, 1,1,1); /* 22:20 selects the variant */
   16251       if (subopc != BITS4(0,0,0,1) && subopc != BITS4(0,1,0,1)) {
   16252          Int    rot  = (INSN(11,8) >> 2) & 3;  /* 11:10; rotate amount is 8 * rot */
   16253          UInt   rM   = INSN(3,0);
   16254          UInt   rD   = INSN(15,12);
   16255          IRTemp srcT = newTemp(Ity_I32);
   16256          IRTemp rotT = newTemp(Ity_I32);  /* rM rotated right by 8 * rot */
   16257          IRTemp dstT = newTemp(Ity_I32);
   16258          const HChar* nm = "???";
   16259          assign(srcT, getIRegA(rM));
   16260          assign(rotT, genROR32(srcT, 8 * rot)); /* 0, 8, 16 or 24 only */
   16261          switch (subopc) {
   16262             case BITS4(0,1,1,0): // UXTB: zero-extend low byte
   16263                assign(dstT, unop(Iop_8Uto32, unop(Iop_32to8, mkexpr(rotT))));
   16264                nm = "uxtb";
   16265                break;
   16266             case BITS4(0,0,1,0): // SXTB: sign-extend low byte
   16267                assign(dstT, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rotT))));
   16268                nm = "sxtb";
   16269                break;
   16270             case BITS4(0,1,1,1): // UXTH: zero-extend low halfword
   16271                assign(dstT, unop(Iop_16Uto32, unop(Iop_32to16, mkexpr(rotT))));
   16272                nm = "uxth";
   16273                break;
   16274             case BITS4(0,0,1,1): // SXTH: sign-extend low halfword
   16275                assign(dstT, unop(Iop_16Sto32, unop(Iop_32to16, mkexpr(rotT))));
   16276                nm = "sxth";
   16277                break;
   16278             case BITS4(0,1,0,0): // UXTB16: keep bytes 0 and 2, zero bytes 1 and 3
   16279                assign(dstT, binop(Iop_And32, mkexpr(rotT), mkU32(0x00FF00FF)));
   16280                nm = "uxtb16";
   16281                break;
   16282             case BITS4(0,0,0,0): { // SXTB16: sign-extend bytes 0 and 2 to 16 bits each
   16283                IRTemp lo32 = newTemp(Ity_I32);
   16284                IRTemp hi32 = newTemp(Ity_I32);
   16285                assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
   16286                assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
   16287                assign(
   16288                   dstT,
   16289                   binop(Iop_Or32,
   16290                         binop(Iop_And32,
   16291                               unop(Iop_8Sto32,
   16292                                    unop(Iop_32to8, mkexpr(lo32))),
   16293                               mkU32(0xFFFF)),
   16294                         binop(Iop_Shl32,
   16295                               unop(Iop_8Sto32,
   16296                                    unop(Iop_32to8, mkexpr(hi32))),
   16297                               mkU8(16))
   16298                ));
   16299                nm = "sxtb16";
   16300                break;
   16301             }
   16302             default:
   16303                vassert(0); // guarded by "if" above
   16304          }
   16305          putIRegA(rD, mkexpr(dstT), condT, Ijk_Boring);
   16306          DIP("%s%s r%u, r%u, ROR #%u\n", nm, nCC(INSN_COND), rD, rM, rot);
   16307          goto decode_success;
   16308       }
   16309       /* fall through: subopc 1 and 5 are not decoded here */
   16310    }
   16311 
   16312    /* ------------------- bfi, bfc (rN == 15 means bfc) ------------------- */
   16313    if (BITS8(0,1,1,1,1,1,0, 0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
   16314        && BITS4(0, 0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
   16315       UInt rD  = INSN(15,12);
   16316       UInt rN  = INSN(3,0);
   16317       UInt msb = (insn >> 16) & 0x1F; /* 20:16 */
   16318       UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
   16319       if (rD == 15 || msb < lsb) {
   16320          /* undecodable; fall through */
   16321       } else {
   16322          IRTemp src    = newTemp(Ity_I32);
   16323          IRTemp olddst = newTemp(Ity_I32);
   16324          IRTemp newdst = newTemp(Ity_I32);
   16325          UInt   mask = 1U << (msb - lsb); /* unsigned: the shift may be 31 */
   16326          mask = (mask - 1) + mask;        /* msb-lsb+1 low bits set */
   16327          vassert(mask != 0); // guaranteed by "msb < lsb" check above
   16328          mask <<= lsb;                    /* move the field mask into place */
   16329 
   16330          assign(src, rN == 15 ? mkU32(0) : getIRegA(rN));
   16331          assign(olddst, getIRegA(rD));
   16332          assign(newdst,  /* (src << lsb) inside the field, old rD outside it */
   16333                 binop(Iop_Or32,
   16334                    binop(Iop_And32,
   16335                          binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
   16336                          mkU32(mask)),
   16337                    binop(Iop_And32,
   16338                          mkexpr(olddst),
   16339                          mkU32(~mask)))
   16340                );
   16341 
   16342          putIRegA(rD, mkexpr(newdst), condT, Ijk_Boring);
   16343 
   16344          if (rN == 15) {
   16345             DIP("bfc%s r%u, #%u, #%u\n",
   16346                 nCC(INSN_COND), rD, lsb, msb-lsb+1);
   16347          } else {
   16348             DIP("bfi%s r%u, r%u, #%u, #%u\n",
   16349                 nCC(INSN_COND), rD, rN, lsb, msb-lsb+1);
   16350          }
   16351          goto decode_success;
   16352       }
   16353       /* fall through */
   16354    }
   16355 
   16356    /* ------------------- {u,s}bfx ------------------- */
   16357    if (BITS8(0,1,1,1,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   16358        && BITS4(0,1,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
   16359       UInt rD  = INSN(15,12);
   16360       UInt rN  = INSN(3,0);
   16361       UInt wm1 = (insn >> 16) & 0x1F; /* 20:16, field width minus one */
   16362       UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
   16363       UInt msb = lsb + wm1;
   16364       UInt isU = (insn >> 22) & 1;    /* 22:22 */
   16365       if (rD == 15 || rN == 15 || msb >= 32) {
   16366          /* undecodable; fall through */
   16367       } else {
   16368          IRTemp src  = newTemp(Ity_I32);
   16369          IRTemp tmp  = newTemp(Ity_I32);
   16370          IRTemp res  = newTemp(Ity_I32);
   16371          UInt   mask = ((1U << wm1) - 1) + (1U << wm1); /* wm1+1 low bits; wm1 may be 31 */
   16372          vassert(msb <= 31); /* msb is unsigned, so only the upper bound needs checking */
   16373          vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
   16374 
   16375          assign(src, getIRegA(rN));
   16376          assign(tmp, binop(Iop_And32,
   16377                            binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
   16378                            mkU32(mask)));
   16379          assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32, /* zero- or sign-extend field */
   16380                            binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
   16381                            mkU8(31-wm1)));
   16382 
   16383          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   16384 
   16385          DIP("%s%s r%u, r%u, #%u, #%u\n",
   16386              isU ? "ubfx" : "sbfx",
   16387              nCC(INSN_COND), rD, rN, lsb, wm1 + 1);
   16388          goto decode_success;
   16389       }
   16390       /* fall through */
   16391    }
   16392 
   16393    /* --------------------- Load/store doubleword ------------- */
   16394    // LDRD STRD
   16395    /*                 31   27   23   19 15 11   7    3     # highest bit
   16396                         28   24   20 16 12    8    4    0
   16397       A5-36   1 | 16  cond 0001 U100 Rn Rd im4h 11S1 im4l
   16398       A5-38   1 | 32  cond 0001 U000 Rn Rd 0000 11S1 Rm
   16399       A5-40   2 | 16  cond 0001 U110 Rn Rd im4h 11S1 im4l
   16400       A5-42   2 | 32  cond 0001 U010 Rn Rd 0000 11S1 Rm
   16401       A5-44   3 | 16  cond 0000 U100 Rn Rd im4h 11S1 im4l
   16402       A5-46   3 | 32  cond 0000 U000 Rn Rd 0000 11S1 Rm
   16403    */
   16404    /* case coding:
   16405              1   at-ea               (access at ea)
   16406              2   at-ea-then-upd      (access at ea, then Rn = ea)
   16407              3   at-Rn-then-upd      (access at Rn, then Rn = ea)
   16408       ea coding
   16409              16  Rn +/- imm8
   16410              32  Rn +/- Rm
   16411    */
   16412    /* Quickly skip over all of this for hopefully most instructions */
   16413    if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
   16414       goto after_load_store_doubleword;
   16415 
   16416    /* Check the "11S1" thing. */
   16417    if ((INSN(7,4) & BITS4(1,1,0,1)) != BITS4(1,1,0,1))
   16418       goto after_load_store_doubleword;
   16419 
   16420    summary = 0;
   16421 
   16422    /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,0,0)) {
   16423       summary = 1 | 16;
   16424    }
   16425    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,0,0)) {
   16426       summary = 1 | 32;
   16427    }
   16428    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,1,0)) {
   16429       summary = 2 | 16;
   16430    }
   16431    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,1,0)) {
   16432       summary = 2 | 32;
   16433    }
   16434    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(1,0,0)) {
   16435       summary = 3 | 16;
   16436    }
   16437    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(0,0,0)) {
   16438       summary = 3 | 32;
   16439    }
   16440    else goto after_load_store_doubleword;
   16441 
   16442    { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
   16443      UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
   16444      UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
   16445      UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
   16446      UInt bS   = (insn >> 5) & 1;    /* S=1 store, S=0 load */
   16447      UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
   16448 
   16449      /* Require rD to be an even numbered register */
   16450      if ((rD & 1) != 0)
   16451         goto after_load_store_doubleword;
   16452 
   16453      /* Require 11:8 == 0 for Rn +/- Rm cases */
   16454      if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
   16455         goto after_load_store_doubleword;
   16456 
   16457      /* Skip some invalid cases, which would lead to two competing
   16458         updates to the same register, or which are otherwise
   16459         disallowed by the spec. */
   16460      switch (summary) {
   16461         case 1 | 16:
   16462            break;
   16463         case 1 | 32:
   16464            if (rM == 15) goto after_load_store_doubleword;
   16465            break;
   16466         case 2 | 16: case 3 | 16:
   16467            if (rN == 15) goto after_load_store_doubleword;
   16468            if (bS == 0 && (rN == rD || rN == rD+1))
   16469               goto after_load_store_doubleword;
   16470            break;
   16471         case 2 | 32: case 3 | 32:
   16472            if (rM == 15) goto after_load_store_doubleword;
   16473            if (rN == 15) goto after_load_store_doubleword;
   16474            if (rN == rM) goto after_load_store_doubleword;
   16475            if (bS == 0 && (rN == rD || rN == rD+1))
   16476               goto after_load_store_doubleword;
   16477            break;
   16478         default:
   16479            vassert(0);
   16480      }
   16481 
   16482      /* If this is a branch, make it unconditional at this point.
   16483         Doing conditional branches in-line is too complex (for
   16484         now). */
   16485      vassert((rD & 1) == 0); /* from tests above */
   16486      if (bS == 0 && rD+1 == 15 && condT != IRTemp_INVALID) {
   16487         // go uncond
   16488         mk_skip_over_A32_if_cond_is_false( condT );
   16489         condT = IRTemp_INVALID;
   16490         // now uncond
   16491      }
   16492 
   16493      /* compute the effective address.  Bind it to a tmp since we
   16494         may need to use it twice. */
   16495      IRExpr* eaE = NULL;
   16496      switch (summary & 0xF0) {
   16497         case 16:
   16498            eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
   16499            break;
   16500         case 32:
   16501            eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
   16502            break;
   16503      }
   16504      vassert(eaE);
   16505      IRTemp eaT = newTemp(Ity_I32);
   16506      assign(eaT, eaE);
   16507 
   16508      /* get the old Rn value */
   16509      IRTemp rnT = newTemp(Ity_I32);
   16510      assign(rnT, getIRegA(rN));
   16511 
   16512      /* decide on the transfer address */
   16513      IRTemp taT = IRTemp_INVALID;
   16514      switch (summary & 0x0F) {
   16515         case 1: case 2: taT = eaT; break;
   16516         case 3:         taT = rnT; break;
   16517      }
   16518      vassert(taT != IRTemp_INVALID);
   16519 
   16520      /* XXX deal with alignment constraints */
   16521      /* XXX: but the A8 doesn't seem to trap for misaligned loads, so,
   16522         ignore alignment issues for the time being. */
   16523 
   16524      /* For almost all cases, we do the writeback after the transfers.
   16525         However, that leaves the stack "uncovered" in this case:
   16526            strd    rD, [sp, #-8]
   16527         In which case, do the writeback to SP now, instead of later.
   16528         This is bad in that it makes the insn non-restartable if the
   16529         accesses fault, but at least keeps Memcheck happy. */
   16530      Bool writeback_already_done = False;
   16531      if (bS == 1 /*store*/ && summary == (2 | 16)
   16532          && rN == 13 && rN != rD && rN != rD+1
   16533          && bU == 0/*minus*/) {
   16534         putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   16535         writeback_already_done = True;
   16536      }
   16537 
   16538      /* doubleword store  S 1
   16539         doubleword load   S 0
   16540      */
   16541      const HChar* name = NULL;
   16542      /* generate the transfers */
   16543      if (bS == 1) { // doubleword store
   16544         storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(0)),
   16545                         getIRegA(rD+0), condT );
   16546         storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(4)),
   16547                         getIRegA(rD+1), condT );
   16548         name = "strd";
   16549      } else { // doubleword load
   16550         IRTemp oldRd0 = newTemp(Ity_I32);
   16551         IRTemp oldRd1 = newTemp(Ity_I32);
   16552         assign(oldRd0, llGetIReg(rD+0));
   16553         assign(oldRd1, llGetIReg(rD+1));
   16554         IRTemp newRd0 = newTemp(Ity_I32);
   16555         IRTemp newRd1 = newTemp(Ity_I32);
   16556         loadGuardedLE( newRd0, ILGop_Ident32,
   16557                        binop(Iop_Add32, mkexpr(taT), mkU32(0)),
   16558                        mkexpr(oldRd0), condT );
   16559         putIRegA( rD+0, mkexpr(newRd0), IRTemp_INVALID, Ijk_Boring );
   16560         loadGuardedLE( newRd1, ILGop_Ident32,
   16561                        binop(Iop_Add32, mkexpr(taT), mkU32(4)),
   16562                        mkexpr(oldRd1), condT );
   16563         putIRegA( rD+1, mkexpr(newRd1), IRTemp_INVALID, Ijk_Boring );
   16564         name = "ldrd";
   16565      }
   16566 
   16567      /* Update Rn if necessary. */
   16568      switch (summary & 0x0F) {
   16569         case 2: case 3:
   16570            // should be assured by logic above:
   16571            vassert(rN != 15); /* from checks above */
   16572            if (bS == 0) {
   16573               vassert(rD+0 != rN); /* since we just wrote rD+0 */
   16574               vassert(rD+1 != rN); /* since we just wrote rD+1 */
   16575            }
   16576            if (!writeback_already_done)
   16577               putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   16578            break;
   16579      }
   16580 
   16581      switch (summary & 0x0F) {
   16582         case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
   16583                  break;
   16584         case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
   16585                      name, nCC(INSN_COND), rD, dis_buf);
   16586                  break;
   16587         case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
   16588                      name, nCC(INSN_COND), rD, dis_buf);
   16589                  break;
   16590         default: vassert(0);
   16591      }
   16592 
   16593      goto decode_success;
   16594    }
   16595 
   16596   after_load_store_doubleword:
   16597 
   16598    /* ------------------- {s,u}xtab ------------- */
   16599    if (BITS8(0,1,1,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   16600        && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
   16601        && BITS4(0,1,1,1) == INSN(7,4)) {
   16602       UInt rN  = INSN(19,16);
   16603       UInt rD  = INSN(15,12);
   16604       UInt rM  = INSN(3,0);
   16605       UInt rot = (insn >> 10) & 3;
   16606       UInt isU = INSN(22,22);
   16607       if (rN == 15/*it's {S,U}XTB*/ || rD == 15 || rM == 15) {
   16608          /* undecodable; fall through */
   16609       } else {
   16610          IRTemp srcL = newTemp(Ity_I32);
   16611          IRTemp srcR = newTemp(Ity_I32);
   16612          IRTemp res  = newTemp(Ity_I32);
   16613          assign(srcR, getIRegA(rM));
   16614          assign(srcL, getIRegA(rN));
   16615          assign(res,  binop(Iop_Add32,
   16616                             mkexpr(srcL),
   16617                             unop(isU ? Iop_8Uto32 : Iop_8Sto32,
   16618                                  unop(Iop_32to8,
   16619                                       genROR32(srcR, 8 * rot)))));
   16620          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   16621          DIP("%cxtab%s r%u, r%u, r%u, ror #%u\n",
   16622              isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
   16623          goto decode_success;
   16624       }
   16625       /* fall through */
   16626    }
   16627 
   16628    /* ------------------- {s,u}xtah ------------- */
   16629    if (BITS8(0,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   16630        && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
   16631        && BITS4(0,1,1,1) == INSN(7,4)) {
   16632       UInt rN  = INSN(19,16);
   16633       UInt rD  = INSN(15,12);
   16634       UInt rM  = INSN(3,0);
   16635       UInt rot = (insn >> 10) & 3;
   16636       UInt isU = INSN(22,22);
   16637       if (rN == 15/*it's {S,U}XTH*/ || rD == 15 || rM == 15) {
   16638          /* undecodable; fall through */
   16639       } else {
   16640          IRTemp srcL = newTemp(Ity_I32);
   16641          IRTemp srcR = newTemp(Ity_I32);
   16642          IRTemp res  = newTemp(Ity_I32);
   16643          assign(srcR, getIRegA(rM));
   16644          assign(srcL, getIRegA(rN));
   16645          assign(res,  binop(Iop_Add32,
   16646                             mkexpr(srcL),
   16647                             unop(isU ? Iop_16Uto32 : Iop_16Sto32,
   16648                                  unop(Iop_32to16,
   16649                                       genROR32(srcR, 8 * rot)))));
   16650          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   16651 
   16652          DIP("%cxtah%s r%u, r%u, r%u, ror #%u\n",
   16653              isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
   16654          goto decode_success;
   16655       }
   16656       /* fall through */
   16657    }
   16658 
   16659    /* ------------------- rev16, rev ------------------ */
   16660    if (INSN(27,16) == 0x6BF
   16661        && (INSN(11,4) == 0xFB/*rev16*/ || INSN(11,4) == 0xF3/*rev*/)) {
   16662       Bool isREV = INSN(11,4) == 0xF3;
   16663       UInt rM    = INSN(3,0);
   16664       UInt rD    = INSN(15,12);
   16665       if (rM != 15 && rD != 15) {
   16666          IRTemp rMt = newTemp(Ity_I32);
   16667          assign(rMt, getIRegA(rM));
   16668          IRTemp res = isREV ? gen_REV(rMt) : gen_REV16(rMt);
   16669          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   16670          DIP("rev%s%s r%u, r%u\n", isREV ? "" : "16",
   16671              nCC(INSN_COND), rD, rM);
   16672          goto decode_success;
   16673       }
   16674    }
   16675 
   16676    /* ------------------- revsh ----------------------- */
   16677    if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xFB) {
   16678       UInt rM = INSN(3,0);
   16679       UInt rD = INSN(15,12);
   16680       if (rM != 15 && rD != 15) {
   16681          IRTemp irt_rM  = newTemp(Ity_I32);
   16682          IRTemp irt_hi  = newTemp(Ity_I32);
   16683          IRTemp irt_low = newTemp(Ity_I32);
   16684          IRTemp irt_res = newTemp(Ity_I32);
   16685          assign(irt_rM, getIRegA(rM));
   16686          assign(irt_hi,
   16687                 binop(Iop_Sar32,
   16688                       binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
   16689                       mkU8(16)
   16690                 )
   16691          );
   16692          assign(irt_low,
   16693                 binop(Iop_And32,
   16694                       binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
   16695                       mkU32(0xFF)
   16696                 )
   16697          );
   16698          assign(irt_res,
   16699                 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
   16700          );
   16701          putIRegA(rD, mkexpr(irt_res), condT, Ijk_Boring);
   16702          DIP("revsh%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
   16703          goto decode_success;
   16704       }
   16705    }
   16706 
   16707    /* ------------------- rbit ------------------ */
   16708    if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xF3) {
   16709       UInt rD = INSN(15,12);
   16710       UInt rM = INSN(3,0);
   16711       if (rD != 15 && rM != 15) {
   16712          IRTemp arg = newTemp(Ity_I32);
   16713          assign(arg, getIRegA(rM));
   16714          IRTemp res = gen_BITREV(arg);
   16715          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   16716          DIP("rbit r%u, r%u\n", rD, rM);
   16717          goto decode_success;
   16718       }
   16719    }
   16720 
   16721    /* ------------------- smmul ------------------ */
   16722    if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
   16723        && INSN(15,12) == BITS4(1,1,1,1)
   16724        && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
   16725       UInt bitR = INSN(5,5);
   16726       UInt rD = INSN(19,16);
   16727       UInt rM = INSN(11,8);
   16728       UInt rN = INSN(3,0);
   16729       if (rD != 15 && rM != 15 && rN != 15) {
   16730          IRExpr* res
   16731          = unop(Iop_64HIto32,
   16732                 binop(Iop_Add64,
   16733                       binop(Iop_MullS32, getIRegA(rN), getIRegA(rM)),
   16734                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
   16735          putIRegA(rD, res, condT, Ijk_Boring);
   16736          DIP("smmul%s%s r%u, r%u, r%u\n",
   16737              nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM);
   16738          goto decode_success;
   16739       }
   16740    }
   16741 
   16742    /* ------------------- smmla ------------------ */
   16743    if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
   16744        && INSN(15,12) != BITS4(1,1,1,1)
   16745        && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
   16746       UInt bitR = INSN(5,5);
   16747       UInt rD = INSN(19,16);
   16748       UInt rA = INSN(15,12);
   16749       UInt rM = INSN(11,8);
   16750       UInt rN = INSN(3,0);
   16751       if (rD != 15 && rM != 15 && rN != 15) {
   16752          IRExpr* res
   16753          = unop(Iop_64HIto32,
   16754                 binop(Iop_Add64,
   16755                       binop(Iop_Add64,
   16756                             binop(Iop_32HLto64, getIRegA(rA), mkU32(0)),
   16757                             binop(Iop_MullS32, getIRegA(rN), getIRegA(rM))),
   16758                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
   16759          putIRegA(rD, res, condT, Ijk_Boring);
   16760          DIP("smmla%s%s r%u, r%u, r%u, r%u\n",
   16761              nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM, rA);
   16762          goto decode_success;
   16763       }
   16764    }
   16765 
   16766    /* ------------------- NOP ------------------ */
   16767    if (0x0320F000 == (insn & 0x0FFFFFFF)) {
   16768       DIP("nop%s\n", nCC(INSN_COND));
   16769       goto decode_success;
   16770    }
   16771 
   16772    /* -------------- (A1) LDRT reg+/-#imm12 -------------- */
   16773    /* Load Register Unprivileged:
   16774       ldrt<c> Rt, [Rn] {, #+/-imm12}
   16775    */
   16776    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,1) ) {
   16777       UInt rT     = INSN(15,12);
   16778       UInt rN     = INSN(19,16);
   16779       UInt imm12  = INSN(11,0);
   16780       UInt bU     = INSN(23,23);
   16781       Bool valid  = True;
   16782       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   16783       if (valid) {
   16784          IRTemp newRt = newTemp(Ity_I32);
   16785          loadGuardedLE( newRt,
   16786                         ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
   16787          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16788          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16789                              getIRegA(rN), mkU32(imm12));
   16790          putIRegA(rN, erN, condT, Ijk_Boring);
   16791          DIP("ldrt%s r%u, [r%u], #%c%u\n",
   16792              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   16793          goto decode_success;
   16794       }
   16795    }
   16796 
   16797    /* -------------- (A2) LDRT reg+/-reg with shift -------------- */
   16798    /* Load Register Unprivileged:
   16799       ldrt<c> Rt, [Rn], +/-Rm{, shift}
   16800    */
   16801    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,1)
   16802         && INSN(4,4) == 0 ) {
   16803       UInt rT     = INSN(15,12);
   16804       UInt rN     = INSN(19,16);
   16805       UInt rM     = INSN(3,0);
   16806       UInt imm5   = INSN(11,7);
   16807       UInt bU     = INSN(23,23);
   16808       UInt type   = INSN(6,5);
   16809       Bool valid  = True;
   16810       if (rT == 15 || rN == 15 || rN == rT || rM == 15
   16811           /* || (ArchVersion() < 6 && rM == rN) */)
   16812          valid = False;
   16813       if (valid) {
   16814          IRTemp newRt = newTemp(Ity_I32);
   16815          loadGuardedLE( newRt,
   16816                         ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
   16817          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16818          // dis_buf generated is slightly bogus, in fact.
   16819          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   16820                                                        type, imm5, dis_buf);
   16821          putIRegA(rN, erN, condT, Ijk_Boring);
   16822          DIP("ldrt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   16823          goto decode_success;
   16824       }
   16825    }
   16826 
   16827    /* -------------- (A1) LDRBT reg+/-#imm12 -------------- */
   16828    /* Load Register Byte Unprivileged:
   16829       ldrbt<c> Rt, [Rn], #+/-imm12
   16830    */
   16831    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,1) ) {
   16832       UInt rT     = INSN(15,12);
   16833       UInt rN     = INSN(19,16);
   16834       UInt imm12  = INSN(11,0);
   16835       UInt bU     = INSN(23,23);
   16836       Bool valid  = True;
   16837       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   16838       if (valid) {
   16839          IRTemp newRt = newTemp(Ity_I32);
   16840          loadGuardedLE( newRt,
   16841                         ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
   16842          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16843          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16844                              getIRegA(rN), mkU32(imm12));
   16845          putIRegA(rN, erN, condT, Ijk_Boring);
   16846          DIP("ldrbt%s r%u, [r%u], #%c%u\n",
   16847              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   16848          goto decode_success;
   16849       }
   16850    }
   16851 
   16852    /* -------------- (A2) LDRBT reg+/-reg with shift -------------- */
   16853    /* Load Register Byte Unprivileged:
   16854       ldrbt<c> Rt, [Rn], +/-Rm{, shift}
   16855    */
   16856    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,1)
   16857         && INSN(4,4) == 0 ) {
   16858       UInt rT     = INSN(15,12);
   16859       UInt rN     = INSN(19,16);
   16860       UInt rM     = INSN(3,0);
   16861       UInt imm5   = INSN(11,7);
   16862       UInt bU     = INSN(23,23);
   16863       UInt type   = INSN(6,5);
   16864       Bool valid  = True;
   16865       if (rT == 15 || rN == 15 || rN == rT || rM == 15
   16866           /* || (ArchVersion() < 6 && rM == rN) */)
   16867          valid = False;
   16868       if (valid) {
   16869          IRTemp newRt = newTemp(Ity_I32);
   16870          loadGuardedLE( newRt,
   16871                         ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
   16872          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16873          // dis_buf generated is slightly bogus, in fact.
   16874          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   16875                                                        type, imm5, dis_buf);
   16876          putIRegA(rN, erN, condT, Ijk_Boring);
   16877          DIP("ldrbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   16878          goto decode_success;
   16879       }
   16880    }
   16881 
   16882    /* -------------- (A1) LDRHT reg+#imm8 -------------- */
   16883    /* Load Register Halfword Unprivileged:
   16884       ldrht<c> Rt, [Rn] {, #+/-imm8}
   16885    */
   16886    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
   16887        && INSN(7,4) == BITS4(1,0,1,1) ) {
   16888       UInt rT    = INSN(15,12);
   16889       UInt rN    = INSN(19,16);
   16890       UInt bU    = INSN(23,23);
   16891       UInt imm4H = INSN(11,8);
   16892       UInt imm4L = INSN(3,0);
   16893       UInt imm8  = (imm4H << 4) | imm4L;
   16894       Bool valid = True;
   16895       if (rT == 15 || rN == 15 || rN == rT)
   16896          valid = False;
   16897       if (valid) {
   16898          IRTemp newRt = newTemp(Ity_I32);
   16899          loadGuardedLE( newRt,
   16900                         ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
   16901          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16902          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16903                              getIRegA(rN), mkU32(imm8));
   16904          putIRegA(rN, erN, condT, Ijk_Boring);
   16905          DIP("ldrht%s r%u, [r%u], #%c%u\n",
   16906              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   16907          goto decode_success;
   16908       }
   16909    }
   16910 
   16911    /* -------------- (A2) LDRHT reg+/-reg -------------- */
   16912    /* Load Register Halfword Unprivileged:
   16913       ldrht<c> Rt, [Rn], +/-Rm
   16914    */
   16915    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
   16916        && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
   16917       UInt rT    = INSN(15,12);
   16918       UInt rN    = INSN(19,16);
   16919       UInt rM    = INSN(3,0);
   16920       UInt bU    = INSN(23,23);
   16921       Bool valid = True;
   16922       if (rT == 15 || rN == 15 || rN == rT || rM == 15)
   16923          valid = False;
   16924       if (valid) {
   16925          IRTemp newRt = newTemp(Ity_I32);
   16926          loadGuardedLE( newRt,
   16927                         ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
   16928          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16929          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16930                              getIRegA(rN), getIRegA(rM));
   16931          putIRegA(rN, erN, condT, Ijk_Boring);
   16932          DIP("ldrht%s r%u, [r%u], %cr%u\n",
   16933              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   16934          goto decode_success;
   16935       }
   16936    }
   16937 
   16938    /* -------------- (A1) LDRSHT reg+#imm8 -------------- */
   16939    /* Load Register Signed Halfword Unprivileged:
   16940       ldrsht<c> Rt, [Rn] {, #+/-imm8}
   16941    */
   16942    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
   16943        && INSN(7,4) == BITS4(1,1,1,1)) {
   16944       UInt rT    = INSN(15,12);
   16945       UInt rN    = INSN(19,16);
   16946       UInt bU    = INSN(23,23);
   16947       UInt imm4H = INSN(11,8);
   16948       UInt imm4L = INSN(3,0);
   16949       UInt imm8  = (imm4H << 4) | imm4L;
   16950       Bool valid = True;
   16951       if (rN == 15 || rT == 15 || rN == rT)
   16952          valid = False;
   16953       if (valid) {
   16954          IRTemp newRt = newTemp(Ity_I32);
   16955          loadGuardedLE( newRt,
   16956                         ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
   16957          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16958          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16959                              getIRegA(rN), mkU32(imm8));
   16960          putIRegA(rN, erN, condT, Ijk_Boring);
   16961          DIP("ldrsht%s r%u, [r%u], #%c%u\n",
   16962              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   16963          goto decode_success;
   16964       }
   16965    }
   16966 
   16967    /* -------------- (A2) LDRSHT reg+/-reg -------------- */
   16968    /* Load Register Signed Halfword Unprivileged:
   16969       ldrsht<c> Rt, [Rn], +/-Rm
   16970    */
   16971    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
   16972        && INSN(11,4) == BITS8(0,0,0,0,1,1,1,1)) {
   16973       UInt rT    = INSN(15,12);
   16974       UInt rN    = INSN(19,16);
   16975       UInt rM    = INSN(3,0);
   16976       UInt bU    = INSN(23,23);
   16977       Bool valid = True;
   16978       if (rN == 15 || rT == 15 || rN == rT || rM == 15)
   16979          valid = False;
   16980       if (valid) {
   16981          IRTemp newRt = newTemp(Ity_I32);
   16982          loadGuardedLE( newRt,
   16983                         ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
   16984          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   16985          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   16986                              getIRegA(rN), getIRegA(rM));
   16987          putIRegA(rN, erN, condT, Ijk_Boring);
   16988          DIP("ldrsht%s r%u, [r%u], %cr%u\n",
   16989              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   16990          goto decode_success;
   16991       }
   16992    }
   16993 
   16994    /* -------------- (A1) LDRSBT reg+#imm8 -------------- */
   16995    /* Load Register Signed Byte Unprivileged:
   16996       ldrsbt<c> Rt, [Rn] {, #+/-imm8}
   16997    */
   16998    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
   16999        && INSN(7,4) == BITS4(1,1,0,1)) {
   17000       UInt rT    = INSN(15,12);
   17001       UInt rN    = INSN(19,16);
   17002       UInt bU    = INSN(23,23);
   17003       UInt imm4H = INSN(11,8);
   17004       UInt imm4L = INSN(3,0);
   17005       UInt imm8  = (imm4H << 4) | imm4L;
   17006       Bool valid = True;
   17007       if (rT == 15 || rN == 15 || rN == rT)
   17008          valid = False;
   17009       if (valid) {
   17010          IRTemp newRt = newTemp(Ity_I32);
   17011          loadGuardedLE( newRt,
   17012                         ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
   17013          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   17014          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   17015                              getIRegA(rN), mkU32(imm8));
   17016          putIRegA(rN, erN, condT, Ijk_Boring);
   17017          DIP("ldrsbt%s r%u, [r%u], #%c%u\n",
   17018              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   17019          goto decode_success;
   17020       }
   17021    }
   17022 
   17023    /* -------------- (A2) LDRSBT reg+/-reg -------------- */
   17024    /* Load Register Signed Byte Unprivileged:
   17025       ldrsbt<c> Rt, [Rn], +/-Rm
   17026    */
   17027    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
   17028        && INSN(11,4) == BITS8(0,0,0,0,1,1,0,1)) {
   17029       UInt rT    = INSN(15,12);
   17030       UInt rN    = INSN(19,16);
   17031       UInt bU    = INSN(23,23);
   17032       UInt rM    = INSN(3,0);
   17033       Bool valid = True;
   17034       if (rT == 15 || rN == 15 || rN == rT || rM == 15)
   17035          valid = False;
   17036       if (valid) {
   17037          IRTemp newRt = newTemp(Ity_I32);
   17038          loadGuardedLE( newRt,
   17039                         ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
   17040          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   17041          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   17042                              getIRegA(rN), getIRegA(rM));
   17043          putIRegA(rN, erN, condT, Ijk_Boring);
   17044          DIP("ldrsbt%s r%u, [r%u], %cr%u\n",
   17045              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   17046          goto decode_success;
   17047       }
   17048    }
   17049 
   17050    /* -------------- (A1) STRBT reg+#imm12 -------------- */
   17051    /* Store Register Byte Unprivileged:
   17052       strbt<c> Rt, [Rn], #+/-imm12
   17053    */
   17054    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,0) ) {
   17055       UInt rT     = INSN(15,12);
   17056       UInt rN     = INSN(19,16);
   17057       UInt imm12  = INSN(11,0);
   17058       UInt bU     = INSN(23,23);
   17059       Bool valid = True;
   17060       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   17061       if (valid) {
   17062          IRExpr* address = getIRegA(rN);
   17063          IRExpr* data = unop(Iop_32to8, getIRegA(rT));
   17064          storeGuardedLE( address, data, condT);
   17065          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   17066                                getIRegA(rN), mkU32(imm12));
   17067          putIRegA(rN, newRn, condT, Ijk_Boring);
   17068          DIP("strbt%s r%u, [r%u], #%c%u\n",
   17069              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   17070          goto decode_success;
   17071       }
   17072    }
   17073 
   17074    /* -------------- (A2) STRBT reg+/-reg -------------- */
   17075    /* Store Register Byte Unprivileged:
   17076       strbt<c> Rt, [Rn], +/-Rm{, shift}
   17077    */
   17078    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,0)
   17079        && INSN(4,4) == 0) {
   17080       UInt rT     = INSN(15,12);
   17081       UInt rN     = INSN(19,16);
   17082       UInt imm5   = INSN(11,7);
   17083       UInt type   = INSN(6,5);
   17084       UInt rM     = INSN(3,0);
   17085       UInt bU     = INSN(23,23);
   17086       Bool valid  = True;
   17087       if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
   17088       if (valid) {
   17089          IRExpr* address = getIRegA(rN);
   17090          IRExpr* data = unop(Iop_32to8, getIRegA(rT));
   17091          storeGuardedLE( address, data, condT);
   17092          // dis_buf generated is slightly bogus, in fact.
   17093          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   17094                                                        type, imm5, dis_buf);
   17095          putIRegA(rN, erN, condT, Ijk_Boring);
   17096          DIP("strbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   17097          goto decode_success;
   17098       }
   17099    }
   17100 
   17101    /* -------------- (A1) STRHT reg+#imm8 -------------- */
   17102    /* Store Register Halfword Unprivileged:
   17103       strht<c> Rt, [Rn], #+/-imm8
   17104    */
   17105    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,0)
   17106        && INSN(7,4) == BITS4(1,0,1,1) ) {
   17107       UInt rT    = INSN(15,12);
   17108       UInt rN    = INSN(19,16);
   17109       UInt imm4H = INSN(11,8);
   17110       UInt imm4L = INSN(3,0);
   17111       UInt imm8  = (imm4H << 4) | imm4L;
   17112       UInt bU    = INSN(23,23);
   17113       Bool valid = True;
   17114       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   17115       if (valid) {
   17116          IRExpr* address = getIRegA(rN);
   17117          IRExpr* data = unop(Iop_32to16, getIRegA(rT));
   17118          storeGuardedLE( address, data, condT);
   17119          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   17120                                getIRegA(rN), mkU32(imm8));
   17121          putIRegA(rN, newRn, condT, Ijk_Boring);
   17122          DIP("strht%s r%u, [r%u], #%c%u\n",
   17123              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   17124          goto decode_success;
   17125       }
   17126    }
   17127 
   17128    /* -------------- (A2) STRHT reg+reg -------------- */
   17129    /* Store Register Halfword Unprivileged:
   17130       strht<c> Rt, [Rn], +/-Rm
   17131    */
   17132    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,0)
   17133        && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
   17134       UInt rT    = INSN(15,12);
   17135       UInt rN    = INSN(19,16);
   17136       UInt rM    = INSN(3,0);
   17137       UInt bU    = INSN(23,23);
   17138       Bool valid = True;
   17139       if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
   17140       if (valid) {
   17141          IRExpr* address = getIRegA(rN);
   17142          IRExpr* data = unop(Iop_32to16, getIRegA(rT));
   17143          storeGuardedLE( address, data, condT);
   17144          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   17145                                getIRegA(rN), getIRegA(rM));
   17146          putIRegA(rN, newRn, condT, Ijk_Boring);
   17147          DIP("strht%s r%u, [r%u], %cr%u\n",
   17148              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   17149          goto decode_success;
   17150       }
   17151    }
   17152 
   17153    /* -------------- (A1) STRT reg+imm12 -------------- */
   17154    /* Store Register Unprivileged:
   17155       strt<c> Rt, [Rn], #+/-imm12
   17156    */
   17157    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,0) ) {
   17158       UInt rT    = INSN(15,12);
   17159       UInt rN    = INSN(19,16);
   17160       UInt imm12 = INSN(11,0);
   17161       UInt bU    = INSN(23,23);
   17162       Bool valid = True;
   17163       if (rN == 15 || rN == rT) valid = False;
   17164       if (valid) {
   17165          IRExpr* address = getIRegA(rN);
   17166          storeGuardedLE( address, getIRegA(rT), condT);
   17167          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   17168                                getIRegA(rN), mkU32(imm12));
   17169          putIRegA(rN, newRn, condT, Ijk_Boring);
   17170          DIP("strt%s r%u, [r%u], %c%u\n",
   17171              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   17172          goto decode_success;
   17173       }
   17174    }
   17175 
   17176    /* -------------- (A2) STRT reg+reg -------------- */
   17177    /* Store Register Unprivileged:
   17178       strt<c> Rt, [Rn], +/-Rm{, shift}
   17179    */
   17180    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,0)
   17181        && INSN(4,4) == 0 ) {
   17182       UInt rT    = INSN(15,12);
   17183       UInt rN    = INSN(19,16);
   17184       UInt rM    = INSN(3,0);
   17185       UInt type  = INSN(6,5);
   17186       UInt imm5  = INSN(11,7);
   17187       UInt bU    = INSN(23,23);
   17188       Bool valid = True;
   17189       if (rN == 15 || rN == rT || rM == 15) valid = False;
   17190       /* FIXME We didn't do:
   17191          if ArchVersion() < 6 && rM == rN then UNPREDICTABLE */
   17192       if (valid) {
   17193          storeGuardedLE( getIRegA(rN), getIRegA(rT), condT);
   17194          // dis_buf generated is slightly bogus, in fact.
   17195          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   17196                                                        type, imm5, dis_buf);
   17197          putIRegA(rN, erN, condT, Ijk_Boring);
   17198          DIP("strt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   17199          goto decode_success;
   17200       }
   17201    }
   17202 
   17203    /* ----------------------------------------------------------- */
   17204    /* -- ARMv7 instructions                                    -- */
   17205    /* ----------------------------------------------------------- */
   17206 
   17207    /* -------------- read CP15 TPIDRURO register ------------- */
   17208    /* mrc     p15, 0, r0, c13, c0, 3  up to
   17209       mrc     p15, 0, r14, c13, c0, 3
   17210    */
   17211    /* I don't know whether this is really v7-only.  But anyway, we
   17212       have to support it since arm-linux uses TPIDRURO as a thread
   17213       state register. */
   17214    if (0x0E1D0F70 == (insn & 0x0FFF0FFF)) {
   17215       UInt rD = INSN(15,12);
   17216       if (rD <= 14) {
   17217          /* skip r15, that's too stupid to handle */
   17218          putIRegA(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32),
   17219                       condT, Ijk_Boring);
   17220          DIP("mrc%s p15,0, r%u, c13, c0, 3\n", nCC(INSN_COND), rD);
   17221          goto decode_success;
   17222       }
   17223       /* fall through */
   17224    }
   17225 
   17226    /* Handle various kinds of barriers.  This is rather indiscriminate
   17227       in the sense that they are all turned into an IR Fence, which
   17228       means we don't know which they are, so the back end has to
   17229       re-emit them all when it comes across an IR Fence.
   17230    */
   17231    /* v6 */ /* mcr 15, 0, rT, c7, c10, 5 */
   17232    if (0xEE070FBA == (insn & 0xFFFF0FFF)) {
   17233       UInt rT = INSN(15,12);
   17234       if (rT <= 14) {
   17235          /* mcr 15, 0, rT, c7, c10, 5 (v6) equiv to DMB (v7).  Data
   17236             Memory Barrier -- ensures ordering of memory accesses. */
   17237          stmt( IRStmt_MBE(Imbe_Fence) );
   17238          DIP("mcr 15, 0, r%u, c7, c10, 5 (data memory barrier)\n", rT);
   17239          goto decode_success;
   17240       }
   17241       /* fall through */
   17242    }
   17243    /* other flavours of barrier */
   17244    switch (insn) {
   17245       case 0xEE070F9A: /* v6 */
   17246          /* mcr 15, 0, r0, c7, c10, 4 (v6) equiv to DSB (v7).  Data
   17247             Synch Barrier -- ensures completion of memory accesses. */
   17248          stmt( IRStmt_MBE(Imbe_Fence) );
   17249          DIP("mcr 15, 0, r0, c7, c10, 4 (data synch barrier)\n");
   17250          goto decode_success;
   17251       case 0xEE070F95: /* v6 */
   17252          /* mcr 15, 0, r0, c7, c5, 4 (v6) equiv to ISB (v7).
   17253             Instruction Synchronisation Barrier (or Flush Prefetch
   17254             Buffer) -- a pipe flush, I think.  I suspect we could
   17255             ignore those, but to be on the safe side emit a fence
   17256             anyway. */
   17257          stmt( IRStmt_MBE(Imbe_Fence) );
   17258          DIP("mcr 15, 0, r0, c7, c5, 4 (insn synch barrier)\n");
   17259          goto decode_success;
   17260       default:
   17261          break;
   17262    }
   17263 
   17264    /* ----------------------------------------------------------- */
   17265    /* -- VFP (CP 10, CP 11) instructions (in ARM mode)         -- */
   17266    /* ----------------------------------------------------------- */
   17267 
   17268    if (INSN_COND != ARMCondNV) {
   17269       Bool ok_vfp = decode_CP10_CP11_instruction (
   17270                        &dres, INSN(27,0), condT, INSN_COND,
   17271                        False/*!isT*/
   17272                     );
   17273       if (ok_vfp)
   17274          goto decode_success;
   17275    }
   17276 
   17277    /* ----------------------------------------------------------- */
   17278    /* -- NEON instructions (in ARM mode)                       -- */
   17279    /* ----------------------------------------------------------- */
   17280 
   17281    /* These are all in NV space, and so are taken care of (far) above,
   17282       by a call from this function to decode_NV_instruction(). */
   17283 
   17284    /* ----------------------------------------------------------- */
   17285    /* -- v6 media instructions (in ARM mode)                   -- */
   17286    /* ----------------------------------------------------------- */
   17287 
   17288    { Bool ok_v6m = decode_V6MEDIA_instruction(
   17289                        &dres, INSN(27,0), condT, INSN_COND,
   17290                        False/*!isT*/
   17291                    );
   17292      if (ok_v6m)
   17293         goto decode_success;
   17294    }
   17295 
   17296    /* ----------------------------------------------------------- */
   17297    /* -- Undecodable                                           -- */
   17298    /* ----------------------------------------------------------- */
   17299 
   17300    goto decode_failure;
   17301    /*NOTREACHED*/
   17302 
   17303   decode_failure:
   17304    /* All decode failures end up here. */
   17305    if (sigill_diag) {
   17306       vex_printf("disInstr(arm): unhandled instruction: "
   17307                  "0x%x\n", insn);
   17308       vex_printf("                 cond=%d(0x%x) 27:20=%u(0x%02x) "
   17309                                    "4:4=%d "
   17310                                    "3:0=%u(0x%x)\n",
   17311                  (Int)INSN_COND, (UInt)INSN_COND,
   17312                  (Int)INSN(27,20), (UInt)INSN(27,20),
   17313                  (Int)INSN(4,4),
   17314                  (Int)INSN(3,0), (UInt)INSN(3,0) );
   17315    }
   17316 
   17317    /* Tell the dispatcher that this insn cannot be decoded, and so has
   17318       not been executed, and (is currently) the next to be executed.
   17319       R15 should be up-to-date since it is made so at the start of each
   17320       insn, but nevertheless be paranoid and update it again right
   17321       now. */
   17322    vassert(0 == (guest_R15_curr_instr_notENC & 3));
   17323    llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
   17324    dres.len         = 0;
   17325    dres.whatNext    = Dis_StopHere;
   17326    dres.jk_StopHere = Ijk_NoDecode;
   17327    dres.continueAt  = 0;
   17328    return dres;
   17329 
   17330   decode_success:
   17331    /* All decode successes end up here. */
   17332    DIP("\n");
   17333 
   17334    vassert(dres.len == 4 || dres.len == 20);
   17335 
   17336    /* Now then.  Do we have an implicit jump to r15 to deal with? */
   17337    if (r15written) {
   17338       /* If we get a jump to deal with, we assume that there's been no
   17339          other competing branch stuff previously generated for this
   17340          insn.  That's reasonable, in the sense that the ARM insn set
   17341          appears to declare as "Unpredictable" any instruction which
   17342          generates more than one possible new value for r15.  Hence
   17343          just assert.  The decoders themselves should check against
   17344          all such instructions which are thusly Unpredictable, and
   17345          decline to decode them.  Hence we should never get here if we
   17346          have competing new values for r15, and hence it is safe to
   17347          assert here. */
   17348       vassert(dres.whatNext == Dis_Continue);
   17349       vassert(irsb->next == NULL);
   17350       vassert(irsb->jumpkind == Ijk_Boring);
   17351       /* If r15 is unconditionally written, terminate the block by
   17352          jumping to it.  If it's conditionally written, still
   17353          terminate the block (a shame, but we can't do side exits to
   17354          arbitrary destinations), but first jump to the next
   17355          instruction if the condition doesn't hold. */
   17356       /* We can't use getIReg(15) to get the destination, since that
   17357          will produce r15+8, which isn't what we want.  Must use
   17358          llGetIReg(15) instead. */
   17359       if (r15guard == IRTemp_INVALID) {
   17360          /* unconditional */
   17361       } else {
   17362          /* conditional */
   17363          stmt( IRStmt_Exit(
   17364                   unop(Iop_32to1,
   17365                        binop(Iop_Xor32,
   17366                              mkexpr(r15guard), mkU32(1))),
   17367                   r15kind,
   17368                   IRConst_U32(guest_R15_curr_instr_notENC + 4),
   17369                   OFFB_R15T
   17370          ));
   17371       }
   17372       /* This seems crazy, but we're required to finish the insn with
   17373          a write to the guest PC.  As usual we rely on ir_opt to tidy
   17374          up later. */
   17375       llPutIReg(15, llGetIReg(15));
   17376       dres.whatNext    = Dis_StopHere;
   17377       dres.jk_StopHere = r15kind;
   17378    } else {
   17379       /* Set up the end-state in the normal way. */
   17380       switch (dres.whatNext) {
   17381          case Dis_Continue:
   17382             llPutIReg(15, mkU32(dres.len + guest_R15_curr_instr_notENC));
   17383             break;
   17384          case Dis_ResteerU:
   17385          case Dis_ResteerC:
   17386             llPutIReg(15, mkU32(dres.continueAt));
   17387             break;
   17388          case Dis_StopHere:
   17389             break;
   17390          default:
   17391             vassert(0);
   17392       }
   17393    }
   17394 
   17395    return dres;
   17396 
   17397 #  undef INSN_COND
   17398 #  undef INSN
   17399 }
   17400 
   17401 
   17402 /*------------------------------------------------------------*/
   17403 /*--- Disassemble a single Thumb2 instruction              ---*/
   17404 /*------------------------------------------------------------*/
   17405 
   17406 static const UChar it_length_table[256]; /* fwds */
   17407 
   17408 /* NB: in Thumb mode we do fetches of regs with getIRegT, which
   17409    automagically adds 4 to fetches of r15.  However, writes to regs
   17410    are done with putIRegT, which disallows writes to r15.  Hence any
   17411    r15 writes and associated jumps have to be done "by hand". */
   17412 
   17413 /* Disassemble a single Thumb instruction into IR.  The instruction is
   17414    located in host memory at guest_instr, and has (decoded) guest IP
   17415    of guest_R15_curr_instr_notENC, which will have been set before the
   17416    call here. */
   17417 
   17418 static
   17419 DisResult disInstr_THUMB_WRK (
   17420              Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
   17421              Bool         resteerCisOk,
   17422              void*        callback_opaque,
   17423              const UChar* guest_instr,
   17424              const VexArchInfo* archinfo,
   17425              const VexAbiInfo*  abiinfo,
   17426              Bool         sigill_diag
   17427           )
   17428 {
   17429    /* A macro to fish bits out of insn0.  There's also INSN1, to fish
   17430       bits out of insn1, but that's defined only after the end of the
   17431       16-bit insn decoder, so as to stop it mistakenly being used
   17432       therein. */
   17433 #  define INSN0(_bMax,_bMin)  SLICE_UInt(((UInt)insn0), (_bMax), (_bMin))
   17434 
   17435    DisResult dres;
   17436    UShort    insn0; /*  first 16 bits of the insn */
   17437    UShort    insn1; /* second 16 bits of the insn */
   17438    //Bool      allow_VFP = False;
   17439    //UInt      hwcaps = archinfo->hwcaps;
   17440    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
   17441 
   17442    /* Summary result of the ITxxx backwards analysis: False == safe
   17443       but suboptimal. */
   17444    Bool guaranteedUnconditional = False;
   17445 
   17446    /* What insn variants are we supporting today? */
   17447    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
   17448    // etc etc
   17449 
   17450    /* Set result defaults. */
   17451    dres.whatNext    = Dis_Continue;
   17452    dres.len         = 2;
   17453    dres.continueAt  = 0;
   17454    dres.jk_StopHere = Ijk_INVALID;
   17455 
   17456    /* Set default actions for post-insn handling of writes to r15, if
   17457       required. */
   17458    r15written = False;
   17459    r15guard   = IRTemp_INVALID; /* unconditional */
   17460    r15kind    = Ijk_Boring;
   17461 
   17462    /* Insns could be 2 or 4 bytes long.  Just get the first 16 bits at
   17463       this point.  If we need the second 16, get them later.  We can't
   17464       get them both out immediately because it risks a fault (very
   17465       unlikely, but ..) if the second 16 bits aren't actually
   17466       necessary. */
   17467    insn0 = getUShortLittleEndianly( guest_instr );
   17468    insn1 = 0; /* We'll get it later, once we know we need it. */
   17469 
   17470    /* Similarly, will set this later. */
   17471    IRTemp old_itstate = IRTemp_INVALID;
   17472 
   17473    if (0) vex_printf("insn: 0x%x\n", insn0);
   17474 
   17475    DIP("\t(thumb) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
   17476 
   17477    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   17478 
   17479    /* ----------------------------------------------------------- */
   17480    /* Spot "Special" instructions (see comment at top of file). */
   17481    {
   17482       const UChar* code = guest_instr;
   17483       /* Spot the 16-byte preamble:
   17484 
   17485          ea4f 0cfc  mov.w   ip, ip, ror #3
   17486          ea4f 3c7c  mov.w   ip, ip, ror #13
   17487          ea4f 7c7c  mov.w   ip, ip, ror #29
   17488          ea4f 4cfc  mov.w   ip, ip, ror #19
   17489       */
   17490       UInt word1 = 0x0CFCEA4F;
   17491       UInt word2 = 0x3C7CEA4F;
   17492       UInt word3 = 0x7C7CEA4F;
   17493       UInt word4 = 0x4CFCEA4F;
   17494       if (getUIntLittleEndianly(code+ 0) == word1 &&
   17495           getUIntLittleEndianly(code+ 4) == word2 &&
   17496           getUIntLittleEndianly(code+ 8) == word3 &&
   17497           getUIntLittleEndianly(code+12) == word4) {
   17498          /* Got a "Special" instruction preamble.  Which one is it? */
   17499          // 0x 0A 0A EA 4A
   17500          if (getUIntLittleEndianly(code+16) == 0x0A0AEA4A
   17501                                                /* orr.w r10,r10,r10 */) {
   17502             /* R3 = client_request ( R4 ) */
   17503             DIP("r3 = client_request ( %%r4 )\n");
   17504             llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
   17505             dres.jk_StopHere = Ijk_ClientReq;
   17506             dres.whatNext    = Dis_StopHere;
   17507             goto decode_success;
   17508          }
   17509          else
   17510          // 0x 0B 0B EA 4B
   17511          if (getUIntLittleEndianly(code+16) == 0x0B0BEA4B
   17512                                                /* orr r11,r11,r11 */) {
   17513             /* R3 = guest_NRADDR */
   17514             DIP("r3 = guest_NRADDR\n");
   17515             dres.len = 20;
   17516             llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
   17517             goto decode_success;
   17518          }
   17519          else
   17520          // 0x 0C 0C EA 4C
   17521          if (getUIntLittleEndianly(code+16) == 0x0C0CEA4C
   17522                                                /* orr r12,r12,r12 */) {
   17523             /*  branch-and-link-to-noredir R4 */
   17524             DIP("branch-and-link-to-noredir r4\n");
   17525             llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
   17526             llPutIReg(15, getIRegT(4));
   17527             dres.jk_StopHere = Ijk_NoRedir;
   17528             dres.whatNext    = Dis_StopHere;
   17529             goto decode_success;
   17530          }
   17531          else
   17532          // 0x 09 09 EA 49
   17533          if (getUIntLittleEndianly(code+16) == 0x0909EA49
   17534                                                /* orr r9,r9,r9 */) {
   17535             /* IR injection */
   17536             DIP("IR injection\n");
   17537             vex_inject_ir(irsb, Iend_LE);
   17538             // Invalidate the current insn. The reason is that the IRop we're
   17539             // injecting here can change. In which case the translation has to
   17540             // be redone. For ease of handling, we simply invalidate all the
   17541             // time.
   17542             stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
   17543             stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
   17544             llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
   17545             dres.whatNext    = Dis_StopHere;
   17546             dres.jk_StopHere = Ijk_InvalICache;
   17547             goto decode_success;
   17548          }
   17549          /* We don't know what it is.  Set insn0 so decode_failure
   17550             can print the insn following the Special-insn preamble. */
   17551          insn0 = getUShortLittleEndianly(code+16);
   17552          goto decode_failure;
   17553          /*NOTREACHED*/
   17554       }
   17555 
   17556    }
   17557 
   17558    /* ----------------------------------------------------------- */
   17559 
   17560    /* Main Thumb instruction decoder starts here.  It's a series of
   17561       switches which examine ever longer bit sequences at the MSB of
   17562       the instruction word, first for 16-bit insns, then for 32-bit
   17563       insns. */
   17564 
   17565    /* --- BEGIN ITxxx optimisation analysis --- */
   17566    /* This is a crucial optimisation for the ITState boilerplate that
   17567       follows.  Examine the 9 halfwords preceding this instruction,
   17568       and if we are absolutely sure that none of them constitute an
   17569       'it' instruction, then we can be sure that this instruction is
   17570       not under the control of any 'it' instruction, and so
   17571       guest_ITSTATE must be zero.  So write zero into ITSTATE right
   17572       now, so that iropt can fold out almost all of the resulting
   17573       junk.
   17574 
   17575       If we aren't sure, we can always safely skip this step.  So be a
   17576       bit conservative about it: only poke around in the same page as
   17577       this instruction, lest we get a fault from the previous page
   17578       that would not otherwise have happened.  The saving grace is
   17579       that such skipping is pretty rare -- it only happens,
   17580       statistically, 18/4096ths of the time, so is judged unlikely to
   17581       be a performance problem.
   17582 
   17583       FIXME: do better.  Take into account the number of insns covered
   17584       by any IT insns we find, to rule out cases where an IT clearly
   17585       cannot cover this instruction.  This would improve behaviour for
   17586       branch targets immediately following an IT-guarded group that is
   17587       not of full length.  Eg, (and completely ignoring issues of 16-
   17588       vs 32-bit insn length):
   17589 
   17590              ite cond
   17591              insn1
   17592              insn2
   17593       label: insn3
   17594              insn4
   17595 
   17596       The 'it' only conditionalises insn1 and insn2.  However, the
   17597       current analysis is conservative and considers insn3 and insn4
   17598       also possibly guarded.  Hence if 'label:' is the start of a hot
   17599       loop we will get a big performance hit.
   17600    */
   17601    {
   17602       /* Summary result of this analysis: False == safe but
   17603          suboptimal. */
   17604       vassert(guaranteedUnconditional == False);
   17605 
   17606       UInt pc = guest_R15_curr_instr_notENC;
   17607       vassert(0 == (pc & 1));
   17608 
   17609       UInt pageoff = pc & 0xFFF;
   17610       if (pageoff >= 18) {
   17611          /* It's safe to poke about in the 9 halfwords preceding this
   17612             insn.  So, have a look at them. */
   17613          guaranteedUnconditional = True; /* assume no 'it' insn found,
   17614                                             till we do */
   17615          UShort* hwp = (UShort*)(HWord)pc;
   17616          Int i;
   17617          for (i = -1; i >= -9; i--) {
   17618             /* We're in the same page.  (True, but commented out due
   17619                to expense.) */
   17620             /*
   17621             vassert( ( ((UInt)(&hwp[i])) & 0xFFFFF000 )
   17622                       == ( pc & 0xFFFFF000 ) );
   17623             */
   17624             /* All valid IT instructions must have the form 0xBFxy,
   17625                where x can be anything, but y must be nonzero.  Find
   17626                the number of insns covered by it (1 .. 4) and check to
   17627                see if it can possibly reach up to the instruction in
   17628                question.  Some (x,y) combinations mean UNPREDICTABLE,
   17629                and the table is constructed to be conservative by
   17630                returning 4 for those cases, so the analysis is safe
   17631                even if the code uses unpredictable IT instructions (in
   17632                which case its authors are nuts, but hey.)  */
   17633             UShort hwp_i = hwp[i];
   17634             if (UNLIKELY((hwp_i & 0xFF00) == 0xBF00 && (hwp_i & 0xF) != 0)) {
   17635                /* might be an 'it' insn. */
   17636                /* # guarded insns */
   17637                Int n_guarded = (Int)it_length_table[hwp_i & 0xFF];
   17638                vassert(n_guarded >= 1 && n_guarded <= 4);
   17639                if (n_guarded * 2 /* # guarded HWs, worst case */
   17640                    > (-(i+1)))   /* -(i+1): # remaining HWs after the IT */
   17641                    /* -(i+0) also seems to work, even though I think
   17642                       it's wrong.  I don't understand that. */
   17643                   guaranteedUnconditional = False;
   17644                break;
   17645             }
   17646          }
   17647       }
   17648    }
   17649    /* --- END ITxxx optimisation analysis --- */
   17650 
   17651    /* Generate the guarding condition for this insn, by examining
   17652       ITSTATE.  Assign it to condT.  Also, generate new
   17653       values for ITSTATE ready for stuffing back into the
   17654       guest state, but don't actually do the Put yet, since it will
   17655       need to be stuffed back in only after the instruction gets to a
   17656       point where it is sure to complete.  Mostly we let the code at
   17657       decode_success handle this, but in cases where the insn contains
   17658       a side exit, we have to update them before the exit. */
   17659 
   17660    /* If the ITxxx optimisation analysis above could not prove that
   17661       this instruction is guaranteed unconditional, we insert a
   17662       lengthy IR preamble to compute the guarding condition at
   17663       runtime.  If it can prove it (which obviously we hope is the
   17664       normal case) then we insert a minimal preamble, which is
   17665       equivalent to setting guest_ITSTATE to zero and then folding
   17666       that through the full preamble (which completely disappears). */
   17667 
   17668    IRTemp condT              = IRTemp_INVALID;
   17669    IRTemp cond_AND_notInIT_T = IRTemp_INVALID;
   17670 
   17671    IRTemp new_itstate        = IRTemp_INVALID;
   17672    vassert(old_itstate == IRTemp_INVALID);
   17673 
   17674    if (guaranteedUnconditional) {
   17675       /* BEGIN "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
   17676 
   17677       // ITSTATE = 0 :: I32
   17678       IRTemp z32 = newTemp(Ity_I32);
   17679       assign(z32, mkU32(0));
   17680       put_ITSTATE(z32);
   17681 
   17682       // old_itstate = 0 :: I32
   17683       //
   17684       // old_itstate = get_ITSTATE();
   17685       old_itstate = z32; /* 0 :: I32 */
   17686 
   17687       // new_itstate = old_itstate >> 8
   17688       //             = 0 >> 8
   17689       //             = 0 :: I32
   17690       //
   17691       // new_itstate = newTemp(Ity_I32);
   17692       // assign(new_itstate,
   17693       //        binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   17694       new_itstate = z32;
   17695 
   17696       // ITSTATE = 0 :: I32(again)
   17697       //
   17698       // put_ITSTATE(new_itstate);
   17699 
   17700       // condT1 = calc_cond_dyn( xor(and(old_itstate,0xF0), 0xE0) )
   17701       //        = calc_cond_dyn( xor(0,0xE0) )
   17702       //        = calc_cond_dyn ( 0xE0 )
   17703       //        = 1 :: I32
   17704       // Not that this matters, since the computed value is not used:
   17705       // see condT folding below
   17706       //
   17707       // IRTemp condT1 = newTemp(Ity_I32);
   17708       // assign(condT1,
   17709       //        mk_armg_calculate_condition_dyn(
   17710       //           binop(Iop_Xor32,
   17711       //                 binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
   17712       //                 mkU32(0xE0))
   17713       //       )
   17714       // );
   17715 
   17716       // condT = 32to8(and32(old_itstate,0xF0)) == 0  ? 1  : condT1
   17717       //       = 32to8(and32(0,0xF0)) == 0  ? 1  : condT1
   17718       //       = 32to8(0) == 0  ? 1  : condT1
   17719       //       = 0 == 0  ? 1  : condT1
   17720       //       = 1
   17721       //
   17722       // condT = newTemp(Ity_I32);
   17723       // assign(condT, IRExpr_ITE(
   17724       //                  unop(Iop_32to8, binop(Iop_And32,
   17725       //                                        mkexpr(old_itstate),
   17726       //                                        mkU32(0xF0))),
   17727       //                  mkexpr(condT1),
   17728       //                  mkU32(1))
   17729       //       ));
   17730       condT = newTemp(Ity_I32);
   17731       assign(condT, mkU32(1));
   17732 
   17733       // notInITt = xor32(and32(old_itstate, 1), 1)
   17734       //          = xor32(and32(0, 1), 1)
   17735       //          = xor32(0, 1)
   17736       //          = 1 :: I32
   17737       //
   17738       // IRTemp notInITt = newTemp(Ity_I32);
   17739       // assign(notInITt,
   17740       //        binop(Iop_Xor32,
   17741       //              binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
   17742       //              mkU32(1)));
   17743 
   17744       // cond_AND_notInIT_T = and32(notInITt, condT)
   17745       //                    = and32(1, 1)
   17746       //                    = 1
   17747       //
   17748       // cond_AND_notInIT_T = newTemp(Ity_I32);
   17749       // assign(cond_AND_notInIT_T,
   17750       //        binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
   17751       cond_AND_notInIT_T = condT; /* 1 :: I32 */
   17752 
   17753       /* END "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
   17754    } else {
   17755       /* BEGIN { STANDARD PREAMBLE; } */
   17756 
   17757       old_itstate = get_ITSTATE();
   17758 
   17759       new_itstate = newTemp(Ity_I32);
   17760       assign(new_itstate,
   17761              binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   17762 
   17763       put_ITSTATE(new_itstate);
   17764 
   17765       /* Same strategy as for ARM insns: generate a condition
   17766          temporary at this point (or IRTemp_INVALID, meaning
   17767          unconditional).  We leave it to lower-level instruction
   17768          decoders to decide whether they can generate straight-line
   17769          code, or whether they must generate a side exit before the
   17770          instruction.  condT :: Ity_I32 and is always either zero or
   17771          one. */
   17772       IRTemp condT1 = newTemp(Ity_I32);
   17773       assign(condT1,
   17774              mk_armg_calculate_condition_dyn(
   17775                 binop(Iop_Xor32,
   17776                       binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
   17777                       mkU32(0xE0))
   17778             )
   17779       );
   17780 
   17781       /* This is a bit complex, but needed to make Memcheck understand
   17782          that, if the condition in old_itstate[7:4] denotes AL (that
   17783          is, if this instruction is to be executed unconditionally),
   17784          then condT does not depend on the results of calling the
   17785          helper.
   17786 
   17787          We test explicitly for old_itstate[7:4] == AL ^ 0xE, and in
   17788          that case set condT directly to 1.  Else we use the results
   17789          of the helper.  Since old_itstate is always defined and
   17790          because Memcheck does lazy V-bit propagation through ITE,
   17791          this will cause condT to always be a defined 1 if the
   17792          condition is 'AL'.  From an execution semantics point of view
   17793          this is irrelevant since we're merely duplicating part of the
   17794          behaviour of the helper.  But it makes it clear to Memcheck,
   17795          in this case, that condT does not in fact depend on the
   17796          contents of the condition code thunk.  Without it, we get
   17797          quite a lot of false errors.
   17798 
   17799          So, just to clarify: from a straight semantics point of view,
   17800          we can simply do "assign(condT, mkexpr(condT1))", and the
   17801          simulator still runs fine.  It's just that we get loads of
   17802          false errors from Memcheck. */
   17803       condT = newTemp(Ity_I32);
   17804       assign(condT, IRExpr_ITE(
   17805                        binop(Iop_CmpNE32, binop(Iop_And32,
   17806                                                 mkexpr(old_itstate),
   17807                                                 mkU32(0xF0)),
   17808                                           mkU32(0)),
   17809                        mkexpr(condT1),
   17810                        mkU32(1)
   17811             ));
   17812 
   17813       /* Something we don't have in ARM: generate a 0 or 1 value
   17814          indicating whether or not we are in an IT block (NB: 0 = in
   17815          IT block, 1 = not in IT block).  This is used to gate
   17816          condition code updates in 16-bit Thumb instructions. */
   17817       IRTemp notInITt = newTemp(Ity_I32);
   17818       assign(notInITt,
   17819              binop(Iop_Xor32,
   17820                    binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
   17821                    mkU32(1)));
   17822 
   17823       /* Compute 'condT && notInITt' -- that is, the instruction is
   17824          going to execute, and we're not in an IT block.  This is the
   17825          gating condition for updating condition codes in 16-bit Thumb
   17826          instructions, except for CMP, CMN and TST. */
   17827       cond_AND_notInIT_T = newTemp(Ity_I32);
   17828       assign(cond_AND_notInIT_T,
   17829              binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
   17830       /* END { STANDARD PREAMBLE; } */
   17831    }
   17832 
   17833 
   17834    /* At this point:
   17835       * ITSTATE has been updated
   17836       * condT holds the guarding condition for this instruction (0 or 1),
   17837       * notInITt is 1 if we're in "normal" code, 0 if in an IT block
   17838       * cond_AND_notInIT_T is the AND of the above two.
   17839 
   17840       If the instruction proper can't trap, then there's nothing else
   17841       to do w.r.t. ITSTATE -- just go and generate IR for the
   17842       insn, taking into account the guarding condition.
   17843 
   17844       If, however, the instruction might trap, then we must back up
   17845       ITSTATE to the old value, and re-update it after the potentially
   17846       trapping IR section.  A trap can happen either via a memory
   17847       reference or because we need to throw SIGILL.
   17848 
   17849       If an instruction has a side exit, we need to be sure that any
   17850       ITSTATE backup is re-updated before the side exit.
   17851    */
   17852 
   17853    /* ----------------------------------------------------------- */
   17854    /* --                                                       -- */
   17855    /* -- Thumb 16-bit integer instructions                     -- */
   17856    /* --                                                       -- */
   17857    /* -- IMPORTANT: references to insn1 or INSN1 are           -- */
   17858    /* --            not allowed in this section                -- */
   17859    /* --                                                       -- */
   17860    /* ----------------------------------------------------------- */
   17861 
   17862    /* 16-bit instructions inside an IT block, apart from CMP, CMN and
   17863       TST, do not set the condition codes.  Hence we must dynamically
   17864       test for this case for every condition code update. */
   17865 
   17866    IROp   anOp   = Iop_INVALID;
   17867    const HChar* anOpNm = NULL;
   17868 
   17869    /* ================ 16-bit 15:6 cases ================ */
   17870 
   17871    switch (INSN0(15,6)) {
   17872 
   17873    case 0x10a:   // CMP
   17874    case 0x10b: { // CMN
   17875       /* ---------------- CMP Rn, Rm ---------------- */
   17876       Bool   isCMN = INSN0(15,6) == 0x10b;
   17877       UInt   rN    = INSN0(2,0);
   17878       UInt   rM    = INSN0(5,3);
   17879       IRTemp argL  = newTemp(Ity_I32);
   17880       IRTemp argR  = newTemp(Ity_I32);
   17881       assign( argL, getIRegT(rN) );
   17882       assign( argR, getIRegT(rM) );
   17883       /* Update flags regardless of whether in an IT block or not. */
   17884       setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   17885                       argL, argR, condT );
   17886       DIP("%s r%u, r%u\n", isCMN ? "cmn" : "cmp", rN, rM);
   17887       goto decode_success;
   17888    }
   17889 
   17890    case 0x108: {
   17891       /* ---------------- TST Rn, Rm ---------------- */
   17892       UInt   rN   = INSN0(2,0);
   17893       UInt   rM   = INSN0(5,3);
   17894       IRTemp oldC = newTemp(Ity_I32);
   17895       IRTemp oldV = newTemp(Ity_I32);
   17896       IRTemp res  = newTemp(Ity_I32);
   17897       assign( oldC, mk_armg_calculate_flag_c() );
   17898       assign( oldV, mk_armg_calculate_flag_v() );
   17899       assign( res,  binop(Iop_And32, getIRegT(rN), getIRegT(rM)) );
   17900       /* Update flags regardless of whether in an IT block or not. */
   17901       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
   17902       DIP("tst r%u, r%u\n", rN, rM);
   17903       goto decode_success;
   17904    }
   17905 
   17906    case 0x109: {
   17907       /* ---------------- NEGS Rd, Rm ---------------- */
   17908       /* Rd = -Rm */
   17909       UInt   rM   = INSN0(5,3);
   17910       UInt   rD   = INSN0(2,0);
   17911       IRTemp arg  = newTemp(Ity_I32);
   17912       IRTemp zero = newTemp(Ity_I32);
   17913       assign(arg, getIRegT(rM));
   17914       assign(zero, mkU32(0));
   17915       // rD can never be r15
   17916       putIRegT(rD, binop(Iop_Sub32, mkexpr(zero), mkexpr(arg)), condT);
   17917       setFlags_D1_D2( ARMG_CC_OP_SUB, zero, arg, cond_AND_notInIT_T);
   17918       DIP("negs r%u, r%u\n", rD, rM);
   17919       goto decode_success;
   17920    }
   17921 
   17922    case 0x10F: {
   17923       /* ---------------- MVNS Rd, Rm ---------------- */
   17924       /* Rd = ~Rm */
   17925       UInt   rM   = INSN0(5,3);
   17926       UInt   rD   = INSN0(2,0);
   17927       IRTemp oldV = newTemp(Ity_I32);
   17928       IRTemp oldC = newTemp(Ity_I32);
   17929       IRTemp res  = newTemp(Ity_I32);
   17930       assign( oldV, mk_armg_calculate_flag_v() );
   17931       assign( oldC, mk_armg_calculate_flag_c() );
   17932       assign(res, unop(Iop_Not32, getIRegT(rM)));
   17933       // rD can never be r15
   17934       putIRegT(rD, mkexpr(res), condT);
   17935       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   17936                          cond_AND_notInIT_T );
   17937       DIP("mvns r%u, r%u\n", rD, rM);
   17938       goto decode_success;
   17939    }
   17940 
   17941    case 0x10C:
   17942       /* ---------------- ORRS Rd, Rm ---------------- */
   17943       anOp = Iop_Or32; anOpNm = "orr"; goto and_orr_eor_mul;
   17944    case 0x100:
   17945       /* ---------------- ANDS Rd, Rm ---------------- */
   17946       anOp = Iop_And32; anOpNm = "and"; goto and_orr_eor_mul;
   17947    case 0x101:
   17948       /* ---------------- EORS Rd, Rm ---------------- */
   17949       anOp = Iop_Xor32; anOpNm = "eor"; goto and_orr_eor_mul;
   17950    case 0x10d:
   17951       /* ---------------- MULS Rd, Rm ---------------- */
   17952       anOp = Iop_Mul32; anOpNm = "mul"; goto and_orr_eor_mul;
   17953    and_orr_eor_mul: {
   17954       /* Rd = Rd `op` Rm */
   17955       UInt   rM   = INSN0(5,3);
   17956       UInt   rD   = INSN0(2,0);
   17957       IRTemp res  = newTemp(Ity_I32);
   17958       IRTemp oldV = newTemp(Ity_I32);
   17959       IRTemp oldC = newTemp(Ity_I32);
   17960       assign( oldV, mk_armg_calculate_flag_v() );
   17961       assign( oldC, mk_armg_calculate_flag_c() );
   17962       assign( res, binop(anOp, getIRegT(rD), getIRegT(rM) ));
   17963       // not safe to read guest state after here
   17964       // rD can never be r15
   17965       putIRegT(rD, mkexpr(res), condT);
   17966       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   17967                          cond_AND_notInIT_T );
   17968       DIP("%s r%u, r%u\n", anOpNm, rD, rM);
   17969       goto decode_success;
   17970    }
   17971 
   17972    case 0x10E: {
   17973       /* ---------------- BICS Rd, Rm ---------------- */
   17974       /* Rd = Rd & ~Rm */
   17975       UInt   rM   = INSN0(5,3);
   17976       UInt   rD   = INSN0(2,0);
   17977       IRTemp res  = newTemp(Ity_I32);
   17978       IRTemp oldV = newTemp(Ity_I32);
   17979       IRTemp oldC = newTemp(Ity_I32);
   17980       assign( oldV, mk_armg_calculate_flag_v() );
   17981       assign( oldC, mk_armg_calculate_flag_c() );
   17982       assign( res, binop(Iop_And32, getIRegT(rD),
   17983                                     unop(Iop_Not32, getIRegT(rM) )));
   17984       // not safe to read guest state after here
   17985       // rD can never be r15
   17986       putIRegT(rD, mkexpr(res), condT);
   17987       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   17988                          cond_AND_notInIT_T );
   17989       DIP("bics r%u, r%u\n", rD, rM);
   17990       goto decode_success;
   17991    }
   17992 
   17993    case 0x105: {
   17994       /* ---------------- ADCS Rd, Rm ---------------- */
   17995       /* Rd = Rd + Rm + oldC */
   17996       UInt   rM   = INSN0(5,3);
   17997       UInt   rD   = INSN0(2,0);
   17998       IRTemp argL = newTemp(Ity_I32);
   17999       IRTemp argR = newTemp(Ity_I32);
   18000       IRTemp oldC = newTemp(Ity_I32);
   18001       IRTemp res  = newTemp(Ity_I32);
   18002       assign(argL, getIRegT(rD));
   18003       assign(argR, getIRegT(rM));
   18004       assign(oldC, mk_armg_calculate_flag_c());
   18005       assign(res, binop(Iop_Add32,
   18006                         binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
   18007                         mkexpr(oldC)));
   18008       // rD can never be r15
   18009       putIRegT(rD, mkexpr(res), condT);
   18010       setFlags_D1_D2_ND( ARMG_CC_OP_ADC, argL, argR, oldC,
   18011                          cond_AND_notInIT_T );
   18012       DIP("adcs r%u, r%u\n", rD, rM);
   18013       goto decode_success;
   18014    }
   18015 
   18016    case 0x106: {
   18017       /* ---------------- SBCS Rd, Rm ---------------- */
   18018       /* Rd = Rd - Rm - (oldC ^ 1) */
   18019       UInt   rM   = INSN0(5,3);
   18020       UInt   rD   = INSN0(2,0);
   18021       IRTemp argL = newTemp(Ity_I32);
   18022       IRTemp argR = newTemp(Ity_I32);
   18023       IRTemp oldC = newTemp(Ity_I32);
   18024       IRTemp res  = newTemp(Ity_I32);
   18025       assign(argL, getIRegT(rD));
   18026       assign(argR, getIRegT(rM));
   18027       assign(oldC, mk_armg_calculate_flag_c());
   18028       assign(res, binop(Iop_Sub32,
   18029                         binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
   18030                         binop(Iop_Xor32, mkexpr(oldC), mkU32(1))));
   18031       // rD can never be r15
   18032       putIRegT(rD, mkexpr(res), condT);
   18033       setFlags_D1_D2_ND( ARMG_CC_OP_SBB, argL, argR, oldC,
   18034                          cond_AND_notInIT_T );
   18035       DIP("sbcs r%u, r%u\n", rD, rM);
   18036       goto decode_success;
   18037    }
   18038 
   18039    case 0x2CB: {
   18040       /* ---------------- UXTB Rd, Rm ---------------- */
   18041       /* Rd = 8Uto32(Rm) */
   18042       UInt rM = INSN0(5,3);
   18043       UInt rD = INSN0(2,0);
   18044       putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFF)),
   18045                    condT);
   18046       DIP("uxtb r%u, r%u\n", rD, rM);
   18047       goto decode_success;
   18048    }
   18049 
   18050    case 0x2C9: {
   18051       /* ---------------- SXTB Rd, Rm ---------------- */
   18052       /* Rd = 8Sto32(Rm) */
   18053       UInt rM = INSN0(5,3);
   18054       UInt rD = INSN0(2,0);
   18055       putIRegT(rD, binop(Iop_Sar32,
   18056                          binop(Iop_Shl32, getIRegT(rM), mkU8(24)),
   18057                          mkU8(24)),
   18058                    condT);
   18059       DIP("sxtb r%u, r%u\n", rD, rM);
   18060       goto decode_success;
   18061    }
   18062 
   18063    case 0x2CA: {
   18064       /* ---------------- UXTH Rd, Rm ---------------- */
   18065       /* Rd = 16Uto32(Rm) */
   18066       UInt rM = INSN0(5,3);
   18067       UInt rD = INSN0(2,0);
   18068       putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFFFF)),
   18069                    condT);
   18070       DIP("uxth r%u, r%u\n", rD, rM);
   18071       goto decode_success;
   18072    }
   18073 
   18074    case 0x2C8: {
   18075       /* ---------------- SXTH Rd, Rm ---------------- */
   18076       /* Rd = 16Sto32(Rm) */
   18077       UInt rM = INSN0(5,3);
   18078       UInt rD = INSN0(2,0);
   18079       putIRegT(rD, binop(Iop_Sar32,
   18080                          binop(Iop_Shl32, getIRegT(rM), mkU8(16)),
   18081                          mkU8(16)),
   18082                    condT);
   18083       DIP("sxth r%u, r%u\n", rD, rM);
   18084       goto decode_success;
   18085    }
   18086 
   18087    case 0x102:   // LSLS
   18088    case 0x103:   // LSRS
   18089    case 0x104:   // ASRS
   18090    case 0x107: { // RORS
   18091       /* ---------------- LSLS Rs, Rd ---------------- */
   18092       /* ---------------- LSRS Rs, Rd ---------------- */
   18093       /* ---------------- ASRS Rs, Rd ---------------- */
   18094       /* ---------------- RORS Rs, Rd ---------------- */
   18095       /* Rd = Rd `op` Rs, and set flags */
   18096       UInt   rS   = INSN0(5,3);
   18097       UInt   rD   = INSN0(2,0);
   18098       IRTemp oldV = newTemp(Ity_I32);
   18099       IRTemp rDt  = newTemp(Ity_I32);
   18100       IRTemp rSt  = newTemp(Ity_I32);
   18101       IRTemp res  = newTemp(Ity_I32);
   18102       IRTemp resC = newTemp(Ity_I32);
   18103       const HChar* wot  = "???";
   18104       assign(rSt, getIRegT(rS));
   18105       assign(rDt, getIRegT(rD));
   18106       assign(oldV, mk_armg_calculate_flag_v());
   18107       /* Does not appear to be the standard 'how' encoding. */
   18108       switch (INSN0(15,6)) {
   18109          case 0x102:
   18110             compute_result_and_C_after_LSL_by_reg(
   18111                dis_buf, &res, &resC, rDt, rSt, rD, rS
   18112             );
   18113             wot = "lsl";
   18114             break;
   18115          case 0x103:
   18116             compute_result_and_C_after_LSR_by_reg(
   18117                dis_buf, &res, &resC, rDt, rSt, rD, rS
   18118             );
   18119             wot = "lsr";
   18120             break;
   18121          case 0x104:
   18122             compute_result_and_C_after_ASR_by_reg(
   18123                dis_buf, &res, &resC, rDt, rSt, rD, rS
   18124             );
   18125             wot = "asr";
   18126             break;
   18127          case 0x107:
   18128             compute_result_and_C_after_ROR_by_reg(
   18129                dis_buf, &res, &resC, rDt, rSt, rD, rS
   18130             );
   18131             wot = "ror";
   18132             break;
   18133          default:
   18134             /*NOTREACHED*/vassert(0);
   18135       }
   18136       // not safe to read guest state after this point
   18137       putIRegT(rD, mkexpr(res), condT);
   18138       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
   18139                          cond_AND_notInIT_T );
   18140       DIP("%ss r%u, r%u\n", wot, rS, rD);
   18141       goto decode_success;
   18142    }
   18143 
   18144    case 0x2E8:   // REV
   18145    case 0x2E9: { // REV16
   18146       /* ---------------- REV   Rd, Rm ---------------- */
   18147       /* ---------------- REV16 Rd, Rm ---------------- */
   18148       UInt rM = INSN0(5,3);
   18149       UInt rD = INSN0(2,0);
   18150       Bool isREV = INSN0(15,6) == 0x2E8;
   18151       IRTemp arg = newTemp(Ity_I32);
   18152       assign(arg, getIRegT(rM));
   18153       IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
   18154       putIRegT(rD, mkexpr(res), condT);
   18155       DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM);
   18156       goto decode_success;
   18157    }
   18158 
   18159    case 0x2EB: { // REVSH
   18160       /* ---------------- REVSH Rd, Rm ---------------- */
   18161       UInt rM = INSN0(5,3);
   18162       UInt rD = INSN0(2,0);
   18163       IRTemp irt_rM  = newTemp(Ity_I32);
   18164       IRTemp irt_hi  = newTemp(Ity_I32);
   18165       IRTemp irt_low = newTemp(Ity_I32);
   18166       IRTemp irt_res = newTemp(Ity_I32);
   18167       assign(irt_rM, getIRegT(rM));
   18168       assign(irt_hi,
   18169              binop(Iop_Sar32,
   18170                    binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
   18171                    mkU8(16)
   18172              )
   18173       );
   18174       assign(irt_low,
   18175              binop(Iop_And32,
   18176                    binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
   18177                    mkU32(0xFF)
   18178              )
   18179       );
   18180       assign(irt_res,
   18181              binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
   18182       );
   18183       putIRegT(rD, mkexpr(irt_res), condT);
   18184       DIP("revsh r%u, r%u\n", rD, rM);
   18185       goto decode_success;
   18186    }
   18187 
   18188    default:
   18189       break; /* examine the next shortest prefix */
   18190 
   18191    }
   18192 
   18193 
   18194    /* ================ 16-bit 15:7 cases ================ */
   18195 
   18196    switch (INSN0(15,7)) {
   18197 
   18198    case BITS9(1,0,1,1,0,0,0,0,0): {
   18199       /* ------------ ADD SP, #imm7 * 4 ------------ */
   18200       UInt uimm7 = INSN0(6,0);
   18201       putIRegT(13, binop(Iop_Add32, getIRegT(13), mkU32(uimm7 * 4)),
   18202                    condT);
   18203       DIP("add sp, #%u\n", uimm7 * 4);
   18204       goto decode_success;
   18205    }
   18206 
   18207    case BITS9(1,0,1,1,0,0,0,0,1): {
   18208       /* ------------ SUB SP, #imm7 * 4 ------------ */
   18209       UInt uimm7 = INSN0(6,0);
   18210       putIRegT(13, binop(Iop_Sub32, getIRegT(13), mkU32(uimm7 * 4)),
   18211                    condT);
   18212       DIP("sub sp, #%u\n", uimm7 * 4);
   18213       goto decode_success;
   18214    }
   18215 
   18216    case BITS9(0,1,0,0,0,1,1,1,0): {
   18217       /* ---------------- BX rM ---------------- */
   18218       /* Branch to reg, and optionally switch modes.  Reg contains a
   18219          suitably encoded address therefore (w CPSR.T at the bottom).
   18220          Have to special-case r15, as usual. */
   18221       UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
   18222       if (BITS3(0,0,0) == INSN0(2,0)) {
   18223          IRTemp dst = newTemp(Ity_I32);
   18224          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   18225          mk_skip_over_T16_if_cond_is_false(condT);
   18226          condT = IRTemp_INVALID;
   18227          // now uncond
   18228          if (rM <= 14) {
   18229             assign( dst, getIRegT(rM) );
   18230          } else {
   18231             vassert(rM == 15);
   18232             assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
   18233          }
   18234          llPutIReg(15, mkexpr(dst));
   18235          dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
   18236          dres.whatNext    = Dis_StopHere;
   18237          DIP("bx r%u (possibly switch to ARM mode)\n", rM);
   18238          goto decode_success;
   18239       }
   18240       break;
   18241    }
   18242 
   18243    /* ---------------- BLX rM ---------------- */
   18244    /* Branch and link to interworking address in rM. */
   18245    case BITS9(0,1,0,0,0,1,1,1,1): {
   18246       if (BITS3(0,0,0) == INSN0(2,0)) {
   18247          UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
   18248          IRTemp dst = newTemp(Ity_I32);
   18249          if (rM <= 14) {
   18250             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   18251             mk_skip_over_T16_if_cond_is_false(condT);
   18252             condT = IRTemp_INVALID;
   18253             // now uncond
   18254             /* We're returning to Thumb code, hence "| 1" */
   18255             assign( dst, getIRegT(rM) );
   18256             putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
   18257                           IRTemp_INVALID );
   18258             llPutIReg(15, mkexpr(dst));
   18259             dres.jk_StopHere = Ijk_Call;
   18260             dres.whatNext    = Dis_StopHere;
   18261             DIP("blx r%u (possibly switch to ARM mode)\n", rM);
   18262             goto decode_success;
   18263          }
   18264          /* else unpredictable, fall through */
   18265       }
   18266       break;
   18267    }
   18268 
   18269    default:
   18270       break; /* examine the next shortest prefix */
   18271 
   18272    }
   18273 
   18274 
   18275    /* ================ 16-bit 15:8 cases ================ */
   18276 
   18277    switch (INSN0(15,8)) {
   18278 
   18279    case BITS8(1,1,0,1,1,1,1,1): {
   18280       /* ---------------- SVC ---------------- */
   18281       UInt imm8 = INSN0(7,0);
   18282       if (imm8 == 0) {
   18283          /* A syscall.  We can't do this conditionally, hence: */
   18284          mk_skip_over_T16_if_cond_is_false( condT );
   18285          // FIXME: what if we have to back up and restart this insn?
   18286          // then ITSTATE will be wrong (we'll have it as "used")
   18287          // when it isn't.  Correct is to save ITSTATE in a
   18288          // stash pseudo-reg, and back up from that if we have to
   18289          // restart.
   18290          // uncond after here
   18291          llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ));
   18292          dres.jk_StopHere = Ijk_Sys_syscall;
   18293          dres.whatNext    = Dis_StopHere;
   18294          DIP("svc #0x%08x\n", imm8);
   18295          goto decode_success;
   18296       }
   18297       /* else fall through */
   18298       break;
   18299    }
   18300 
   18301    case BITS8(0,1,0,0,0,1,0,0): {
   18302       /* ---------------- ADD(HI) Rd, Rm ---------------- */
   18303       UInt h1 = INSN0(7,7);
   18304       UInt h2 = INSN0(6,6);
   18305       UInt rM = (h2 << 3) | INSN0(5,3);
   18306       UInt rD = (h1 << 3) | INSN0(2,0);
   18307       //if (h1 == 0 && h2 == 0) { // Original T1 was more restrictive
   18308       if (rD == 15 && rM == 15) {
   18309          // then it's invalid
   18310       } else {
   18311          IRTemp res = newTemp(Ity_I32);
   18312          assign( res, binop(Iop_Add32, getIRegT(rD), getIRegT(rM) ));
   18313          if (rD != 15) {
   18314             putIRegT( rD, mkexpr(res), condT );
   18315          } else {
   18316             /* Only allowed outside or last-in IT block; SIGILL if not so. */
   18317             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   18318             /* jump over insn if not selected */
   18319             mk_skip_over_T16_if_cond_is_false(condT);
   18320             condT = IRTemp_INVALID;
   18321             // now uncond
   18322             /* non-interworking branch */
   18323             llPutIReg(15, binop(Iop_Or32, mkexpr(res), mkU32(1)));
   18324             dres.jk_StopHere = Ijk_Boring;
   18325             dres.whatNext    = Dis_StopHere;
   18326          }
   18327          DIP("add(hi) r%u, r%u\n", rD, rM);
   18328          goto decode_success;
   18329       }
   18330       break;
   18331    }
   18332 
   18333    case BITS8(0,1,0,0,0,1,0,1): {
   18334       /* ---------------- CMP(HI) Rd, Rm ---------------- */
   18335       UInt h1 = INSN0(7,7);
   18336       UInt h2 = INSN0(6,6);
   18337       UInt rM = (h2 << 3) | INSN0(5,3);
   18338       UInt rN = (h1 << 3) | INSN0(2,0);
   18339       if (h1 != 0 || h2 != 0) {
   18340          IRTemp argL  = newTemp(Ity_I32);
   18341          IRTemp argR  = newTemp(Ity_I32);
   18342          assign( argL, getIRegT(rN) );
   18343          assign( argR, getIRegT(rM) );
   18344          /* Update flags regardless of whether in an IT block or not. */
   18345          setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   18346          DIP("cmphi r%u, r%u\n", rN, rM);
   18347          goto decode_success;
   18348       }
   18349       break;
   18350    }
   18351 
   18352    case BITS8(0,1,0,0,0,1,1,0): {
   18353       /* ---------------- MOV(HI) Rd, Rm ---------------- */
   18354       UInt h1 = INSN0(7,7);
   18355       UInt h2 = INSN0(6,6);
   18356       UInt rM = (h2 << 3) | INSN0(5,3);
   18357       UInt rD = (h1 << 3) | INSN0(2,0);
   18358       /* The old ARM ARM seems to disallow the case where both Rd and
   18359          Rm are "low" registers, but newer versions allow it. */
   18360       if (1 /*h1 != 0 || h2 != 0*/) {
   18361          IRTemp val = newTemp(Ity_I32);
   18362          assign( val, getIRegT(rM) );
   18363          if (rD != 15) {
   18364             putIRegT( rD, mkexpr(val), condT );
   18365          } else {
   18366             /* Only allowed outside or last-in IT block; SIGILL if not so. */
   18367             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   18368             /* jump over insn if not selected */
   18369             mk_skip_over_T16_if_cond_is_false(condT);
   18370             condT = IRTemp_INVALID;
   18371             // now uncond
   18372             /* non-interworking branch */
   18373             llPutIReg(15, binop(Iop_Or32, mkexpr(val), mkU32(1)));
   18374             dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
   18375             dres.whatNext    = Dis_StopHere;
   18376          }
   18377          DIP("mov r%u, r%u\n", rD, rM);
   18378          goto decode_success;
   18379       }
   18380       break;
   18381    }
   18382 
   18383    case BITS8(1,0,1,1,1,1,1,1): {
   18384       /* ---------------- IT (if-then) ---------------- */
   18385       UInt firstcond = INSN0(7,4);
   18386       UInt mask = INSN0(3,0);
   18387       UInt newITSTATE = 0;
   18388       /* This is the ITSTATE represented as described in
   18389          libvex_guest_arm.h.  It is not the ARM ARM representation. */
   18390       HChar c1 = '.';
   18391       HChar c2 = '.';
   18392       HChar c3 = '.';
   18393       Bool valid = compute_ITSTATE( &newITSTATE, &c1, &c2, &c3,
   18394                                     firstcond, mask );
   18395       if (valid && firstcond != 0xF/*NV*/) {
   18396          /* Not allowed in an IT block; SIGILL if so. */
   18397          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   18398 
   18399          IRTemp t = newTemp(Ity_I32);
   18400          assign(t, mkU32(newITSTATE));
   18401          put_ITSTATE(t);
   18402 
   18403          DIP("it%c%c%c %s\n", c1, c2, c3, nCC(firstcond));
   18404          goto decode_success;
   18405       }
   18406       break;
   18407    }
   18408 
   18409    case BITS8(1,0,1,1,0,0,0,1):
   18410    case BITS8(1,0,1,1,0,0,1,1):
   18411    case BITS8(1,0,1,1,1,0,0,1):
   18412    case BITS8(1,0,1,1,1,0,1,1): {
   18413       /* ---------------- CB{N}Z ---------------- */
   18414       UInt rN    = INSN0(2,0);
   18415       UInt bOP   = INSN0(11,11);
   18416       UInt imm32 = (INSN0(9,9) << 6) | (INSN0(7,3) << 1);
   18417       gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   18418       /* It's a conditional branch forward. */
   18419       IRTemp kond = newTemp(Ity_I1);
   18420       assign( kond, binop(bOP ? Iop_CmpNE32 : Iop_CmpEQ32,
   18421                           getIRegT(rN), mkU32(0)) );
   18422 
   18423       vassert(0 == (guest_R15_curr_instr_notENC & 1));
   18424       /* Looks like the nearest insn we can branch to is the one after
   18425          next.  That makes sense, as there's no point in being able to
   18426          encode a conditional branch to the next instruction. */
   18427       UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
   18428       stmt(IRStmt_Exit( mkexpr(kond),
   18429                         Ijk_Boring,
   18430                         IRConst_U32(toUInt(dst)),
   18431                         OFFB_R15T ));
   18432       DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1);
   18433       goto decode_success;
   18434    }
   18435 
   18436    default:
   18437       break; /* examine the next shortest prefix */
   18438 
   18439    }
   18440 
   18441 
   18442    /* ================ 16-bit 15:9 cases ================ */
   18443 
   18444    switch (INSN0(15,9)) {
   18445 
   18446    case BITS7(1,0,1,1,0,1,0): {
   18447       /* ---------------- PUSH ---------------- */
   18448       /* This is a bit like STMxx, but way simpler. Complications we
   18449          don't have to deal with:
   18450          * SP being one of the transferred registers
   18451          * direction (increment vs decrement)
   18452          * before-vs-after-ness
   18453       */
   18454       Int  i, nRegs;
   18455       UInt bitR    = INSN0(8,8);
   18456       UInt regList = INSN0(7,0);
   18457       if (bitR) regList |= (1 << 14);
   18458 
   18459       /* At least one register must be transferred, else result is
   18460          UNPREDICTABLE. */
   18461       if (regList != 0) {
   18462          /* Since we can't generate a guaranteed non-trapping IR
   18463             sequence, (1) jump over the insn if it is gated false, and
   18464             (2) back out the ITSTATE update. */
   18465          mk_skip_over_T16_if_cond_is_false(condT);
   18466          condT = IRTemp_INVALID;
   18467          put_ITSTATE(old_itstate);
   18468          // now uncond
   18469 
   18470          nRegs = 0;
   18471          for (i = 0; i < 16; i++) {
   18472             if ((regList & (1 << i)) != 0)
   18473                nRegs++;
   18474          }
   18475          vassert(nRegs >= 1 && nRegs <= 9);
   18476 
   18477          /* Move SP down first of all, so we're "covered".  And don't
   18478             mess with its alignment. */
   18479          IRTemp newSP = newTemp(Ity_I32);
   18480          assign(newSP, binop(Iop_Sub32, getIRegT(13), mkU32(4 * nRegs)));
   18481          putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
   18482 
   18483          /* Generate a transfer base address as a forced-aligned
   18484             version of the final SP value. */
   18485          IRTemp base = newTemp(Ity_I32);
   18486          assign(base, binop(Iop_And32, mkexpr(newSP), mkU32(~3)));
   18487 
   18488          /* Now the transfers */
   18489          nRegs = 0;
   18490          for (i = 0; i < 16; i++) {
   18491             if ((regList & (1 << i)) != 0) {
   18492                storeLE( binop(Iop_Add32, mkexpr(base), mkU32(4 * nRegs)),
   18493                         getIRegT(i) );
   18494                nRegs++;
   18495             }
   18496          }
   18497 
   18498          /* Reinstate the ITSTATE update. */
   18499          put_ITSTATE(new_itstate);
   18500 
   18501          DIP("push {%s0x%04x}\n", bitR ? "lr," : "", regList & 0xFF);
   18502          goto decode_success;
   18503       }
   18504       break;
   18505    }
   18506 
   18507    case BITS7(1,0,1,1,1,1,0): {
   18508       /* ---------------- POP ---------------- */
   18509       Int  i, nRegs;
   18510       UInt bitR    = INSN0(8,8);
   18511       UInt regList = INSN0(7,0);
   18512 
   18513       /* At least one register must be transferred, else result is
   18514          UNPREDICTABLE. */
   18515       if (regList != 0 || bitR) {
   18516          /* Since we can't generate a guaranteed non-trapping IR
   18517             sequence, (1) jump over the insn if it is gated false, and
   18518             (2) back out the ITSTATE update. */
   18519          mk_skip_over_T16_if_cond_is_false(condT);
   18520          condT = IRTemp_INVALID;
   18521          put_ITSTATE(old_itstate);
   18522          // now uncond
   18523 
   18524          nRegs = 0;
   18525          for (i = 0; i < 8; i++) {
   18526             if ((regList & (1 << i)) != 0)
   18527                nRegs++;
   18528          }
   18529          vassert(nRegs >= 0 && nRegs <= 8);
   18530          vassert(bitR == 0 || bitR == 1);
   18531 
   18532          IRTemp oldSP = newTemp(Ity_I32);
   18533          assign(oldSP, getIRegT(13));
   18534 
   18535          /* Generate a transfer base address as a forced-aligned
   18536             version of the original SP value. */
   18537          IRTemp base = newTemp(Ity_I32);
   18538          assign(base, binop(Iop_And32, mkexpr(oldSP), mkU32(~3)));
   18539 
   18540          /* Compute a new value for SP, but don't install it yet, so
   18541             that we're "covered" until all the transfers are done.
   18542             And don't mess with its alignment. */
   18543          IRTemp newSP = newTemp(Ity_I32);
   18544          assign(newSP, binop(Iop_Add32, mkexpr(oldSP),
   18545                                         mkU32(4 * (nRegs + bitR))));
   18546 
   18547          /* Now the transfers, not including PC */
   18548          nRegs = 0;
   18549          for (i = 0; i < 8; i++) {
   18550             if ((regList & (1 << i)) != 0) {
   18551                putIRegT(i, loadLE( Ity_I32,
   18552                                    binop(Iop_Add32, mkexpr(base),
   18553                                                     mkU32(4 * nRegs))),
   18554                            IRTemp_INVALID );
   18555                nRegs++;
   18556             }
   18557          }
   18558 
   18559          IRTemp newPC = IRTemp_INVALID;
   18560          if (bitR) {
   18561             newPC = newTemp(Ity_I32);
   18562             assign( newPC, loadLE( Ity_I32,
   18563                                    binop(Iop_Add32, mkexpr(base),
   18564                                                     mkU32(4 * nRegs))));
   18565          }
   18566 
   18567          /* Now we can safely install the new SP value */
   18568          putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
   18569 
   18570          /* Reinstate the ITSTATE update. */
   18571          put_ITSTATE(new_itstate);
   18572 
   18573          /* now, do we also have to do a branch?  If so, it turns out
   18574             that the new PC value is encoded exactly as we need it to
   18575             be -- with CPSR.T in the bottom bit.  So we can simply use
   18576             it as is, no need to mess with it.  Note, therefore, this
   18577             is an interworking return. */
   18578          if (bitR) {
   18579             llPutIReg(15, mkexpr(newPC));
   18580             dres.jk_StopHere = Ijk_Ret;
   18581             dres.whatNext    = Dis_StopHere;
   18582          }
   18583 
   18584          DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
   18585          goto decode_success;
   18586       }
   18587       break;
   18588    }
   18589 
   18590    case BITS7(0,0,0,1,1,1,0):   /* ADDS */
   18591    case BITS7(0,0,0,1,1,1,1): { /* SUBS */
   18592       /* ---------------- ADDS Rd, Rn, #uimm3 ---------------- */
   18593       /* ---------------- SUBS Rd, Rn, #uimm3 ---------------- */
   18594       UInt   uimm3 = INSN0(8,6);
   18595       UInt   rN    = INSN0(5,3);
   18596       UInt   rD    = INSN0(2,0);
   18597       UInt   isSub = INSN0(9,9);
   18598       IRTemp argL  = newTemp(Ity_I32);
   18599       IRTemp argR  = newTemp(Ity_I32);
   18600       assign( argL, getIRegT(rN) );
   18601       assign( argR, mkU32(uimm3) );
   18602       putIRegT(rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
   18603                          mkexpr(argL), mkexpr(argR)),
   18604                    condT);
   18605       setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
   18606                       argL, argR, cond_AND_notInIT_T );
   18607       DIP("%s r%u, r%u, #%u\n", isSub ? "subs" : "adds", rD, rN, uimm3);
   18608       goto decode_success;
   18609    }
   18610 
   18611    case BITS7(0,0,0,1,1,0,0):   /* ADDS */
   18612    case BITS7(0,0,0,1,1,0,1): { /* SUBS */
   18613       /* ---------------- ADDS Rd, Rn, Rm ---------------- */
   18614       /* ---------------- SUBS Rd, Rn, Rm ---------------- */
   18615       UInt   rM    = INSN0(8,6);
   18616       UInt   rN    = INSN0(5,3);
   18617       UInt   rD    = INSN0(2,0);
   18618       UInt   isSub = INSN0(9,9);
   18619       IRTemp argL  = newTemp(Ity_I32);
   18620       IRTemp argR  = newTemp(Ity_I32);
   18621       assign( argL, getIRegT(rN) );
   18622       assign( argR, getIRegT(rM) );
   18623       putIRegT( rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
   18624                           mkexpr(argL), mkexpr(argR)),
   18625                     condT );
   18626       setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
   18627                       argL, argR, cond_AND_notInIT_T );
   18628       DIP("%s r%u, r%u, r%u\n", isSub ? "subs" : "adds", rD, rN, rM);
   18629       goto decode_success;
   18630    }
   18631 
   18632    case BITS7(0,1,0,1,0,0,0):   /* STR */
   18633    case BITS7(0,1,0,1,1,0,0): { /* LDR */
   18634       /* ------------- LDR Rd, [Rn, Rm] ------------- */
   18635       /* ------------- STR Rd, [Rn, Rm] ------------- */
   18636       /* LDR/STR Rd, [Rn + Rm] */
   18637       UInt    rD   = INSN0(2,0);
   18638       UInt    rN   = INSN0(5,3);
   18639       UInt    rM   = INSN0(8,6);
   18640       UInt    isLD = INSN0(11,11);
   18641 
   18642       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   18643       put_ITSTATE(old_itstate); // backout
   18644       if (isLD) {
   18645          IRTemp tD = newTemp(Ity_I32);
   18646          loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
   18647          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18648       } else {
   18649          storeGuardedLE(ea, getIRegT(rD), condT);
   18650       }
   18651       put_ITSTATE(new_itstate); // restore
   18652 
   18653       DIP("%s r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
   18654       goto decode_success;
   18655    }
   18656 
   18657    case BITS7(0,1,0,1,0,0,1):
   18658    case BITS7(0,1,0,1,1,0,1): {
   18659       /* ------------- LDRH Rd, [Rn, Rm] ------------- */
   18660       /* ------------- STRH Rd, [Rn, Rm] ------------- */
   18661       /* LDRH/STRH Rd, [Rn + Rm] */
   18662       UInt    rD   = INSN0(2,0);
   18663       UInt    rN   = INSN0(5,3);
   18664       UInt    rM   = INSN0(8,6);
   18665       UInt    isLD = INSN0(11,11);
   18666 
   18667       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   18668       put_ITSTATE(old_itstate); // backout
   18669       if (isLD) {
   18670          IRTemp tD = newTemp(Ity_I32);
   18671          loadGuardedLE(tD, ILGop_16Uto32, ea, llGetIReg(rD), condT);
   18672          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18673       } else {
   18674          storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
   18675       }
   18676       put_ITSTATE(new_itstate); // restore
   18677 
   18678       DIP("%sh r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
   18679       goto decode_success;
   18680    }
   18681 
   18682    case BITS7(0,1,0,1,1,1,1): {
   18683       /* ------------- LDRSH Rd, [Rn, Rm] ------------- */
   18684       /* LDRSH Rd, [Rn + Rm] */
   18685       UInt    rD = INSN0(2,0);
   18686       UInt    rN = INSN0(5,3);
   18687       UInt    rM = INSN0(8,6);
   18688 
   18689       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   18690       put_ITSTATE(old_itstate); // backout
   18691       IRTemp tD = newTemp(Ity_I32);
   18692       loadGuardedLE(tD, ILGop_16Sto32, ea, llGetIReg(rD), condT);
   18693       putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18694       put_ITSTATE(new_itstate); // restore
   18695 
   18696       DIP("ldrsh r%u, [r%u, r%u]\n", rD, rN, rM);
   18697       goto decode_success;
   18698    }
   18699 
   18700    case BITS7(0,1,0,1,0,1,1): {
   18701       /* ------------- LDRSB Rd, [Rn, Rm] ------------- */
   18702       /* LDRSB Rd, [Rn + Rm] */
   18703       UInt    rD = INSN0(2,0);
   18704       UInt    rN = INSN0(5,3);
   18705       UInt    rM = INSN0(8,6);
   18706 
   18707       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   18708       put_ITSTATE(old_itstate); // backout
   18709       IRTemp tD = newTemp(Ity_I32);
   18710       loadGuardedLE(tD, ILGop_8Sto32, ea, llGetIReg(rD), condT);
   18711       putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18712       put_ITSTATE(new_itstate); // restore
   18713 
   18714       DIP("ldrsb r%u, [r%u, r%u]\n", rD, rN, rM);
   18715       goto decode_success;
   18716    }
   18717 
   18718    case BITS7(0,1,0,1,0,1,0):
   18719    case BITS7(0,1,0,1,1,1,0): {
   18720       /* ------------- LDRB Rd, [Rn, Rm] ------------- */
   18721       /* ------------- STRB Rd, [Rn, Rm] ------------- */
   18722       /* LDRB/STRB Rd, [Rn + Rm] */
   18723       UInt    rD   = INSN0(2,0);
   18724       UInt    rN   = INSN0(5,3);
   18725       UInt    rM   = INSN0(8,6);
   18726       UInt    isLD = INSN0(11,11);
   18727 
   18728       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   18729       put_ITSTATE(old_itstate); // backout
   18730       if (isLD) {
   18731          IRTemp tD = newTemp(Ity_I32);
   18732          loadGuardedLE(tD, ILGop_8Uto32, ea, llGetIReg(rD), condT);
   18733          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18734       } else {
   18735          storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
   18736       }
   18737       put_ITSTATE(new_itstate); // restore
   18738 
   18739       DIP("%sb r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
   18740       goto decode_success;
   18741    }
   18742 
   18743    default:
   18744       break; /* examine the next shortest prefix */
   18745 
   18746    }
   18747 
   18748 
   18749    /* ================ 16-bit 15:11 cases ================ */
   18750 
   18751    switch (INSN0(15,11)) {
   18752 
   18753    case BITS5(0,0,1,1,0):
   18754    case BITS5(0,0,1,1,1): {
   18755       /* ---------------- ADDS Rn, #uimm8 ---------------- */
   18756       /* ---------------- SUBS Rn, #uimm8 ---------------- */
   18757       UInt   isSub = INSN0(11,11);
   18758       UInt   rN    = INSN0(10,8);
   18759       UInt   uimm8 = INSN0(7,0);
   18760       IRTemp argL  = newTemp(Ity_I32);
   18761       IRTemp argR  = newTemp(Ity_I32);
   18762       assign( argL, getIRegT(rN) );
   18763       assign( argR, mkU32(uimm8) );
   18764       putIRegT( rN, binop(isSub ? Iop_Sub32 : Iop_Add32,
   18765                           mkexpr(argL), mkexpr(argR)), condT );
   18766       setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
   18767                       argL, argR, cond_AND_notInIT_T );
   18768       DIP("%s r%u, #%u\n", isSub ? "subs" : "adds", rN, uimm8);
   18769       goto decode_success;
   18770    }
   18771 
   18772    case BITS5(1,0,1,0,0): {
   18773       /* ---------------- ADD rD, PC, #imm8 * 4 ---------------- */
   18774       /* a.k.a. ADR */
   18775       /* rD = align4(PC) + imm8 * 4 */
   18776       UInt rD   = INSN0(10,8);
   18777       UInt imm8 = INSN0(7,0);
   18778       putIRegT(rD, binop(Iop_Add32,
   18779                          binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   18780                          mkU32(imm8 * 4)),
   18781                    condT);
   18782       DIP("add r%u, pc, #%u\n", rD, imm8 * 4);
   18783       goto decode_success;
   18784    }
   18785 
   18786    case BITS5(1,0,1,0,1): {
   18787       /* ---------------- ADD rD, SP, #imm8 * 4 ---------------- */
   18788       UInt rD   = INSN0(10,8);
   18789       UInt imm8 = INSN0(7,0);
   18790       putIRegT(rD, binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4)),
   18791                    condT);
   18792       DIP("add r%u, r13, #%u\n", rD, imm8 * 4);
   18793       goto decode_success;
   18794    }
   18795 
   18796    case BITS5(0,0,1,0,1): {
   18797       /* ---------------- CMP Rn, #uimm8 ---------------- */
   18798       UInt   rN    = INSN0(10,8);
   18799       UInt   uimm8 = INSN0(7,0);
   18800       IRTemp argL  = newTemp(Ity_I32);
   18801       IRTemp argR  = newTemp(Ity_I32);
   18802       assign( argL, getIRegT(rN) );
   18803       assign( argR, mkU32(uimm8) );
   18804       /* Update flags regardless of whether in an IT block or not. */
   18805       setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   18806       DIP("cmp r%u, #%u\n", rN, uimm8);
   18807       goto decode_success;
   18808    }
   18809 
   18810    case BITS5(0,0,1,0,0): {
   18811       /* -------------- (T1) MOVS Rn, #uimm8 -------------- */
   18812       UInt   rD    = INSN0(10,8);
   18813       UInt   uimm8 = INSN0(7,0);
   18814       IRTemp oldV  = newTemp(Ity_I32);
   18815       IRTemp oldC  = newTemp(Ity_I32);
   18816       IRTemp res   = newTemp(Ity_I32);
   18817       assign( oldV, mk_armg_calculate_flag_v() );
   18818       assign( oldC, mk_armg_calculate_flag_c() );
   18819       assign( res, mkU32(uimm8) );
   18820       putIRegT(rD, mkexpr(res), condT);
   18821       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   18822                          cond_AND_notInIT_T );
   18823       DIP("movs r%u, #%u\n", rD, uimm8);
   18824       goto decode_success;
   18825    }
   18826 
   18827    case BITS5(0,1,0,0,1): {
   18828       /* ------------- LDR Rd, [PC, #imm8 * 4] ------------- */
   18829       /* LDR Rd, [align4(PC) + imm8 * 4] */
   18830       UInt   rD   = INSN0(10,8);
   18831       UInt   imm8 = INSN0(7,0);
   18832       IRTemp ea   = newTemp(Ity_I32);
   18833 
   18834       assign(ea, binop(Iop_Add32,
   18835                        binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   18836                        mkU32(imm8 * 4)));
   18837       put_ITSTATE(old_itstate); // backout
   18838       IRTemp tD = newTemp(Ity_I32);
   18839       loadGuardedLE( tD, ILGop_Ident32, mkexpr(ea), llGetIReg(rD), condT );
   18840       putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18841       put_ITSTATE(new_itstate); // restore
   18842 
   18843       DIP("ldr r%u, [pc, #%u]\n", rD, imm8 * 4);
   18844       goto decode_success;
   18845    }
   18846 
   18847    case BITS5(0,1,1,0,0):   /* STR */
   18848    case BITS5(0,1,1,0,1): { /* LDR */
   18849       /* ------------- LDR Rd, [Rn, #imm5 * 4] ------------- */
   18850       /* ------------- STR Rd, [Rn, #imm5 * 4] ------------- */
   18851       /* LDR/STR Rd, [Rn + imm5 * 4] */
   18852       UInt    rD   = INSN0(2,0);
   18853       UInt    rN   = INSN0(5,3);
   18854       UInt    imm5 = INSN0(10,6);
   18855       UInt    isLD = INSN0(11,11);
   18856 
   18857       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 4));
   18858       put_ITSTATE(old_itstate); // backout
   18859       if (isLD) {
   18860          IRTemp tD = newTemp(Ity_I32);
   18861          loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
   18862          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18863       } else {
   18864          storeGuardedLE( ea, getIRegT(rD), condT );
   18865       }
   18866       put_ITSTATE(new_itstate); // restore
   18867 
   18868       DIP("%s r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 4);
   18869       goto decode_success;
   18870    }
   18871 
   18872    case BITS5(1,0,0,0,0):   /* STRH */
   18873    case BITS5(1,0,0,0,1): { /* LDRH */
   18874       /* ------------- LDRH Rd, [Rn, #imm5 * 2] ------------- */
   18875       /* ------------- STRH Rd, [Rn, #imm5 * 2] ------------- */
   18876       /* LDRH/STRH Rd, [Rn + imm5 * 2] */
   18877       UInt    rD   = INSN0(2,0);
   18878       UInt    rN   = INSN0(5,3);
   18879       UInt    imm5 = INSN0(10,6);
   18880       UInt    isLD = INSN0(11,11);
   18881 
   18882       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 2));
   18883       put_ITSTATE(old_itstate); // backout
   18884       if (isLD) {
   18885          IRTemp tD = newTemp(Ity_I32);
   18886          loadGuardedLE( tD, ILGop_16Uto32, ea, llGetIReg(rD), condT );
   18887          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18888       } else {
   18889          storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
   18890       }
   18891       put_ITSTATE(new_itstate); // restore
   18892 
   18893       DIP("%sh r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 2);
   18894       goto decode_success;
   18895    }
   18896 
   18897    case BITS5(0,1,1,1,0):   /* STRB */
   18898    case BITS5(0,1,1,1,1): { /* LDRB */
   18899       /* ------------- LDRB Rd, [Rn, #imm5] ------------- */
   18900       /* ------------- STRB Rd, [Rn, #imm5] ------------- */
   18901       /* LDRB/STRB Rd, [Rn + imm5] */
   18902       UInt    rD   = INSN0(2,0);
   18903       UInt    rN   = INSN0(5,3);
   18904       UInt    imm5 = INSN0(10,6);
   18905       UInt    isLD = INSN0(11,11);
   18906 
   18907       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5));
   18908       put_ITSTATE(old_itstate); // backout
   18909       if (isLD) {
   18910          IRTemp tD = newTemp(Ity_I32);
   18911          loadGuardedLE( tD, ILGop_8Uto32, ea, llGetIReg(rD), condT );
   18912          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18913       } else {
   18914          storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
   18915       }
   18916       put_ITSTATE(new_itstate); // restore
   18917 
   18918       DIP("%sb r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5);
   18919       goto decode_success;
   18920    }
   18921 
   18922    case BITS5(1,0,0,1,0):   /* STR */
   18923    case BITS5(1,0,0,1,1): { /* LDR */
   18924       /* ------------- LDR Rd, [SP, #imm8 * 4] ------------- */
   18925       /* ------------- STR Rd, [SP, #imm8 * 4] ------------- */
   18926       /* LDR/STR Rd, [SP + imm8 * 4] */
   18927       UInt rD    = INSN0(10,8);
   18928       UInt imm8  = INSN0(7,0);
   18929       UInt isLD  = INSN0(11,11);
   18930 
   18931       IRExpr* ea = binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4));
   18932       put_ITSTATE(old_itstate); // backout
   18933       if (isLD) {
   18934          IRTemp tD = newTemp(Ity_I32);
   18935          loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
   18936          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   18937       } else {
   18938          storeGuardedLE(ea, getIRegT(rD), condT);
   18939       }
   18940       put_ITSTATE(new_itstate); // restore
   18941 
   18942       DIP("%s r%u, [sp, #%u]\n", isLD ? "ldr" : "str", rD, imm8 * 4);
   18943       goto decode_success;
   18944    }
   18945 
   18946    case BITS5(1,1,0,0,1): {
   18947       /* ------------- LDMIA Rn!, {reglist} ------------- */
   18948       Int i, nRegs = 0;
   18949       UInt rN   = INSN0(10,8);
   18950       UInt list = INSN0(7,0);
   18951       /* Empty lists aren't allowed. */
   18952       if (list != 0) {
   18953          mk_skip_over_T16_if_cond_is_false(condT);
   18954          condT = IRTemp_INVALID;
   18955          put_ITSTATE(old_itstate);
   18956          // now uncond
   18957 
   18958          IRTemp oldRn = newTemp(Ity_I32);
   18959          IRTemp base  = newTemp(Ity_I32);
   18960          assign(oldRn, getIRegT(rN));
   18961          assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
   18962          for (i = 0; i < 8; i++) {
   18963             if (0 == (list & (1 << i)))
   18964                continue;
   18965             nRegs++;
   18966             putIRegT(
   18967                i, loadLE(Ity_I32,
   18968                          binop(Iop_Add32, mkexpr(base),
   18969                                           mkU32(nRegs * 4 - 4))),
   18970                IRTemp_INVALID
   18971             );
   18972          }
   18973          /* Only do the writeback for rN if it isn't in the list of
   18974             registers to be transferred. */
   18975          if (0 == (list & (1 << rN))) {
   18976             putIRegT(rN,
   18977                      binop(Iop_Add32, mkexpr(oldRn),
   18978                                       mkU32(nRegs * 4)),
   18979                      IRTemp_INVALID
   18980             );
   18981          }
   18982 
   18983          /* Reinstate the ITSTATE update. */
   18984          put_ITSTATE(new_itstate);
   18985 
   18986          DIP("ldmia r%u!, {0x%04x}\n", rN, list);
   18987          goto decode_success;
   18988       }
   18989       break;
   18990    }
   18991 
   18992    case BITS5(1,1,0,0,0): {
   18993       /* ------------- STMIA Rn!, {reglist} ------------- */
   18994       Int i, nRegs = 0;
   18995       UInt rN   = INSN0(10,8);
   18996       UInt list = INSN0(7,0);
   18997       /* Empty lists aren't allowed.  Also, if rN is in the list then
   18998          it must be the lowest numbered register in the list. */
   18999       Bool valid = list != 0;
   19000       if (valid && 0 != (list & (1 << rN))) {
   19001          for (i = 0; i < rN; i++) {
   19002             if (0 != (list & (1 << i)))
   19003                valid = False;
   19004          }
   19005       }
   19006       if (valid) {
   19007          mk_skip_over_T16_if_cond_is_false(condT);
   19008          condT = IRTemp_INVALID;
   19009          put_ITSTATE(old_itstate);
   19010          // now uncond
   19011 
   19012          IRTemp oldRn = newTemp(Ity_I32);
   19013          IRTemp base = newTemp(Ity_I32);
   19014          assign(oldRn, getIRegT(rN));
   19015          assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
   19016          for (i = 0; i < 8; i++) {
   19017             if (0 == (list & (1 << i)))
   19018                continue;
   19019             nRegs++;
   19020             storeLE( binop(Iop_Add32, mkexpr(base), mkU32(nRegs * 4 - 4)),
   19021                      getIRegT(i) );
   19022          }
   19023          /* Always do the writeback. */
   19024          putIRegT(rN,
   19025                   binop(Iop_Add32, mkexpr(oldRn),
   19026                                    mkU32(nRegs * 4)),
   19027                   IRTemp_INVALID);
   19028 
   19029          /* Reinstate the ITSTATE update. */
   19030          put_ITSTATE(new_itstate);
   19031 
   19032          DIP("stmia r%u!, {0x%04x}\n", rN, list);
   19033          goto decode_success;
   19034       }
   19035       break;
   19036    }
   19037 
   19038    case BITS5(0,0,0,0,0):   /* LSLS */
   19039    case BITS5(0,0,0,0,1):   /* LSRS */
   19040    case BITS5(0,0,0,1,0): { /* ASRS */
   19041       /* ---------------- LSLS Rd, Rm, #imm5 ---------------- */
   19042       /* ---------------- LSRS Rd, Rm, #imm5 ---------------- */
   19043       /* ---------------- ASRS Rd, Rm, #imm5 ---------------- */
   19044       UInt   rD   = INSN0(2,0);
   19045       UInt   rM   = INSN0(5,3);
   19046       UInt   imm5 = INSN0(10,6);
   19047       IRTemp res  = newTemp(Ity_I32);
   19048       IRTemp resC = newTemp(Ity_I32);
   19049       IRTemp rMt  = newTemp(Ity_I32);
   19050       IRTemp oldV = newTemp(Ity_I32);
   19051       const HChar* wot  = "???";
   19052       assign(rMt, getIRegT(rM));
   19053       assign(oldV, mk_armg_calculate_flag_v());
   19054       /* Looks like INSN0(12,11) are the standard 'how' encoding.
   19055          Could compactify if the ROR case later appears. */
   19056       switch (INSN0(15,11)) {
   19057          case BITS5(0,0,0,0,0):
   19058             compute_result_and_C_after_LSL_by_imm5(
   19059                dis_buf, &res, &resC, rMt, imm5, rM
   19060             );
   19061             wot = "lsl";
   19062             break;
   19063          case BITS5(0,0,0,0,1):
   19064             compute_result_and_C_after_LSR_by_imm5(
   19065                dis_buf, &res, &resC, rMt, imm5, rM
   19066             );
   19067             wot = "lsr";
   19068             break;
   19069          case BITS5(0,0,0,1,0):
   19070             compute_result_and_C_after_ASR_by_imm5(
   19071                dis_buf, &res, &resC, rMt, imm5, rM
   19072             );
   19073             wot = "asr";
   19074             break;
   19075          default:
   19076             /*NOTREACHED*/vassert(0);
   19077       }
   19078       // not safe to read guest state after this point
   19079       putIRegT(rD, mkexpr(res), condT);
   19080       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
   19081                          cond_AND_notInIT_T );
   19082       /* ignore buf and roll our own output */
   19083       DIP("%ss r%u, r%u, #%u\n", wot, rD, rM, imm5);
   19084       goto decode_success;
   19085    }
   19086 
   19087    case BITS5(1,1,1,0,0): {
   19088       /* ---------------- B #simm11 ---------------- */
   19089       Int  simm11 = INSN0(10,0);
   19090            simm11 = (simm11 << 21) >> 20;
   19091       UInt dst    = simm11 + guest_R15_curr_instr_notENC + 4;
   19092       /* Only allowed outside or last-in IT block; SIGILL if not so. */
   19093       gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   19094       // and skip this insn if not selected; being cleverer is too
   19095       // difficult
   19096       mk_skip_over_T16_if_cond_is_false(condT);
   19097       condT = IRTemp_INVALID;
   19098       // now uncond
   19099       llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
   19100       dres.jk_StopHere = Ijk_Boring;
   19101       dres.whatNext    = Dis_StopHere;
   19102       DIP("b 0x%x\n", dst);
   19103       goto decode_success;
   19104    }
   19105 
   19106    default:
   19107       break; /* examine the next shortest prefix */
   19108 
   19109    }
   19110 
   19111 
   19112    /* ================ 16-bit 15:12 cases ================ */
   19113 
   19114    switch (INSN0(15,12)) {
   19115 
   19116    case BITS4(1,1,0,1): {
   19117       /* ---------------- Bcond #simm8 ---------------- */
   19118       UInt cond  = INSN0(11,8);
   19119       Int  simm8 = INSN0(7,0);
   19120            simm8 = (simm8 << 24) >> 23;
   19121       UInt dst   = simm8 + guest_R15_curr_instr_notENC + 4;
   19122       if (cond != ARMCondAL && cond != ARMCondNV) {
   19123          /* Not allowed in an IT block; SIGILL if so. */
   19124          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   19125 
   19126          IRTemp kondT = newTemp(Ity_I32);
   19127          assign( kondT, mk_armg_calculate_condition(cond) );
   19128          stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
   19129                             Ijk_Boring,
   19130                             IRConst_U32(dst | 1/*CPSR.T*/),
   19131                             OFFB_R15T ));
   19132          llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2)
   19133                               | 1 /*CPSR.T*/ ));
   19134          dres.jk_StopHere = Ijk_Boring;
   19135          dres.whatNext    = Dis_StopHere;
   19136          DIP("b%s 0x%x\n", nCC(cond), dst);
   19137          goto decode_success;
   19138       }
   19139       break;
   19140    }
   19141 
   19142    default:
   19143       break; /* hmm, nothing matched */
   19144 
   19145    }
   19146 
   19147    /* ================ 16-bit misc cases ================ */
   19148 
   19149    switch (INSN0(15,0)) {
   19150       case 0xBF00:
   19151          /* ------ NOP ------ */
   19152          DIP("nop\n");
   19153          goto decode_success;
   19154       case 0xBF20:
   19155          /* ------ WFE ------ */
   19156          /* WFE gets used as a spin-loop hint.  Do the usual thing,
   19157             which is to continue after yielding. */
   19158          stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
   19159                             Ijk_Yield,
   19160                             IRConst_U32((guest_R15_curr_instr_notENC + 2)
   19161                                         | 1 /*CPSR.T*/),
   19162                             OFFB_R15T ));
   19163          DIP("wfe\n");
   19164          goto decode_success;
   19165       case 0xBF40:
   19166          /* ------ SEV ------ */
   19167          /* Treat this as a no-op.  Any matching WFEs won't really
   19168             cause the host CPU to snooze; they just cause V to try to
   19169             run some other thread for a while.  So there's no point in
   19170             really doing anything for SEV. */
   19171          DIP("sev\n");
   19172          goto decode_success;
   19173       default:
   19174          break; /* fall through */
   19175    }
   19176 
   19177    /* ----------------------------------------------------------- */
   19178    /* --                                                       -- */
   19179    /* -- Thumb 32-bit integer instructions                     -- */
   19180    /* --                                                       -- */
   19181    /* ----------------------------------------------------------- */
   19182 
   19183 #  define INSN1(_bMax,_bMin)  SLICE_UInt(((UInt)insn1), (_bMax), (_bMin))
   19184 
   19185    /* second 16 bits of the instruction, if any */
   19186    vassert(insn1 == 0);
   19187    insn1 = getUShortLittleEndianly( guest_instr+2 );
   19188 
   19189    anOp   = Iop_INVALID; /* paranoia */
   19190    anOpNm = NULL;        /* paranoia */
   19191 
   19192    /* Change result defaults to suit 32-bit insns. */
   19193    vassert(dres.whatNext   == Dis_Continue);
   19194    vassert(dres.len        == 2);
   19195    vassert(dres.continueAt == 0);
   19196    dres.len = 4;
   19197 
   19198    /* ---------------- BL/BLX simm25 ---------------- */
   19199    if (BITS5(1,1,1,1,0) == INSN0(15,11) && BITS2(1,1) == INSN1(15,14)) {
   19200       UInt isBL = INSN1(12,12);
   19201       UInt bS   = INSN0(10,10);
   19202       UInt bJ1  = INSN1(13,13);
   19203       UInt bJ2  = INSN1(11,11);
   19204       UInt bI1  = 1 ^ (bJ1 ^ bS);
   19205       UInt bI2  = 1 ^ (bJ2 ^ bS);
   19206       Int simm25
   19207          =   (bS          << (1 + 1 + 10 + 11 + 1))
   19208            | (bI1         << (1 + 10 + 11 + 1))
   19209            | (bI2         << (10 + 11 + 1))
   19210            | (INSN0(9,0)  << (11 + 1))
   19211            | (INSN1(10,0) << 1);
   19212       simm25 = (simm25 << 7) >> 7;
   19213 
   19214       vassert(0 == (guest_R15_curr_instr_notENC & 1));
   19215       UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
   19216 
   19217       /* One further validity case to check: in the case of BLX
   19218          (not-BL), that insn1[0] must be zero. */
   19219       Bool valid = True;
   19220       if (isBL == 0 && INSN1(0,0) == 1) valid = False;
   19221       if (valid) {
   19222          /* Only allowed outside or last-in IT block; SIGILL if not so. */
   19223          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   19224          // and skip this insn if not selected; being cleverer is too
   19225          // difficult
   19226          mk_skip_over_T32_if_cond_is_false(condT);
   19227          condT = IRTemp_INVALID;
   19228          // now uncond
   19229 
   19230          /* We're returning to Thumb code, hence "| 1" */
   19231          putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 4) | 1 ),
   19232                    IRTemp_INVALID);
   19233          if (isBL) {
   19234             /* BL: unconditional T -> T call */
   19235             /* we're calling Thumb code, hence "| 1" */
   19236             llPutIReg(15, mkU32( dst | 1 ));
   19237             DIP("bl 0x%x (stay in Thumb mode)\n", dst);
   19238          } else {
   19239             /* BLX: unconditional T -> A call */
   19240             /* we're calling ARM code, hence "& ~3" to align to a
   19241                valid ARM insn address */
   19242             llPutIReg(15, mkU32( dst & ~3 ));
   19243             DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
   19244          }
   19245          dres.whatNext    = Dis_StopHere;
   19246          dres.jk_StopHere = Ijk_Call;
   19247          goto decode_success;
   19248       }
   19249    }
   19250 
   19251    /* ---------------- {LD,ST}M{IA,DB} ---------------- */
   19252    if (0x3a2 == INSN0(15,6) // {LD,ST}MIA
   19253        || 0x3a4 == INSN0(15,6)) { // {LD,ST}MDB
   19254       UInt bW      = INSN0(5,5); /* writeback Rn ? */
   19255       UInt bL      = INSN0(4,4);
   19256       UInt rN      = INSN0(3,0);
   19257       UInt bP      = INSN1(15,15); /* reglist entry for r15 */
   19258       UInt bM      = INSN1(14,14); /* reglist entry for r14 */
   19259       UInt rLmost  = INSN1(12,0);  /* reglist entry for r0 .. 12 */
   19260       UInt rL13    = INSN1(13,13); /* must be zero */
   19261       UInt regList = 0;
   19262       Bool valid   = True;
   19263 
   19264       UInt bINC    = 1;
   19265       UInt bBEFORE = 0;
   19266       if (INSN0(15,6) == 0x3a4) {
   19267          bINC    = 0;
   19268          bBEFORE = 1;
   19269       }
   19270 
   19271       /* detect statically invalid cases, and construct the final
   19272          reglist */
   19273       if (rL13 == 1)
   19274          valid = False;
   19275 
   19276       if (bL == 1) {
   19277          regList = (bP << 15) | (bM << 14) | rLmost;
   19278          if (rN == 15)                       valid = False;
   19279          if (popcount32(regList) < 2)        valid = False;
   19280          if (bP == 1 && bM == 1)             valid = False;
   19281          if (bW == 1 && (regList & (1<<rN))) valid = False;
   19282       } else {
   19283          regList = (bM << 14) | rLmost;
   19284          if (bP == 1)                        valid = False;
   19285          if (rN == 15)                       valid = False;
   19286          if (popcount32(regList) < 2)        valid = False;
   19287          if (bW == 1 && (regList & (1<<rN))) valid = False;
   19288       }
   19289 
   19290       if (valid) {
   19291          if (bL == 1 && bP == 1) {
   19292             // We'll be writing the PC.  Hence:
   19293             /* Only allowed outside or last-in IT block; SIGILL if not so. */
   19294             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   19295          }
   19296 
   19297          /* Go uncond: */
   19298          mk_skip_over_T32_if_cond_is_false(condT);
   19299          condT = IRTemp_INVALID;
   19300          // now uncond
   19301 
   19302          /* Generate the IR.  This might generate a write to R15. */
   19303          mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
   19304 
   19305          if (bL == 1 && (regList & (1<<15))) {
   19306             // If we wrote to R15, we have an interworking return to
   19307             // deal with.
   19308             llPutIReg(15, llGetIReg(15));
   19309             dres.jk_StopHere = Ijk_Ret;
   19310             dres.whatNext    = Dis_StopHere;
   19311          }
   19312 
   19313          DIP("%sm%c%c r%u%s, {0x%04x}\n",
   19314               bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
   19315               rN, bW ? "!" : "", regList);
   19316 
   19317          goto decode_success;
   19318       }
   19319    }
   19320 
   19321    /* -------------- (T3) ADD{S}.W Rd, Rn, #constT -------------- */
   19322    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19323        && INSN0(9,5) == BITS5(0,1,0,0,0)
   19324        && INSN1(15,15) == 0) {
   19325       UInt bS = INSN0(4,4);
   19326       UInt rN = INSN0(3,0);
   19327       UInt rD = INSN1(11,8);
   19328       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
   19329       /* but allow "add.w reg, sp, #constT" for reg != PC */
   19330       if (!valid && rD <= 14 && rN == 13)
   19331          valid = True;
   19332       if (valid) {
   19333          IRTemp argL  = newTemp(Ity_I32);
   19334          IRTemp argR  = newTemp(Ity_I32);
   19335          IRTemp res   = newTemp(Ity_I32);
   19336          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   19337          assign(argL, getIRegT(rN));
   19338          assign(argR, mkU32(imm32));
   19339          assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
   19340          putIRegT(rD, mkexpr(res), condT);
   19341          if (bS == 1)
   19342             setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
   19343          DIP("add%s.w r%u, r%u, #%u\n",
   19344              bS == 1 ? "s" : "", rD, rN, imm32);
   19345          goto decode_success;
   19346       }
   19347    }
   19348 
   19349    /* ---------------- (T4) ADDW Rd, Rn, #uimm12 -------------- */
   19350    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19351        && INSN0(9,4) == BITS6(1,0,0,0,0,0)
   19352        && INSN1(15,15) == 0) {
   19353       UInt rN = INSN0(3,0);
   19354       UInt rD = INSN1(11,8);
   19355       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
   19356       /* but allow "addw reg, sp, #uimm12" for reg != PC */
   19357       if (!valid && rD <= 14 && rN == 13)
   19358          valid = True;
   19359       if (valid) {
   19360          IRTemp argL = newTemp(Ity_I32);
   19361          IRTemp argR = newTemp(Ity_I32);
   19362          IRTemp res  = newTemp(Ity_I32);
   19363          UInt imm12  = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
   19364          assign(argL, getIRegT(rN));
   19365          assign(argR, mkU32(imm12));
   19366          assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
   19367          putIRegT(rD, mkexpr(res), condT);
   19368          DIP("addw r%u, r%u, #%u\n", rD, rN, imm12);
   19369          goto decode_success;
   19370       }
   19371    }
   19372 
   19373    /* ---------------- (T2) CMP.W Rn, #constT ---------------- */
   19374    /* ---------------- (T2) CMN.W Rn, #constT ---------------- */
   19375    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19376        && (   INSN0(9,4) == BITS6(0,1,1,0,1,1)  // CMP
   19377            || INSN0(9,4) == BITS6(0,1,0,0,0,1)) // CMN
   19378        && INSN1(15,15) == 0
   19379        && INSN1(11,8) == BITS4(1,1,1,1)) {
   19380       UInt rN = INSN0(3,0);
   19381       if (rN != 15) {
   19382          IRTemp argL  = newTemp(Ity_I32);
   19383          IRTemp argR  = newTemp(Ity_I32);
   19384          Bool   isCMN = INSN0(9,4) == BITS6(0,1,0,0,0,1);
   19385          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   19386          assign(argL, getIRegT(rN));
   19387          assign(argR, mkU32(imm32));
   19388          setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   19389                          argL, argR, condT );
   19390          DIP("%s.w r%u, #%u\n", isCMN ? "cmn" : "cmp", rN, imm32);
   19391          goto decode_success;
   19392       }
   19393    }
   19394 
   19395    /* -------------- (T1) TST.W Rn, #constT -------------- */
   19396    /* -------------- (T1) TEQ.W Rn, #constT -------------- */
   19397    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19398        && (   INSN0(9,4) == BITS6(0,0,0,0,0,1)  // TST
   19399            || INSN0(9,4) == BITS6(0,0,1,0,0,1)) // TEQ
   19400        && INSN1(15,15) == 0
   19401        && INSN1(11,8) == BITS4(1,1,1,1)) {
   19402       UInt rN = INSN0(3,0);
   19403       if (!isBadRegT(rN)) { // yes, really, it's inconsistent with CMP.W
   19404          Bool  isTST  = INSN0(9,4) == BITS6(0,0,0,0,0,1);
   19405          IRTemp argL  = newTemp(Ity_I32);
   19406          IRTemp argR  = newTemp(Ity_I32);
   19407          IRTemp res   = newTemp(Ity_I32);
   19408          IRTemp oldV  = newTemp(Ity_I32);
   19409          IRTemp oldC  = newTemp(Ity_I32);
   19410          Bool   updC  = False;
   19411          UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
   19412          assign(argL, getIRegT(rN));
   19413          assign(argR, mkU32(imm32));
   19414          assign(res,  binop(isTST ? Iop_And32 : Iop_Xor32,
   19415                             mkexpr(argL), mkexpr(argR)));
   19416          assign( oldV, mk_armg_calculate_flag_v() );
   19417          assign( oldC, updC
   19418                        ? mkU32((imm32 >> 31) & 1)
   19419                        : mk_armg_calculate_flag_c() );
   19420          setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
   19421          DIP("%s.w r%u, #%u\n", isTST ? "tst" : "teq", rN, imm32);
   19422          goto decode_success;
   19423       }
   19424    }
   19425 
   19426    /* -------------- (T3) SUB{S}.W Rd, Rn, #constT -------------- */
   19427    /* -------------- (T3) RSB{S}.W Rd, Rn, #constT -------------- */
   19428    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19429        && (INSN0(9,5) == BITS5(0,1,1,0,1) // SUB
   19430            || INSN0(9,5) == BITS5(0,1,1,1,0)) // RSB
   19431        && INSN1(15,15) == 0) {
   19432       Bool isRSB = INSN0(9,5) == BITS5(0,1,1,1,0);
   19433       UInt bS    = INSN0(4,4);
   19434       UInt rN    = INSN0(3,0);
   19435       UInt rD    = INSN1(11,8);
   19436       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
   19437       /* but allow "sub{s}.w reg, sp, #constT" for reg != PC;
   19438          this is (T2) of "SUB (SP minus immediate)" */
   19439       if (!valid && !isRSB && rN == 13 && rD != 15)
   19440          valid = True;
   19441       if (valid) {
   19442          IRTemp argL  = newTemp(Ity_I32);
   19443          IRTemp argR  = newTemp(Ity_I32);
   19444          IRTemp res   = newTemp(Ity_I32);
   19445          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   19446          assign(argL, getIRegT(rN));
   19447          assign(argR, mkU32(imm32));
   19448          assign(res,  isRSB
   19449                       ? binop(Iop_Sub32, mkexpr(argR), mkexpr(argL))
   19450                       : binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
   19451          putIRegT(rD, mkexpr(res), condT);
   19452          if (bS == 1) {
   19453             if (isRSB)
   19454                setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
   19455             else
   19456                setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   19457          }
   19458          DIP("%s%s.w r%u, r%u, #%u\n",
   19459              isRSB ? "rsb" : "sub", bS == 1 ? "s" : "", rD, rN, imm32);
   19460          goto decode_success;
   19461       }
   19462    }
   19463 
   19464    /* -------------- (T4) SUBW Rd, Rn, #uimm12 ------------------- */
   19465    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19466        && INSN0(9,4) == BITS6(1,0,1,0,1,0)
   19467        && INSN1(15,15) == 0) {
   19468       UInt rN = INSN0(3,0);
   19469       UInt rD = INSN1(11,8);
   19470       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
   19471       /* but allow "subw sp, sp, #uimm12" */
   19472       if (!valid && rD == 13 && rN == 13)
   19473          valid = True;
   19474       if (valid) {
   19475          IRTemp argL  = newTemp(Ity_I32);
   19476          IRTemp argR  = newTemp(Ity_I32);
   19477          IRTemp res   = newTemp(Ity_I32);
   19478          UInt imm12   = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
   19479          assign(argL, getIRegT(rN));
   19480          assign(argR, mkU32(imm12));
   19481          assign(res,  binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
   19482          putIRegT(rD, mkexpr(res), condT);
   19483          DIP("subw r%u, r%u, #%u\n", rD, rN, imm12);
   19484          goto decode_success;
   19485       }
   19486    }
   19487 
   19488    /* -------------- (T1) ADC{S}.W Rd, Rn, #constT -------------- */
   19489    /* -------------- (T1) SBC{S}.W Rd, Rn, #constT -------------- */
   19490    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19491        && (   INSN0(9,5) == BITS5(0,1,0,1,0)  // ADC
   19492            || INSN0(9,5) == BITS5(0,1,0,1,1)) // SBC
   19493        && INSN1(15,15) == 0) {
   19494       /* ADC:  Rd = Rn + constT + oldC */
   19495       /* SBC:  Rd = Rn - constT - (oldC ^ 1) */
   19496       UInt bS    = INSN0(4,4);
   19497       UInt rN    = INSN0(3,0);
   19498       UInt rD    = INSN1(11,8);
   19499       if (!isBadRegT(rN) && !isBadRegT(rD)) {
   19500          IRTemp argL  = newTemp(Ity_I32);
   19501          IRTemp argR  = newTemp(Ity_I32);
   19502          IRTemp res   = newTemp(Ity_I32);
   19503          IRTemp oldC  = newTemp(Ity_I32);
   19504          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   19505          assign(argL, getIRegT(rN));
   19506          assign(argR, mkU32(imm32));
   19507          assign(oldC, mk_armg_calculate_flag_c() );
   19508          const HChar* nm  = "???";
   19509          switch (INSN0(9,5)) {
   19510             case BITS5(0,1,0,1,0): // ADC
   19511                nm = "adc";
   19512                assign(res,
   19513                       binop(Iop_Add32,
   19514                             binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
   19515                             mkexpr(oldC) ));
   19516                putIRegT(rD, mkexpr(res), condT);
   19517                if (bS)
   19518                   setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
   19519                                      argL, argR, oldC, condT );
   19520                break;
   19521             case BITS5(0,1,0,1,1): // SBC
   19522                nm = "sbc";
   19523                assign(res,
   19524                       binop(Iop_Sub32,
   19525                             binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
   19526                             binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   19527                putIRegT(rD, mkexpr(res), condT);
   19528                if (bS)
   19529                   setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   19530                                      argL, argR, oldC, condT );
   19531                break;
   19532             default:
   19533               vassert(0);
   19534          }
   19535          DIP("%s%s.w r%u, r%u, #%u\n",
   19536              nm, bS == 1 ? "s" : "", rD, rN, imm32);
   19537          goto decode_success;
   19538       }
   19539    }
   19540 
   19541    /* -------------- (T1) ORR{S}.W Rd, Rn, #constT -------------- */
   19542    /* -------------- (T1) AND{S}.W Rd, Rn, #constT -------------- */
   19543    /* -------------- (T1) BIC{S}.W Rd, Rn, #constT -------------- */
   19544    /* -------------- (T1) EOR{S}.W Rd, Rn, #constT -------------- */
   19545    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19546        && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // ORR
   19547            || INSN0(9,5) == BITS5(0,0,0,0,0)  // AND
   19548            || INSN0(9,5) == BITS5(0,0,0,0,1)  // BIC
   19549            || INSN0(9,5) == BITS5(0,0,1,0,0)  // EOR
   19550            || INSN0(9,5) == BITS5(0,0,0,1,1)) // ORN
   19551        && INSN1(15,15) == 0) {
   19552       UInt bS = INSN0(4,4);
   19553       UInt rN = INSN0(3,0);
   19554       UInt rD = INSN1(11,8);
   19555       if (!isBadRegT(rN) && !isBadRegT(rD)) {
   19556          Bool   notArgR = False;
   19557          IROp   op      = Iop_INVALID;
   19558          const HChar* nm = "???";
   19559          switch (INSN0(9,5)) {
   19560             case BITS5(0,0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
   19561             case BITS5(0,0,0,0,0): op = Iop_And32; nm = "and"; break;
   19562             case BITS5(0,0,0,0,1): op = Iop_And32; nm = "bic";
   19563                                    notArgR = True; break;
   19564             case BITS5(0,0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
   19565             case BITS5(0,0,0,1,1): op = Iop_Or32;  nm = "orn";
   19566                                    notArgR = True; break;
   19567             default: vassert(0);
   19568          }
   19569          IRTemp argL  = newTemp(Ity_I32);
   19570          IRTemp argR  = newTemp(Ity_I32);
   19571          IRTemp res   = newTemp(Ity_I32);
   19572          Bool   updC  = False;
   19573          UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
   19574          assign(argL, getIRegT(rN));
   19575          assign(argR, mkU32(notArgR ? ~imm32 : imm32));
   19576          assign(res,  binop(op, mkexpr(argL), mkexpr(argR)));
   19577          putIRegT(rD, mkexpr(res), condT);
   19578          if (bS) {
   19579             IRTemp oldV = newTemp(Ity_I32);
   19580             IRTemp oldC = newTemp(Ity_I32);
   19581             assign( oldV, mk_armg_calculate_flag_v() );
   19582             assign( oldC, updC
   19583                           ? mkU32((imm32 >> 31) & 1)
   19584                           : mk_armg_calculate_flag_c() );
   19585             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19586                                condT );
   19587          }
   19588          DIP("%s%s.w r%u, r%u, #%u\n",
   19589              nm, bS == 1 ? "s" : "", rD, rN, imm32);
   19590          goto decode_success;
   19591       }
   19592    }
   19593 
   19594    /* ---------- (T3) ADD{S}.W Rd, Rn, Rm, {shift} ---------- */
   19595    /* ---------- (T3) SUB{S}.W Rd, Rn, Rm, {shift} ---------- */
   19596    /* ---------- (T3) RSB{S}.W Rd, Rn, Rm, {shift} ---------- */
   19597    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   19598        && (   INSN0(8,5) == BITS4(1,0,0,0)  // add subopc
   19599            || INSN0(8,5) == BITS4(1,1,0,1)  // sub subopc
   19600            || INSN0(8,5) == BITS4(1,1,1,0)) // rsb subopc
   19601        && INSN1(15,15) == 0) {
   19602       UInt rN   = INSN0(3,0);
   19603       UInt rD   = INSN1(11,8);
   19604       UInt rM   = INSN1(3,0);
   19605       UInt bS   = INSN0(4,4);
   19606       UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   19607       UInt how  = INSN1(5,4);
   19608 
   19609       Bool valid = !isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM);
   19610       /* but allow "add.w reg, sp, reg, lsl #N" for N=0,1,2 or 3
   19611          (T3) "ADD (SP plus register)" */
   19612       if (!valid && INSN0(8,5) == BITS4(1,0,0,0) // add
   19613           && rD != 15 && rN == 13 && imm5 <= 3 && how == 0) {
   19614          valid = True;
   19615       }
   19616       /* also allow "sub.w reg, sp, reg" w/ no shift
   19617          (T1) "SUB (SP minus register)" */
   19618       if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // sub
   19619           && rD != 15 && rN == 13 && imm5 == 0 && how == 0) {
   19620          valid = True;
   19621       }
   19622       if (valid) {
   19623          Bool   swap = False;
   19624          IROp   op   = Iop_INVALID;
   19625          const HChar* nm = "???";
   19626          switch (INSN0(8,5)) {
   19627             case BITS4(1,0,0,0): op = Iop_Add32; nm = "add"; break;
   19628             case BITS4(1,1,0,1): op = Iop_Sub32; nm = "sub"; break;
   19629             case BITS4(1,1,1,0): op = Iop_Sub32; nm = "rsb";
   19630                                  swap = True; break;
   19631             default: vassert(0);
   19632          }
   19633 
   19634          IRTemp argL = newTemp(Ity_I32);
   19635          assign(argL, getIRegT(rN));
   19636 
   19637          IRTemp rMt = newTemp(Ity_I32);
   19638          assign(rMt, getIRegT(rM));
   19639 
   19640          IRTemp argR = newTemp(Ity_I32);
   19641          compute_result_and_C_after_shift_by_imm5(
   19642             dis_buf, &argR, NULL, rMt, how, imm5, rM
   19643          );
   19644 
   19645          IRTemp res = newTemp(Ity_I32);
   19646          assign(res, swap
   19647                      ? binop(op, mkexpr(argR), mkexpr(argL))
   19648                      : binop(op, mkexpr(argL), mkexpr(argR)));
   19649 
   19650          putIRegT(rD, mkexpr(res), condT);
   19651          if (bS) {
   19652             switch (op) {
   19653                case Iop_Add32:
   19654                   setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
   19655                   break;
   19656                case Iop_Sub32:
   19657                   if (swap)
   19658                      setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
   19659                   else
   19660                      setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   19661                   break;
   19662                default:
   19663                   vassert(0);
   19664             }
   19665          }
   19666 
   19667          DIP("%s%s.w r%u, r%u, %s\n",
   19668              nm, bS ? "s" : "", rD, rN, dis_buf);
   19669          goto decode_success;
   19670       }
   19671    }
   19672 
   19673    /* ---------- (T3) ADC{S}.W Rd, Rn, Rm, {shift} ---------- */
   19674    /* ---------- (T2) SBC{S}.W Rd, Rn, Rm, {shift} ---------- */
   19675    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   19676        && (   INSN0(8,5) == BITS4(1,0,1,0)   // adc subopc
   19677            || INSN0(8,5) == BITS4(1,0,1,1))  // sbc subopc
   19678        && INSN1(15,15) == 0) {
   19679       /* ADC:  Rd = Rn + shifter_operand + oldC */
   19680       /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
   19681       UInt rN = INSN0(3,0);
   19682       UInt rD = INSN1(11,8);
   19683       UInt rM = INSN1(3,0);
   19684       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   19685          UInt bS   = INSN0(4,4);
   19686          UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   19687          UInt how  = INSN1(5,4);
   19688 
   19689          IRTemp argL = newTemp(Ity_I32);
   19690          assign(argL, getIRegT(rN));
   19691 
   19692          IRTemp rMt = newTemp(Ity_I32);
   19693          assign(rMt, getIRegT(rM));
   19694 
   19695          IRTemp oldC = newTemp(Ity_I32);
   19696          assign(oldC, mk_armg_calculate_flag_c());
   19697 
   19698          IRTemp argR = newTemp(Ity_I32);
   19699          compute_result_and_C_after_shift_by_imm5(
   19700             dis_buf, &argR, NULL, rMt, how, imm5, rM
   19701          );
   19702 
   19703          const HChar* nm  = "???";
   19704          IRTemp res = newTemp(Ity_I32);
   19705          switch (INSN0(8,5)) {
   19706             case BITS4(1,0,1,0): // ADC
   19707                nm = "adc";
   19708                assign(res,
   19709                       binop(Iop_Add32,
   19710                             binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
   19711                             mkexpr(oldC) ));
   19712                putIRegT(rD, mkexpr(res), condT);
   19713                if (bS)
   19714                   setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
   19715                                      argL, argR, oldC, condT );
   19716                break;
   19717             case BITS4(1,0,1,1): // SBC
   19718                nm = "sbc";
   19719                assign(res,
   19720                       binop(Iop_Sub32,
   19721                             binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
   19722                             binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   19723                putIRegT(rD, mkexpr(res), condT);
   19724                if (bS)
   19725                   setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   19726                                      argL, argR, oldC, condT );
   19727                break;
   19728             default:
   19729                vassert(0);
   19730          }
   19731 
   19732          DIP("%s%s.w r%u, r%u, %s\n",
   19733              nm, bS ? "s" : "", rD, rN, dis_buf);
   19734          goto decode_success;
   19735       }
   19736    }
   19737 
   19738    /* ---------- (T3) AND{S}.W Rd, Rn, Rm, {shift} ---------- */
   19739    /* ---------- (T3) ORR{S}.W Rd, Rn, Rm, {shift} ---------- */
   19740    /* ---------- (T3) EOR{S}.W Rd, Rn, Rm, {shift} ---------- */
   19741    /* ---------- (T3) BIC{S}.W Rd, Rn, Rm, {shift} ---------- */
   19742    /* ---------- (T1) ORN{S}.W Rd, Rn, Rm, {shift} ---------- */
   19743    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   19744        && (   INSN0(8,5) == BITS4(0,0,0,0)  // and subopc
   19745            || INSN0(8,5) == BITS4(0,0,1,0)  // orr subopc
   19746            || INSN0(8,5) == BITS4(0,1,0,0)  // eor subopc
   19747            || INSN0(8,5) == BITS4(0,0,0,1)  // bic subopc
   19748            || INSN0(8,5) == BITS4(0,0,1,1)) // orn subopc
   19749        && INSN1(15,15) == 0) {
   19750       UInt rN = INSN0(3,0);
   19751       UInt rD = INSN1(11,8);
   19752       UInt rM = INSN1(3,0);
   19753       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   19754          Bool notArgR = False;
   19755          IROp op      = Iop_INVALID;
   19756          const HChar* nm  = "???";
   19757          switch (INSN0(8,5)) {
   19758             case BITS4(0,0,0,0): op = Iop_And32; nm = "and"; break;
   19759             case BITS4(0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
   19760             case BITS4(0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
   19761             case BITS4(0,0,0,1): op = Iop_And32; nm = "bic";
   19762                                  notArgR = True; break;
   19763             case BITS4(0,0,1,1): op = Iop_Or32; nm = "orn";
   19764                                  notArgR = True; break;
   19765             default: vassert(0);
   19766          }
   19767          UInt bS   = INSN0(4,4);
   19768          UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   19769          UInt how  = INSN1(5,4);
   19770 
   19771          IRTemp rNt = newTemp(Ity_I32);
   19772          assign(rNt, getIRegT(rN));
   19773 
   19774          IRTemp rMt = newTemp(Ity_I32);
   19775          assign(rMt, getIRegT(rM));
   19776 
   19777          IRTemp argR = newTemp(Ity_I32);
   19778          IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   19779 
   19780          compute_result_and_C_after_shift_by_imm5(
   19781             dis_buf, &argR, bS ? &oldC : NULL, rMt, how, imm5, rM
   19782          );
   19783 
   19784          IRTemp res = newTemp(Ity_I32);
   19785          if (notArgR) {
   19786             vassert(op == Iop_And32 || op == Iop_Or32);
   19787             assign(res, binop(op, mkexpr(rNt),
   19788                                   unop(Iop_Not32, mkexpr(argR))));
   19789          } else {
   19790             assign(res, binop(op, mkexpr(rNt), mkexpr(argR)));
   19791          }
   19792 
   19793          putIRegT(rD, mkexpr(res), condT);
   19794          if (bS) {
   19795             IRTemp oldV = newTemp(Ity_I32);
   19796             assign( oldV, mk_armg_calculate_flag_v() );
   19797             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19798                                condT );
   19799          }
   19800 
   19801          DIP("%s%s.w r%u, r%u, %s\n",
   19802              nm, bS ? "s" : "", rD, rN, dis_buf);
   19803          goto decode_success;
   19804       }
   19805    }
   19806 
   19807    /* -------------- (T?) LSL{S}.W Rd, Rn, Rm -------------- */
   19808    /* -------------- (T?) LSR{S}.W Rd, Rn, Rm -------------- */
   19809    /* -------------- (T?) ASR{S}.W Rd, Rn, Rm -------------- */
   19810    /* -------------- (T?) ROR{S}.W Rd, Rn, Rm -------------- */
   19811    if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,0,0)
   19812        && INSN1(15,12) == BITS4(1,1,1,1)
   19813        && INSN1(7,4) == BITS4(0,0,0,0)) {
   19814       UInt how = INSN0(6,5); // standard encoding
   19815       UInt rN  = INSN0(3,0);
   19816       UInt rD  = INSN1(11,8);
   19817       UInt rM  = INSN1(3,0);
   19818       UInt bS  = INSN0(4,4);
   19819       Bool valid = !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rD);
   19820       if (valid) {
   19821          IRTemp rNt    = newTemp(Ity_I32);
   19822          IRTemp rMt    = newTemp(Ity_I32);
   19823          IRTemp res    = newTemp(Ity_I32);
   19824          IRTemp oldC   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   19825          IRTemp oldV   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   19826          const HChar* nms[4] = { "lsl", "lsr", "asr", "ror" };
   19827          const HChar* nm     = nms[how];
   19828          assign(rNt, getIRegT(rN));
   19829          assign(rMt, getIRegT(rM));
   19830          compute_result_and_C_after_shift_by_reg(
   19831             dis_buf, &res, bS ? &oldC : NULL,
   19832             rNt, how, rMt, rN, rM
   19833          );
   19834          if (bS)
   19835             assign(oldV, mk_armg_calculate_flag_v());
   19836          putIRegT(rD, mkexpr(res), condT);
   19837          if (bS) {
   19838             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19839                                condT );
   19840          }
   19841          DIP("%s%s.w r%u, r%u, r%u\n",
   19842              nm, bS ? "s" : "", rD, rN, rM);
   19843          goto decode_success;
   19844       }
   19845    }
   19846 
   19847    /* ------------ (T?) MOV{S}.W Rd, Rn, {shift} ------------ */
   19848    /* ------------ (T?) MVN{S}.W Rd, Rn, {shift} ------------ */
   19849    if ((INSN0(15,0) & 0xFFCF) == 0xEA4F
   19850        && INSN1(15,15) == 0) {
   19851       UInt rD = INSN1(11,8);
   19852       UInt rN = INSN1(3,0);
   19853       if (!isBadRegT(rD) && !isBadRegT(rN)) {
   19854          UInt bS    = INSN0(4,4);
   19855          UInt isMVN = INSN0(5,5);
   19856          UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
   19857          UInt how   = INSN1(5,4);
   19858 
   19859          IRTemp rNt = newTemp(Ity_I32);
   19860          assign(rNt, getIRegT(rN));
   19861 
   19862          IRTemp oldRn = newTemp(Ity_I32);
   19863          IRTemp oldC  = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   19864          compute_result_and_C_after_shift_by_imm5(
   19865             dis_buf, &oldRn, bS ? &oldC : NULL, rNt, how, imm5, rN
   19866          );
   19867 
   19868          IRTemp res = newTemp(Ity_I32);
   19869          assign(res, isMVN ? unop(Iop_Not32, mkexpr(oldRn))
   19870                            : mkexpr(oldRn));
   19871 
   19872          putIRegT(rD, mkexpr(res), condT);
   19873          if (bS) {
   19874             IRTemp oldV = newTemp(Ity_I32);
   19875             assign( oldV, mk_armg_calculate_flag_v() );
   19876             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT);
   19877          }
   19878          DIP("%s%s.w r%u, %s\n",
   19879              isMVN ? "mvn" : "mov", bS ? "s" : "", rD, dis_buf);
   19880          goto decode_success;
   19881       }
   19882    }
   19883 
   19884    /* -------------- (T?) TST.W Rn, Rm, {shift} -------------- */
   19885    /* -------------- (T?) TEQ.W Rn, Rm, {shift} -------------- */
   19886    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   19887        && (   INSN0(8,4) == BITS5(0,0,0,0,1)  // TST
   19888            || INSN0(8,4) == BITS5(0,1,0,0,1)) // TEQ
   19889        && INSN1(15,15) == 0
   19890        && INSN1(11,8) == BITS4(1,1,1,1)) {
   19891       UInt rN = INSN0(3,0);
   19892       UInt rM = INSN1(3,0);
   19893       if (!isBadRegT(rN) && !isBadRegT(rM)) {
   19894          Bool isTST = INSN0(8,4) == BITS5(0,0,0,0,1);
   19895 
   19896          UInt how  = INSN1(5,4);
   19897          UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   19898 
   19899          IRTemp argL = newTemp(Ity_I32);
   19900          assign(argL, getIRegT(rN));
   19901 
   19902          IRTemp rMt = newTemp(Ity_I32);
   19903          assign(rMt, getIRegT(rM));
   19904 
   19905          IRTemp argR = newTemp(Ity_I32);
   19906          IRTemp oldC = newTemp(Ity_I32);
   19907          compute_result_and_C_after_shift_by_imm5(
   19908             dis_buf, &argR, &oldC, rMt, how, imm5, rM
   19909          );
   19910 
   19911          IRTemp oldV = newTemp(Ity_I32);
   19912          assign( oldV, mk_armg_calculate_flag_v() );
   19913 
   19914          IRTemp res = newTemp(Ity_I32);
   19915          assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
   19916                            mkexpr(argL), mkexpr(argR)));
   19917 
   19918          setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19919                             condT );
   19920          DIP("%s.w r%u, %s\n", isTST ? "tst" : "teq", rN, dis_buf);
   19921          goto decode_success;
   19922       }
   19923    }
   19924 
   19925    /* -------------- (T3) CMP.W Rn, Rm, {shift} -------------- */
   19926    /* -------------- (T2) CMN.W Rn, Rm, {shift} -------------- */
   19927    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   19928        && (   INSN0(8,4) == BITS5(1,1,0,1,1)  // CMP
   19929            || INSN0(8,4) == BITS5(1,0,0,0,1)) // CMN
   19930        && INSN1(15,15) == 0
   19931        && INSN1(11,8) == BITS4(1,1,1,1)) {
   19932       UInt rN = INSN0(3,0);
   19933       UInt rM = INSN1(3,0);
   19934       if (!isBadRegT(rN) && !isBadRegT(rM)) {
   19935          Bool isCMN = INSN0(8,4) == BITS5(1,0,0,0,1);
   19936          UInt how   = INSN1(5,4);
   19937          UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
   19938 
   19939          IRTemp argL = newTemp(Ity_I32);
   19940          assign(argL, getIRegT(rN));
   19941 
   19942          IRTemp rMt = newTemp(Ity_I32);
   19943          assign(rMt, getIRegT(rM));
   19944 
   19945          IRTemp argR = newTemp(Ity_I32);
   19946          compute_result_and_C_after_shift_by_imm5(
   19947             dis_buf, &argR, NULL, rMt, how, imm5, rM
   19948          );
   19949 
   19950          setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   19951                          argL, argR, condT );
   19952 
   19953          DIP("%s.w r%u, %s\n", isCMN ? "cmn" : "cmp", rN, dis_buf);
   19954          goto decode_success;
   19955       }
   19956    }
   19957 
   19958    /* -------------- (T2) MOV{S}.W Rd, #constT -------------- */
   19959    /* -------------- (T2) MVN{S}.W Rd, #constT -------------- */
   19960    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19961        && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // MOV
   19962            || INSN0(9,5) == BITS5(0,0,0,1,1)) // MVN
   19963        && INSN0(3,0) == BITS4(1,1,1,1)
   19964        && INSN1(15,15) == 0) {
   19965       UInt rD = INSN1(11,8);
   19966       if (!isBadRegT(rD)) {
   19967          Bool   updC  = False;
   19968          UInt   bS    = INSN0(4,4);
   19969          Bool   isMVN = INSN0(5,5) == 1;
   19970          UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
   19971          IRTemp res   = newTemp(Ity_I32);
   19972          assign(res, mkU32(isMVN ? ~imm32 : imm32));
   19973          putIRegT(rD, mkexpr(res), condT);
   19974          if (bS) {
   19975             IRTemp oldV = newTemp(Ity_I32);
   19976             IRTemp oldC = newTemp(Ity_I32);
   19977             assign( oldV, mk_armg_calculate_flag_v() );
   19978             assign( oldC, updC
   19979                           ? mkU32((imm32 >> 31) & 1)
   19980                           : mk_armg_calculate_flag_c() );
   19981             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19982                                condT );
   19983          }
   19984          DIP("%s%s.w r%u, #%u\n",
   19985              isMVN ? "mvn" : "mov", bS ? "s" : "", rD, imm32);
   19986          goto decode_success;
   19987       }
   19988    }
   19989 
   19990    /* -------------- (T3) MOVW Rd, #imm16 -------------- */
   19991    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   19992        && INSN0(9,4) == BITS6(1,0,0,1,0,0)
   19993        && INSN1(15,15) == 0) {
   19994       UInt rD = INSN1(11,8);
   19995       if (!isBadRegT(rD)) {
   19996          UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
   19997                       | (INSN1(14,12) << 8) | INSN1(7,0);
   19998          putIRegT(rD, mkU32(imm16), condT);
   19999          DIP("movw r%u, #%u\n", rD, imm16);
   20000          goto decode_success;
   20001       }
   20002    }
   20003 
   20004    /* ---------------- MOVT Rd, #imm16 ---------------- */
   20005    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   20006        && INSN0(9,4) == BITS6(1,0,1,1,0,0)
   20007        && INSN1(15,15) == 0) {
   20008       UInt rD = INSN1(11,8);
   20009       if (!isBadRegT(rD)) {
   20010          UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
   20011                       | (INSN1(14,12) << 8) | INSN1(7,0);
   20012          IRTemp res = newTemp(Ity_I32);
   20013          assign(res,
   20014                 binop(Iop_Or32,
   20015                       binop(Iop_And32, getIRegT(rD), mkU32(0xFFFF)),
   20016                       mkU32(imm16 << 16)));
   20017          putIRegT(rD, mkexpr(res), condT);
   20018          DIP("movt r%u, #%u\n", rD, imm16);
   20019          goto decode_success;
   20020       }
   20021    }
   20022 
   20023    /* ---------------- LD/ST reg+/-#imm8 ---------------- */
   20024    /* Loads and stores of the form:
   20025          op  Rt, [Rn, #-imm8]      or
   20026          op  Rt, [Rn], #+/-imm8    or
   20027          op  Rt, [Rn, #+/-imm8]!
   20028       where op is one of
   20029          ldrb ldrh ldr  ldrsb ldrsh
   20030          strb strh str
   20031    */
   20032    if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0) && INSN1(11,11) == 1) {
   20033       Bool   valid  = True;
   20034       Bool   syned  = False;
   20035       Bool   isST   = False;
   20036       IRType ty     = Ity_I8;
   20037       const HChar* nm = "???";
   20038 
   20039       switch (INSN0(8,4)) {
   20040          case BITS5(0,0,0,0,0):   // strb
   20041             nm = "strb"; isST = True; break;
   20042          case BITS5(0,0,0,0,1):   // ldrb
   20043             nm = "ldrb"; break;
   20044          case BITS5(1,0,0,0,1):   // ldrsb
   20045             nm = "ldrsb"; syned = True; break;
   20046          case BITS5(0,0,0,1,0):   // strh
   20047             nm = "strh"; ty = Ity_I16; isST = True; break;
   20048          case BITS5(0,0,0,1,1):   // ldrh
   20049             nm = "ldrh"; ty = Ity_I16; break;
   20050          case BITS5(1,0,0,1,1):   // ldrsh
   20051             nm = "ldrsh"; ty = Ity_I16; syned = True; break;
   20052          case BITS5(0,0,1,0,0):   // str
   20053             nm = "str"; ty = Ity_I32; isST = True; break;
   20054          case BITS5(0,0,1,0,1):
   20055             nm = "ldr"; ty = Ity_I32; break;  // ldr
   20056          default:
   20057             valid = False; break;
   20058       }
   20059 
   20060       UInt rN      = INSN0(3,0);
   20061       UInt rT      = INSN1(15,12);
   20062       UInt bP      = INSN1(10,10);
   20063       UInt bU      = INSN1(9,9);
   20064       UInt bW      = INSN1(8,8);
   20065       UInt imm8    = INSN1(7,0);
   20066       Bool loadsPC = False;
   20067 
   20068       if (valid) {
   20069          if (bP == 1 && bU == 1 && bW == 0)
   20070             valid = False;
   20071          if (bP == 0 && bW == 0)
   20072             valid = False;
   20073          if (rN == 15)
   20074             valid = False;
   20075          if (bW == 1 && rN == rT)
   20076             valid = False;
   20077          if (ty == Ity_I8 || ty == Ity_I16) {
   20078             if (isBadRegT(rT))
   20079                valid = False;
   20080          } else {
   20081             /* ty == Ity_I32 */
   20082             if (isST && rT == 15)
   20083                valid = False;
   20084             if (!isST && rT == 15)
   20085                loadsPC = True;
   20086          }
   20087       }
   20088 
   20089       if (valid) {
   20090          // if it's a branch, it can't happen in the middle of an IT block
   20091          // Also, if it is a branch, make it unconditional at this point.
   20092          // Doing conditional branches in-line is too complex (for now)
   20093          if (loadsPC) {
   20094             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20095             // go uncond
   20096             mk_skip_over_T32_if_cond_is_false(condT);
   20097             condT = IRTemp_INVALID;
   20098             // now uncond
   20099          }
   20100 
   20101          IRTemp preAddr = newTemp(Ity_I32);
   20102          assign(preAddr, getIRegT(rN));
   20103 
   20104          IRTemp postAddr = newTemp(Ity_I32);
   20105          assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   20106                                 mkexpr(preAddr), mkU32(imm8)));
   20107 
   20108          IRTemp transAddr = bP == 1 ? postAddr : preAddr;
   20109 
   20110          if (isST) {
   20111 
   20112             /* Store.  If necessary, update the base register before
   20113                the store itself, so that the common idiom of "str rX,
   20114                [sp, #-4]!" (store rX at sp-4, then do new sp = sp-4,
   20115                a.k.a "push rX") doesn't cause Memcheck to complain
   20116                that the access is below the stack pointer.  Also, not
   20117                updating sp before the store confuses Valgrind's
   20118                dynamic stack-extending logic.  So do it before the
   20119                store.  Hence we need to snarf the store data before
   20120                doing the basereg update. */
   20121 
   20122             /* get hold of the data to be stored */
   20123             IRTemp oldRt = newTemp(Ity_I32);
   20124             assign(oldRt, getIRegT(rT));
   20125 
   20126             /* Update Rn if necessary. */
   20127             if (bW == 1) {
   20128                vassert(rN != rT); // assured by validity check above
   20129                putIRegT(rN, mkexpr(postAddr), condT);
   20130             }
   20131 
   20132             /* generate the transfer */
   20133             IRExpr* data = NULL;
   20134             switch (ty) {
   20135                case Ity_I8:
   20136                   data = unop(Iop_32to8, mkexpr(oldRt));
   20137                   break;
   20138                case Ity_I16:
   20139                   data = unop(Iop_32to16, mkexpr(oldRt));
   20140                   break;
   20141                case Ity_I32:
   20142                   data = mkexpr(oldRt);
   20143                   break;
   20144                default:
   20145                   vassert(0);
   20146             }
   20147             storeGuardedLE(mkexpr(transAddr), data, condT);
   20148 
   20149          } else {
   20150 
   20151             /* Load. */
   20152             IRTemp llOldRt = newTemp(Ity_I32);
   20153             assign(llOldRt, llGetIReg(rT));
   20154 
   20155             /* generate the transfer */
   20156             IRTemp    newRt = newTemp(Ity_I32);
   20157             IRLoadGOp widen = ILGop_INVALID;
   20158             switch (ty) {
   20159                case Ity_I8:
   20160                   widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
   20161                case Ity_I16:
   20162                   widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
   20163                case Ity_I32:
   20164                   widen = ILGop_Ident32; break;
   20165                default:
   20166                   vassert(0);
   20167             }
   20168             loadGuardedLE(newRt, widen,
   20169                           mkexpr(transAddr), mkexpr(llOldRt), condT);
   20170             if (rT == 15) {
   20171                vassert(loadsPC);
   20172                /* We'll do the write to the PC just below */
   20173             } else {
   20174                vassert(!loadsPC);
   20175                /* IRTemp_INVALID is OK here because in the case where
   20176                   condT is false at run time, we're just putting the
   20177                   old rT value back. */
   20178                putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   20179             }
   20180 
   20181             /* Update Rn if necessary. */
   20182             if (bW == 1) {
   20183                vassert(rN != rT); // assured by validity check above
   20184                putIRegT(rN, mkexpr(postAddr), condT);
   20185             }
   20186 
   20187             if (loadsPC) {
   20188                /* Presumably this is an interworking branch. */
   20189                vassert(rN != 15); // assured by validity check above
   20190                vassert(rT == 15);
   20191                vassert(condT == IRTemp_INVALID); /* due to check above */
   20192                llPutIReg(15, mkexpr(newRt));
   20193                dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
   20194                dres.whatNext    = Dis_StopHere;
   20195             }
   20196          }
   20197 
   20198          if (bP == 1 && bW == 0) {
   20199             DIP("%s.w r%u, [r%u, #%c%u]\n",
   20200                 nm, rT, rN, bU ? '+' : '-', imm8);
   20201          }
   20202          else if (bP == 1 && bW == 1) {
   20203             DIP("%s.w r%u, [r%u, #%c%u]!\n",
   20204                 nm, rT, rN, bU ? '+' : '-', imm8);
   20205          }
   20206          else {
   20207             vassert(bP == 0 && bW == 1);
   20208             DIP("%s.w r%u, [r%u], #%c%u\n",
   20209                 nm, rT, rN, bU ? '+' : '-', imm8);
   20210          }
   20211 
   20212          goto decode_success;
   20213       }
   20214    }
   20215 
   20216    /* ------------- LD/ST reg+(reg<<imm2) ------------- */
   20217    /* Loads and stores of the form:
   20218          op  Rt, [Rn, Rm, LSL #imm2]
   20219       where op is one of
   20220          ldrb ldrh ldr  ldrsb ldrsh
   20221          strb strh str
   20222    */
   20223    if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)
   20224        && INSN1(11,6) == BITS6(0,0,0,0,0,0)) {
   20225       Bool   valid  = True;
   20226       Bool   syned  = False;
   20227       Bool   isST   = False;
   20228       IRType ty     = Ity_I8;
   20229       const HChar* nm = "???";
   20230 
   20231       switch (INSN0(8,4)) {
   20232          case BITS5(0,0,0,0,0):   // strb
   20233             nm = "strb"; isST = True; break;
   20234          case BITS5(0,0,0,0,1):   // ldrb
   20235             nm = "ldrb"; break;
   20236          case BITS5(1,0,0,0,1):   // ldrsb
   20237             nm = "ldrsb"; syned = True; break;
   20238          case BITS5(0,0,0,1,0):   // strh
   20239             nm = "strh"; ty = Ity_I16; isST = True; break;
   20240          case BITS5(0,0,0,1,1):   // ldrh
   20241             nm = "ldrh"; ty = Ity_I16; break;
   20242          case BITS5(1,0,0,1,1):   // ldrsh
   20243             nm = "ldrsh"; ty = Ity_I16; syned = True; break;
   20244          case BITS5(0,0,1,0,0):   // str
   20245             nm = "str"; ty = Ity_I32; isST = True; break;
   20246          case BITS5(0,0,1,0,1):
   20247             nm = "ldr"; ty = Ity_I32; break;  // ldr
   20248          default:
   20249             valid = False; break;
   20250       }
   20251 
   20252       UInt rN      = INSN0(3,0);
   20253       UInt rM      = INSN1(3,0);
   20254       UInt rT      = INSN1(15,12);
   20255       UInt imm2    = INSN1(5,4);
   20256       Bool loadsPC = False;
   20257 
   20258       if (ty == Ity_I8 || ty == Ity_I16) {
   20259          /* all 8- and 16-bit load and store cases have the
   20260             same exclusion set. */
   20261          if (rN == 15 || isBadRegT(rT) || isBadRegT(rM))
   20262             valid = False;
   20263       } else {
   20264          vassert(ty == Ity_I32);
   20265          if (rN == 15 || isBadRegT(rM))
   20266             valid = False;
   20267          if (isST && rT == 15)
   20268             valid = False;
   20269          /* If it is a load and rT is 15, that's only allowable if we
   20270             are not in an IT block, or are the last in it.  Need to insert
   20271             a dynamic check for that. */
   20272          if (!isST && rT == 15)
   20273             loadsPC = True;
   20274       }
   20275 
   20276       if (valid) {
   20277          // if it's a branch, it can't happen in the middle of an IT block
   20278          // Also, if it is a branch, make it unconditional at this point.
   20279          // Doing conditional branches in-line is too complex (for now)
   20280          if (loadsPC) {
   20281             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20282             // go uncond
   20283             mk_skip_over_T32_if_cond_is_false(condT);
   20284             condT = IRTemp_INVALID;
   20285             // now uncond
   20286          }
   20287 
   20288          IRTemp transAddr = newTemp(Ity_I32);
   20289          assign(transAddr,
   20290                 binop( Iop_Add32,
   20291                        getIRegT(rN),
   20292                        binop(Iop_Shl32, getIRegT(rM), mkU8(imm2)) ));
   20293 
   20294          if (isST) {
   20295 
   20296             /* get hold of the data to be stored */
   20297             IRTemp oldRt = newTemp(Ity_I32);
   20298             assign(oldRt, getIRegT(rT));
   20299 
   20300             /* generate the transfer */
   20301             IRExpr* data = NULL;
   20302             switch (ty) {
   20303                case Ity_I8:
   20304                   data = unop(Iop_32to8, mkexpr(oldRt));
   20305                   break;
   20306                case Ity_I16:
   20307                   data = unop(Iop_32to16, mkexpr(oldRt));
   20308                   break;
   20309               case Ity_I32:
   20310                   data = mkexpr(oldRt);
   20311                   break;
   20312               default:
   20313                  vassert(0);
   20314             }
   20315             storeGuardedLE(mkexpr(transAddr), data, condT);
   20316 
   20317          } else {
   20318 
   20319             /* Load. */
   20320             IRTemp llOldRt = newTemp(Ity_I32);
   20321             assign(llOldRt, llGetIReg(rT));
   20322 
   20323             /* generate the transfer */
   20324             IRTemp    newRt = newTemp(Ity_I32);
   20325             IRLoadGOp widen = ILGop_INVALID;
   20326             switch (ty) {
   20327                case Ity_I8:
   20328                   widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
   20329                case Ity_I16:
   20330                   widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
   20331                case Ity_I32:
   20332                   widen = ILGop_Ident32; break;
   20333                default:
   20334                   vassert(0);
   20335             }
   20336             loadGuardedLE(newRt, widen,
   20337                           mkexpr(transAddr), mkexpr(llOldRt), condT);
   20338 
   20339             if (rT == 15) {
   20340                vassert(loadsPC);
   20341                /* We'll do the write to the PC just below */
   20342             } else {
   20343                vassert(!loadsPC);
   20344                /* IRTemp_INVALID is OK here because in the case where
   20345                   condT is false at run time, we're just putting the
   20346                   old rT value back. */
   20347                putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   20348             }
   20349 
   20350             if (loadsPC) {
   20351                /* Presumably this is an interworking branch. */
   20352                vassert(rN != 15); // assured by validity check above
   20353                vassert(rT == 15);
   20354                vassert(condT == IRTemp_INVALID); /* due to check above */
   20355                llPutIReg(15, mkexpr(newRt));
   20356                dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
   20357                dres.whatNext    = Dis_StopHere;
   20358             }
   20359          }
   20360 
   20361          DIP("%s.w r%u, [r%u, r%u, LSL #%u]\n",
   20362              nm, rT, rN, rM, imm2);
   20363 
   20364          goto decode_success;
   20365       }
   20366    }
   20367 
   20368    /* --------------- LD/ST reg+imm12 --------------- */
   20369    /* Loads and stores of the form:
   20370          op  Rt, [Rn, #+-imm12]
   20371       where op is one of
   20372          ldrb ldrh ldr  ldrsb ldrsh
   20373          strb strh str
   20374    */
   20375    if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)) {
   20376       Bool   valid  = True;
   20377       Bool   syned  = INSN0(8,8) == 1;
   20378       Bool   isST   = False;
   20379       IRType ty     = Ity_I8;
   20380       UInt   bU     = INSN0(7,7); // 1: +imm   0: -imm
   20381                                   // -imm is only supported by literal versions
   20382       const HChar* nm = "???";
   20383 
   20384       switch (INSN0(6,4)) {
   20385          case BITS3(0,0,0):   // strb
   20386             nm = "strb"; isST = True; break;
   20387          case BITS3(0,0,1):   // ldrb
   20388             nm = syned ? "ldrsb" : "ldrb"; break;
   20389          case BITS3(0,1,0):   // strh
   20390             nm = "strh"; ty = Ity_I16; isST = True; break;
   20391          case BITS3(0,1,1):   // ldrh
   20392             nm = syned ? "ldrsh" : "ldrh"; ty = Ity_I16; break;
   20393          case BITS3(1,0,0):   // str
   20394             nm = "str"; ty = Ity_I32; isST = True; break;
   20395          case BITS3(1,0,1):
   20396             nm = "ldr"; ty = Ity_I32; break;  // ldr
   20397          default:
   20398             valid = False; break;
   20399       }
   20400 
   20401       UInt rN      = INSN0(3,0);
   20402       UInt rT      = INSN1(15,12);
   20403       UInt imm12   = INSN1(11,0);
   20404       Bool loadsPC = False;
   20405 
   20406       if (rN != 15 && bU == 0) {
   20407          // only pc supports #-imm12
   20408          valid = False;
   20409       }
   20410 
   20411       if (isST) {
   20412          if (syned) valid = False;
   20413          if (rN == 15 || rT == 15)
   20414             valid = False;
   20415       } else {
   20416          /* For a 32-bit load, rT == 15 is only allowable if we are not
   20417             in an IT block, or are the last in it.  Need to insert
   20418             a dynamic check for that.  Also, in this particular
   20419             case, rN == 15 is allowable.  In this case however, the
   20420             value obtained for rN is (apparently)
   20421             "word-align(address of current insn + 4)". */
   20422          if (rT == 15) {
   20423             if (ty == Ity_I32)
   20424                loadsPC = True;
   20425             else // Can't do it for B/H loads
   20426                valid = False;
   20427          }
   20428       }
   20429 
   20430       if (valid) {
   20431          // if it's a branch, it can't happen in the middle of an IT block
   20432          // Also, if it is a branch, make it unconditional at this point.
   20433          // Doing conditional branches in-line is too complex (for now)
   20434          if (loadsPC) {
   20435             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20436             // go uncond
   20437             mk_skip_over_T32_if_cond_is_false(condT);
   20438             condT = IRTemp_INVALID;
   20439             // now uncond
   20440          }
   20441 
   20442          IRTemp rNt = newTemp(Ity_I32);
   20443          if (rN == 15) {
   20444             vassert(!isST);
   20445             assign(rNt, binop(Iop_And32, getIRegT(15), mkU32(~3)));
   20446          } else {
   20447             assign(rNt, getIRegT(rN));
   20448          }
   20449 
   20450          IRTemp transAddr = newTemp(Ity_I32);
   20451          assign(transAddr,
   20452                 binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   20453                       mkexpr(rNt), mkU32(imm12)));
   20454 
   20455          IRTemp oldRt = newTemp(Ity_I32);
   20456          assign(oldRt, getIRegT(rT));
   20457 
   20458          IRTemp llOldRt = newTemp(Ity_I32);
   20459          assign(llOldRt, llGetIReg(rT));
   20460 
   20461          if (isST) {
   20462             IRExpr* data = NULL;
   20463             switch (ty) {
   20464                case Ity_I8:
   20465                   data = unop(Iop_32to8, mkexpr(oldRt));
   20466                   break;
   20467                case Ity_I16:
   20468                   data = unop(Iop_32to16, mkexpr(oldRt));
   20469                   break;
   20470               case Ity_I32:
   20471                   data = mkexpr(oldRt);
   20472                   break;
   20473               default:
   20474                  vassert(0);
   20475             }
   20476             storeGuardedLE(mkexpr(transAddr), data, condT);
   20477          } else {
   20478             IRTemp    newRt = newTemp(Ity_I32);
   20479             IRLoadGOp widen = ILGop_INVALID;
   20480             switch (ty) {
   20481                case Ity_I8:
   20482                   widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
   20483                case Ity_I16:
   20484                   widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
   20485                case Ity_I32:
   20486                   widen = ILGop_Ident32; break;
   20487                default:
   20488                   vassert(0);
   20489             }
   20490             loadGuardedLE(newRt, widen,
   20491                           mkexpr(transAddr), mkexpr(llOldRt), condT);
   20492             if (rT == 15) {
   20493                vassert(loadsPC);
   20494                /* We'll do the write to the PC just below */
   20495             } else {
   20496                vassert(!loadsPC);
   20497                /* IRTemp_INVALID is OK here because in the case where
   20498                   condT is false at run time, we're just putting the
   20499                   old rT value back. */
   20500                putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   20501             }
   20502 
   20503             if (loadsPC) {
   20504                /* Presumably this is an interworking branch. */
   20505                vassert(rT == 15);
   20506                vassert(condT == IRTemp_INVALID); /* due to check above */
   20507                llPutIReg(15, mkexpr(newRt));
   20508                dres.jk_StopHere = Ijk_Boring;
   20509                dres.whatNext    = Dis_StopHere;
   20510             }
   20511          }
   20512 
   20513          DIP("%s.w r%u, [r%u, +#%u]\n", nm, rT, rN, imm12);
   20514 
   20515          goto decode_success;
   20516       }
   20517    }
   20518 
   20519    /* -------------- LDRD/STRD reg+/-#imm8 -------------- */
   20520    /* Doubleword loads and stores of the form:
   20521          ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]    or
   20522          ldrd/strd  Rt, Rt2, [Rn], #+/-imm8    or
   20523          ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]!
   20524    */
   20525    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,0) && INSN0(6,6) == 1) {
   20526       UInt bP   = INSN0(8,8);
   20527       UInt bU   = INSN0(7,7);
   20528       UInt bW   = INSN0(5,5);
   20529       UInt bL   = INSN0(4,4);  // 1: load  0: store
   20530       UInt rN   = INSN0(3,0);
   20531       UInt rT   = INSN1(15,12);
   20532       UInt rT2  = INSN1(11,8);
   20533       UInt imm8 = INSN1(7,0);
   20534 
   20535       Bool valid = True;
   20536       if (bP == 0 && bW == 0)                 valid = False;
   20537       if (bW == 1 && (rN == rT || rN == rT2)) valid = False;
   20538       if (isBadRegT(rT) || isBadRegT(rT2))    valid = False;
   20539       if (bL == 1 && rT == rT2)               valid = False;
   20540       /* It's OK to use PC as the base register only in the
   20541          following case: ldrd Rt, Rt2, [PC, #+/-imm8] */
   20542       if (rN == 15 && (bL == 0/*store*/
   20543                        || bW == 1/*wb*/))     valid = False;
   20544 
   20545       if (valid) {
   20546          IRTemp preAddr = newTemp(Ity_I32);
   20547          assign(preAddr, 15 == rN
   20548                            ? binop(Iop_And32, getIRegT(15), mkU32(~3U))
   20549                            : getIRegT(rN));
   20550 
   20551          IRTemp postAddr = newTemp(Ity_I32);
   20552          assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   20553                                 mkexpr(preAddr), mkU32(imm8 << 2)));
   20554 
   20555          IRTemp transAddr = bP == 1 ? postAddr : preAddr;
   20556 
   20557          /* For almost all cases, we do the writeback after the transfers.
   20558             However, that leaves the stack "uncovered" in this case:
   20559                strd    rT, rT2, [sp, #-8]!
   20560             In which case, do the writeback to SP now, instead of later.
   20561             This is bad in that it makes the insn non-restartable if the
   20562             accesses fault, but at least keeps Memcheck happy. */
   20563          Bool writeback_already_done = False;
   20564          if (bL == 0/*store*/ && bW == 1/*wb*/
   20565              && rN == 13 && rN != rT && rN != rT2
   20566              && bU == 0/*minus*/ && (imm8 << 2) == 8) {
   20567             putIRegT(rN, mkexpr(postAddr), condT);
   20568             writeback_already_done = True;
   20569          }
   20570 
   20571          if (bL == 0) {
   20572             IRTemp oldRt  = newTemp(Ity_I32);
   20573             IRTemp oldRt2 = newTemp(Ity_I32);
   20574             assign(oldRt,  getIRegT(rT));
   20575             assign(oldRt2, getIRegT(rT2));
   20576             storeGuardedLE( mkexpr(transAddr),
   20577                             mkexpr(oldRt), condT );
   20578             storeGuardedLE( binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
   20579                             mkexpr(oldRt2), condT );
   20580          } else {
   20581             IRTemp oldRt  = newTemp(Ity_I32);
   20582             IRTemp oldRt2 = newTemp(Ity_I32);
   20583             IRTemp newRt  = newTemp(Ity_I32);
   20584             IRTemp newRt2 = newTemp(Ity_I32);
   20585             assign(oldRt,  llGetIReg(rT));
   20586             assign(oldRt2, llGetIReg(rT2));
   20587             loadGuardedLE( newRt, ILGop_Ident32,
   20588                            mkexpr(transAddr),
   20589                            mkexpr(oldRt), condT );
   20590             loadGuardedLE( newRt2, ILGop_Ident32,
   20591                            binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
   20592                            mkexpr(oldRt2), condT );
   20593             /* Put unconditionally, since we already switched on the condT
   20594                in the guarded loads. */
   20595             putIRegT(rT,  mkexpr(newRt),  IRTemp_INVALID);
   20596             putIRegT(rT2, mkexpr(newRt2), IRTemp_INVALID);
   20597          }
   20598 
   20599          if (bW == 1 && !writeback_already_done) {
   20600             putIRegT(rN, mkexpr(postAddr), condT);
   20601          }
   20602 
   20603          const HChar* nm = bL ? "ldrd" : "strd";
   20604 
   20605          if (bP == 1 && bW == 0) {
   20606             DIP("%s.w r%u, r%u, [r%u, #%c%u]\n",
   20607                 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
   20608          }
   20609          else if (bP == 1 && bW == 1) {
   20610             DIP("%s.w r%u, r%u, [r%u, #%c%u]!\n",
   20611                 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
   20612          }
   20613          else {
   20614             vassert(bP == 0 && bW == 1);
   20615             DIP("%s.w r%u, r%u, [r%u], #%c%u\n",
   20616                 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
   20617          }
   20618 
   20619          goto decode_success;
   20620       }
   20621    }
   20622 
   20623    /* -------------- (T3) Bcond.W label -------------- */
   20624    /* This variant carries its own condition, so can't be part of an
   20625       IT block ... */
   20626    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   20627        && INSN1(15,14) == BITS2(1,0)
   20628        && INSN1(12,12) == 0) {
   20629       UInt cond = INSN0(9,6);
   20630       if (cond != ARMCondAL && cond != ARMCondNV) {
   20631          Int simm21
   20632             =   (INSN0(10,10) << (1 + 1 + 6 + 11 + 1))
   20633               | (INSN1(11,11) << (1 + 6 + 11 + 1))
   20634               | (INSN1(13,13) << (6 + 11 + 1))
   20635               | (INSN0(5,0)   << (11 + 1))
   20636               | (INSN1(10,0)  << 1);
   20637          simm21 = (simm21 << 11) >> 11;
   20638 
   20639          vassert(0 == (guest_R15_curr_instr_notENC & 1));
   20640          UInt dst = simm21 + guest_R15_curr_instr_notENC + 4;
   20641 
   20642          /* Not allowed in an IT block; SIGILL if so. */
   20643          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   20644 
   20645          IRTemp kondT = newTemp(Ity_I32);
   20646          assign( kondT, mk_armg_calculate_condition(cond) );
   20647          stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
   20648                             Ijk_Boring,
   20649                             IRConst_U32(dst | 1/*CPSR.T*/),
   20650                             OFFB_R15T ));
   20651          llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 4)
   20652                               | 1 /*CPSR.T*/ ));
   20653          dres.jk_StopHere = Ijk_Boring;
   20654          dres.whatNext    = Dis_StopHere;
   20655          DIP("b%s.w 0x%x\n", nCC(cond), dst);
   20656          goto decode_success;
   20657       }
   20658    }
   20659 
   20660    /* ---------------- (T4) B.W label ---------------- */
   20661    /* ... whereas this variant doesn't carry its own condition, so it
   20662       has to be either unconditional or conditional by virtue of
   20663       being the last in an IT block.  The upside is that there are 4
   20664       more bits available for the jump offset, so it has a 16-times
   20665       greater branch range than the T3 variant. */
   20666    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   20667        && INSN1(15,14) == BITS2(1,0)
   20668        && INSN1(12,12) == 1) {
   20669       if (1) {
   20670          UInt bS  = INSN0(10,10);
   20671          UInt bJ1 = INSN1(13,13);
   20672          UInt bJ2 = INSN1(11,11);
   20673          UInt bI1 = 1 ^ (bJ1 ^ bS);
   20674          UInt bI2 = 1 ^ (bJ2 ^ bS);
   20675          Int simm25
   20676             =   (bS          << (1 + 1 + 10 + 11 + 1))
   20677               | (bI1         << (1 + 10 + 11 + 1))
   20678               | (bI2         << (10 + 11 + 1))
   20679               | (INSN0(9,0)  << (11 + 1))
   20680               | (INSN1(10,0) << 1);
   20681          simm25 = (simm25 << 7) >> 7;
   20682 
   20683          vassert(0 == (guest_R15_curr_instr_notENC & 1));
   20684          UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
   20685 
   20686          /* If in an IT block, must be the last insn. */
   20687          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20688 
   20689          // go uncond
   20690          mk_skip_over_T32_if_cond_is_false(condT);
   20691          condT = IRTemp_INVALID;
   20692          // now uncond
   20693 
   20694          // branch to dst
   20695          llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
   20696          dres.jk_StopHere = Ijk_Boring;
   20697          dres.whatNext    = Dis_StopHere;
   20698          DIP("b.w 0x%x\n", dst);
   20699          goto decode_success;
   20700       }
   20701    }
   20702 
   20703    /* ------------------ TBB, TBH ------------------ */
   20704    if (INSN0(15,4) == 0xE8D && INSN1(15,5) == 0x780) {
   20705       UInt rN = INSN0(3,0);
   20706       UInt rM = INSN1(3,0);
   20707       UInt bH = INSN1(4,4);
   20708       if (bH/*ATC*/ || (rN != 13 && !isBadRegT(rM))) {
   20709          /* Must be last or not-in IT block */
   20710          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20711          /* Go uncond */
   20712          mk_skip_over_T32_if_cond_is_false(condT);
   20713          condT = IRTemp_INVALID;
   20714 
   20715          IRExpr* ea
   20716              = binop(Iop_Add32,
   20717                      getIRegT(rN),
   20718                      bH ? binop(Iop_Shl32, getIRegT(rM), mkU8(1))
   20719                         : getIRegT(rM));
   20720 
   20721          IRTemp delta = newTemp(Ity_I32);
   20722          if (bH) {
   20723             assign(delta, unop(Iop_16Uto32, loadLE(Ity_I16, ea)));
   20724          } else {
   20725             assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
   20726          }
   20727 
   20728          llPutIReg(
   20729             15,
   20730             binop(Iop_Or32,
   20731                   binop(Iop_Add32,
   20732                         getIRegT(15),
   20733                         binop(Iop_Shl32, mkexpr(delta), mkU8(1))
   20734                   ),
   20735                   mkU32(1)
   20736          ));
   20737          dres.jk_StopHere = Ijk_Boring;
   20738          dres.whatNext    = Dis_StopHere;
   20739          DIP("tb%c [r%u, r%u%s]\n",
   20740              bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
   20741          goto decode_success;
   20742       }
   20743    }
   20744 
   20745    /* ------------------ UBFX ------------------ */
   20746    /* ------------------ SBFX ------------------ */
   20747    /* There are also ARM versions of the same, but it doesn't seem worth
   20748       the hassle to common up the handling (it's only a couple of C
   20749       statements). */
   20750    if ((INSN0(15,4) == 0xF3C // UBFX
   20751         || INSN0(15,4) == 0xF34) // SBFX
   20752        && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
   20753       UInt rN  = INSN0(3,0);
   20754       UInt rD  = INSN1(11,8);
   20755       UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6); // 5-bit lsb, split field
   20756       UInt wm1 = INSN1(4,0);                       // field width minus one
   20757       UInt msb =  lsb + wm1;                       // top source bit extracted
   20758       if (!isBadRegT(rD) && !isBadRegT(rN) && msb <= 31) {
   20759          Bool   isU  = INSN0(15,4) == 0xF3C;
   20760          IRTemp src  = newTemp(Ity_I32);
   20761          IRTemp tmp  = newTemp(Ity_I32);
   20762          IRTemp res  = newTemp(Ity_I32);
   20763          UInt   mask = ((1U << wm1) - 1) + (1U << wm1); // low wm1+1 bits set
   20764          vassert(msb <= 31); // 1U above: (1 << 31) on a signed int is UB
   20765          vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
   20766 
   20767          assign(src, getIRegT(rN));
   20768          assign(tmp, binop(Iop_And32,
   20769                            binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
   20770                            mkU32(mask)));
   20771          assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32, // zero/sign extend
   20772                            binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
   20773                            mkU8(31-wm1)));
   20774 
   20775          putIRegT(rD, mkexpr(res), condT);
   20776 
   20777          DIP("%s r%u, r%u, #%u, #%u\n",
   20778              isU ? "ubfx" : "sbfx", rD, rN, lsb, wm1 + 1);
   20779          goto decode_success;
   20780       }
   20781    }
   20782 
   20783    /* ------------------ UXTB ------------------ */
   20784    /* ------------------ UXTH ------------------ */
   20785    /* ------------------ SXTB ------------------ */
   20786    /* ------------------ SXTH ------------------ */
   20787    /* ----------------- UXTB16 ----------------- */
   20788    /* ----------------- SXTB16 ----------------- */
   20789    /* FIXME: this is an exact duplicate of the ARM version.  They
   20790       should be commoned up. */
   20791    if ((INSN0(15,0) == 0xFA5F     // UXTB
   20792         || INSN0(15,0) == 0xFA1F  // UXTH
   20793         || INSN0(15,0) == 0xFA4F  // SXTB
   20794         || INSN0(15,0) == 0xFA0F  // SXTH
   20795         || INSN0(15,0) == 0xFA3F  // UXTB16
   20796         || INSN0(15,0) == 0xFA2F) // SXTB16
   20797        && INSN1(15,12) == BITS4(1,1,1,1)
   20798        && INSN1(7,6) == BITS2(1,0)) {
   20799       UInt rD = INSN1(11,8);
   20800       UInt rM = INSN1(3,0);
   20801       UInt rot = INSN1(5,4);
   20802       if (!isBadRegT(rD) && !isBadRegT(rM)) {
   20803          const HChar* nm = "???";
   20804          IRTemp srcT = newTemp(Ity_I32);
   20805          IRTemp rotT = newTemp(Ity_I32);
   20806          IRTemp dstT = newTemp(Ity_I32);
   20807          assign(srcT, getIRegT(rM));
   20808          assign(rotT, genROR32(srcT, 8 * rot));
   20809          switch (INSN0(15,0)) {
   20810             case 0xFA5F: // UXTB
   20811                nm = "uxtb";
   20812                assign(dstT, unop(Iop_8Uto32,
   20813                                  unop(Iop_32to8, mkexpr(rotT))));
   20814                break;
   20815             case 0xFA1F: // UXTH
   20816                nm = "uxth";
   20817                assign(dstT, unop(Iop_16Uto32,
   20818                                  unop(Iop_32to16, mkexpr(rotT))));
   20819                break;
   20820             case 0xFA4F: // SXTB
   20821                nm = "sxtb";
   20822                assign(dstT, unop(Iop_8Sto32,
   20823                                  unop(Iop_32to8, mkexpr(rotT))));
   20824                break;
   20825             case 0xFA0F: // SXTH
   20826                nm = "sxth";
   20827                assign(dstT, unop(Iop_16Sto32,
   20828                                  unop(Iop_32to16, mkexpr(rotT))));
   20829                break;
   20830             case 0xFA3F: // UXTB16
   20831                nm = "uxtb16";
   20832                assign(dstT, binop(Iop_And32, mkexpr(rotT),
   20833                                              mkU32(0x00FF00FF)));
   20834                break;
   20835             case 0xFA2F: { // SXTB16
   20836                nm = "sxtb16";
   20837                IRTemp lo32 = newTemp(Ity_I32);
   20838                IRTemp hi32 = newTemp(Ity_I32);
   20839                assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
   20840                assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
   20841                assign(
   20842                   dstT,
   20843                   binop(Iop_Or32,
   20844                         binop(Iop_And32,
   20845                               unop(Iop_8Sto32,
   20846                                    unop(Iop_32to8, mkexpr(lo32))),
   20847                               mkU32(0xFFFF)),
   20848                         binop(Iop_Shl32,
   20849                               unop(Iop_8Sto32,
   20850                                    unop(Iop_32to8, mkexpr(hi32))),
   20851                               mkU8(16))
   20852                ));
   20853                break;
   20854             }
   20855             default:
   20856                vassert(0);
   20857          }
   20858          putIRegT(rD, mkexpr(dstT), condT);
   20859          DIP("%s r%u, r%u, ror #%u\n", nm, rD, rM, 8 * rot);
   20860          goto decode_success;
   20861       }
   20862    }
   20863 
   20864    /* -------------- MUL.W Rd, Rn, Rm -------------- */
   20865    if (INSN0(15,4) == 0xFB0
   20866        && (INSN1(15,0) & 0xF0F0) == 0xF000) {
   20867       UInt rN = INSN0(3,0);
   20868       UInt rD = INSN1(11,8);
   20869       UInt rM = INSN1(3,0);
   20870       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   20871          IRTemp res = newTemp(Ity_I32);
   20872          assign(res, binop(Iop_Mul32, getIRegT(rN), getIRegT(rM)));
   20873          putIRegT(rD, mkexpr(res), condT);
   20874          DIP("mul.w r%u, r%u, r%u\n", rD, rN, rM);
   20875          goto decode_success;
   20876       }
   20877    }
   20878 
   20879    /* -------------- SDIV.W Rd, Rn, Rm -------------- */
   20880    if (INSN0(15,4) == 0xFB9
   20881        && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
   20882       UInt rN = INSN0(3,0);
   20883       UInt rD = INSN1(11,8);
   20884       UInt rM = INSN1(3,0);
   20885       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   20886          IRTemp res  = newTemp(Ity_I32);
   20887          IRTemp argL = newTemp(Ity_I32);
   20888          IRTemp argR = newTemp(Ity_I32);
   20889          assign(argL, getIRegT(rN));
   20890          assign(argR, getIRegT(rM));
   20891          assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
   20892          putIRegT(rD, mkexpr(res), condT);
   20893          DIP("sdiv.w r%u, r%u, r%u\n", rD, rN, rM);
   20894          goto decode_success;
   20895       }
   20896    }
   20897 
   20898    /* -------------- UDIV.W Rd, Rn, Rm -------------- */
   20899    if (INSN0(15,4) == 0xFBB
   20900        && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
   20901       UInt rN = INSN0(3,0);
   20902       UInt rD = INSN1(11,8);
   20903       UInt rM = INSN1(3,0);
   20904       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   20905          IRTemp res  = newTemp(Ity_I32);
   20906          IRTemp argL = newTemp(Ity_I32);
   20907          IRTemp argR = newTemp(Ity_I32);
   20908          assign(argL, getIRegT(rN));
   20909          assign(argR, getIRegT(rM));
   20910          assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
   20911          putIRegT(rD, mkexpr(res), condT);
   20912          DIP("udiv.w r%u, r%u, r%u\n", rD, rN, rM);
   20913          goto decode_success;
   20914       }
   20915    }
   20916 
   20917    /* ------------------ {U,S}MULL ------------------ */
   20918    if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
   20919        && INSN1(7,4) == BITS4(0,0,0,0)) {
   20920       UInt isU  = INSN0(5,5);
   20921       UInt rN   = INSN0(3,0);
   20922       UInt rDlo = INSN1(15,12);
   20923       UInt rDhi = INSN1(11,8);
   20924       UInt rM   = INSN1(3,0);
   20925       if (!isBadRegT(rDhi) && !isBadRegT(rDlo)
   20926           && !isBadRegT(rN) && !isBadRegT(rM) && rDlo != rDhi) {
   20927          IRTemp res   = newTemp(Ity_I64);
   20928          assign(res, binop(isU ? Iop_MullU32 : Iop_MullS32,
   20929                            getIRegT(rN), getIRegT(rM)));
   20930          putIRegT( rDhi, unop(Iop_64HIto32, mkexpr(res)), condT );
   20931          putIRegT( rDlo, unop(Iop_64to32, mkexpr(res)), condT );
   20932          DIP("%cmull r%u, r%u, r%u, r%u\n",
   20933              isU ? 'u' : 's', rDlo, rDhi, rN, rM);
   20934          goto decode_success;
   20935       }
   20936    }
   20937 
   20938    /* ------------------ ML{A,S} ------------------ */
   20939    if (INSN0(15,4) == 0xFB0
   20940        && (   INSN1(7,4) == BITS4(0,0,0,0)    // MLA
   20941            || INSN1(7,4) == BITS4(0,0,0,1))) { // MLS
   20942       UInt rN = INSN0(3,0);
   20943       UInt rA = INSN1(15,12);
   20944       UInt rD = INSN1(11,8);
   20945       UInt rM = INSN1(3,0);
   20946       if (!isBadRegT(rD) && !isBadRegT(rN)
   20947           && !isBadRegT(rM) && !isBadRegT(rA)) {
   20948          Bool   isMLA = INSN1(7,4) == BITS4(0,0,0,0);
   20949          IRTemp res   = newTemp(Ity_I32);
   20950          assign(res,
   20951                 binop(isMLA ? Iop_Add32 : Iop_Sub32,
   20952                       getIRegT(rA),
   20953                       binop(Iop_Mul32, getIRegT(rN), getIRegT(rM))));
   20954          putIRegT(rD, mkexpr(res), condT);
   20955          DIP("%s r%u, r%u, r%u, r%u\n",
   20956              isMLA ? "mla" : "mls", rD, rN, rM, rA);
   20957          goto decode_success;
   20958       }
   20959    }
   20960 
   20961    /* ------------------ (T3) ADR ------------------ */
   20962    if ((INSN0(15,0) == 0xF20F || INSN0(15,0) == 0xF60F)
   20963        && INSN1(15,15) == 0) {
   20964       /* rD = align4(PC) + imm32 */
   20965       UInt rD = INSN1(11,8);
   20966       if (!isBadRegT(rD)) {
   20967          UInt imm32 = (INSN0(10,10) << 11)
   20968                       | (INSN1(14,12) << 8) | INSN1(7,0);
   20969          putIRegT(rD, binop(Iop_Add32,
   20970                             binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   20971                             mkU32(imm32)),
   20972                       condT);
   20973          DIP("add r%u, pc, #%u\n", rD, imm32);
   20974          goto decode_success;
   20975       }
   20976    }
   20977 
   20978    /* ----------------- (T1) UMLAL ----------------- */
   20979    /* ----------------- (T1) SMLAL ----------------- */
   20980    if ((INSN0(15,4) == 0xFBE // UMLAL
   20981         || INSN0(15,4) == 0xFBC) // SMLAL
   20982        && INSN1(7,4) == BITS4(0,0,0,0)) {
   20983       UInt rN   = INSN0(3,0);
   20984       UInt rDlo = INSN1(15,12);
   20985       UInt rDhi = INSN1(11,8);
   20986       UInt rM   = INSN1(3,0);
   20987       if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
   20988           && !isBadRegT(rM) && rDhi != rDlo) {
   20989          Bool   isS   = INSN0(15,4) == 0xFBC;
   20990          IRTemp argL  = newTemp(Ity_I32);
   20991          IRTemp argR  = newTemp(Ity_I32);
   20992          IRTemp old   = newTemp(Ity_I64);
   20993          IRTemp res   = newTemp(Ity_I64);
   20994          IRTemp resHi = newTemp(Ity_I32);
   20995          IRTemp resLo = newTemp(Ity_I32);
   20996          IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
   20997          assign( argL, getIRegT(rM));
   20998          assign( argR, getIRegT(rN));
   20999          assign( old, binop(Iop_32HLto64, getIRegT(rDhi), getIRegT(rDlo)) );
   21000          assign( res, binop(Iop_Add64,
   21001                             mkexpr(old),
   21002                             binop(mulOp, mkexpr(argL), mkexpr(argR))) );
   21003          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   21004          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   21005          putIRegT( rDhi, mkexpr(resHi), condT );
   21006          putIRegT( rDlo, mkexpr(resLo), condT );
   21007          DIP("%cmlal r%u, r%u, r%u, r%u\n",
   21008              isS ? 's' : 'u', rDlo, rDhi, rN, rM);
   21009          goto decode_success;
   21010       }
   21011    }
   21012 
   21013    /* ------------------ (T1) UMAAL ------------------ */
   21014    if (INSN0(15,4) == 0xFBE && INSN1(7,4) == BITS4(0,1,1,0)) {
   21015       UInt rN   = INSN0(3,0);
   21016       UInt rDlo = INSN1(15,12);
   21017       UInt rDhi = INSN1(11,8);
   21018       UInt rM   = INSN1(3,0);
   21019       if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
   21020           && !isBadRegT(rM) && rDhi != rDlo) {
   21021          IRTemp argN   = newTemp(Ity_I32);
   21022          IRTemp argM   = newTemp(Ity_I32);
   21023          IRTemp argDhi = newTemp(Ity_I32);
   21024          IRTemp argDlo = newTemp(Ity_I32);
   21025          IRTemp res    = newTemp(Ity_I64);
   21026          IRTemp resHi  = newTemp(Ity_I32);
   21027          IRTemp resLo  = newTemp(Ity_I32);
   21028          assign( argN,   getIRegT(rN) );
   21029          assign( argM,   getIRegT(rM) );
   21030          assign( argDhi, getIRegT(rDhi) );
   21031          assign( argDlo, getIRegT(rDlo) );
   21032          assign( res,
   21033                  binop(Iop_Add64,
   21034                        binop(Iop_Add64,
   21035                              binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
   21036                              unop(Iop_32Uto64, mkexpr(argDhi))),
   21037                        unop(Iop_32Uto64, mkexpr(argDlo))) );
   21038          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   21039          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   21040          putIRegT( rDhi, mkexpr(resHi), condT );
   21041          putIRegT( rDlo, mkexpr(resLo), condT );
   21042          DIP("umaal r%u, r%u, r%u, r%u\n", rDlo, rDhi, rN, rM);
   21043          goto decode_success;
   21044       }
   21045    }
   21046 
   21047    /* ------------------- (T1) SMMUL{R} ------------------ */
   21048    if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
   21049        && INSN0(6,4) == BITS3(1,0,1)
   21050        && INSN1(15,12) == BITS4(1,1,1,1)
   21051        && INSN1(7,5) == BITS3(0,0,0)) {
   21052       UInt bitR = INSN1(4,4);
   21053       UInt rD = INSN1(11,8);
   21054       UInt rM = INSN1(3,0);
   21055       UInt rN = INSN0(3,0);
   21056       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   21057          IRExpr* res
   21058          = unop(Iop_64HIto32,
   21059                 binop(Iop_Add64,
   21060                       binop(Iop_MullS32, getIRegT(rN), getIRegT(rM)),
   21061                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
   21062          putIRegT(rD, res, condT);
   21063          DIP("smmul%s r%u, r%u, r%u\n",
   21064              bitR ? "r" : "", rD, rN, rM);
   21065          goto decode_success;
   21066       }
   21067    }
   21068 
   21069    /* ------------------- (T1) SMMLA{R} ------------------ */
   21070    if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
   21071        && INSN0(6,4) == BITS3(1,0,1)
   21072        && INSN1(7,5) == BITS3(0,0,0)) {
   21073       UInt bitR = INSN1(4,4);
   21074       UInt rA = INSN1(15,12);
   21075       UInt rD = INSN1(11,8);
   21076       UInt rM = INSN1(3,0);
   21077       UInt rN = INSN0(3,0);
   21078       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && (rA != 13)) {
   21079          IRExpr* res
   21080          = unop(Iop_64HIto32,
   21081                 binop(Iop_Add64,
   21082                       binop(Iop_Add64,
   21083                             binop(Iop_32HLto64, getIRegT(rA), mkU32(0)),
   21084                             binop(Iop_MullS32, getIRegT(rN), getIRegT(rM))),
   21085                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
   21086          putIRegT(rD, res, condT);
   21087          DIP("smmla%s r%u, r%u, r%u, r%u\n",
   21088              bitR ? "r" : "", rD, rN, rM, rA);
   21089          goto decode_success;
   21090       }
   21091    }
   21092 
   21093    /* ------------------ (T2) ADR ------------------ */
   21094    if ((INSN0(15,0) == 0xF2AF || INSN0(15,0) == 0xF6AF)
   21095        && INSN1(15,15) == 0) {
   21096       /* rD = align4(PC) - imm32 */
   21097       UInt rD = INSN1(11,8);
   21098       if (!isBadRegT(rD)) {
   21099          UInt imm32 = (INSN0(10,10) << 11)
   21100                       | (INSN1(14,12) << 8) | INSN1(7,0);
   21101          putIRegT(rD, binop(Iop_Sub32,
   21102                             binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   21103                             mkU32(imm32)),
   21104                       condT);
   21105          DIP("sub r%u, pc, #%u\n", rD, imm32);
   21106          goto decode_success;
   21107       }
   21108    }
   21109 
   21110    /* ------------------- (T1) BFI ------------------- */
   21111    /* ------------------- (T1) BFC ------------------- */
   21112    if (INSN0(15,4) == 0xF36 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
   21113       UInt rD  = INSN1(11,8);
   21114       UInt rN  = INSN0(3,0);                       // 15 selects BFC (zero source)
   21115       UInt msb = INSN1(4,0);
   21116       UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6); // 5-bit lsb, split field
   21117       if (isBadRegT(rD) || rN == 13 || msb < lsb) {
   21118          /* undecodable; fall through */
   21119       } else {
   21120          IRTemp src    = newTemp(Ity_I32);
   21121          IRTemp olddst = newTemp(Ity_I32);
   21122          IRTemp newdst = newTemp(Ity_I32);
   21123          UInt   mask = 1U << (msb - lsb); // 1U: shift may be 31, UB on signed int
   21124          mask = (mask - 1) + mask;        // low (msb-lsb+1) bits set
   21125          vassert(mask != 0); // guaranteed by "msb < lsb" check above
   21126          mask <<= lsb;                    // move the field mask into position
   21127 
   21128          assign(src, rN == 15 ? mkU32(0) : getIRegT(rN));
   21129          assign(olddst, getIRegT(rD));
   21130          assign(newdst,
   21131                 binop(Iop_Or32,
   21132                    binop(Iop_And32,
   21133                          binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
   21134                          mkU32(mask)),
   21135                    binop(Iop_And32,
   21136                          mkexpr(olddst),
   21137                          mkU32(~mask)))
   21138                );
   21139 
   21140          putIRegT(rD, mkexpr(newdst), condT);
   21141 
   21142          if (rN == 15) {
   21143             DIP("bfc r%u, #%u, #%u\n",
   21144                 rD, lsb, msb-lsb+1);
   21145          } else {
   21146             DIP("bfi r%u, r%u, #%u, #%u\n",
   21147                 rD, rN, lsb, msb-lsb+1);
   21148          }
   21149          goto decode_success;
   21150       }
   21151    }
   21152 
   21153    /* ------------------- (T1) SXTAH ------------------- */
   21154    /* ------------------- (T1) UXTAH ------------------- */
   21155    if ((INSN0(15,4) == 0xFA1      // UXTAH
   21156         || INSN0(15,4) == 0xFA0)  // SXTAH
   21157        && INSN1(15,12) == BITS4(1,1,1,1)
   21158        && INSN1(7,6) == BITS2(1,0)) {
   21159       Bool isU = INSN0(15,4) == 0xFA1;
   21160       UInt rN  = INSN0(3,0);
   21161       UInt rD  = INSN1(11,8);
   21162       UInt rM  = INSN1(3,0);
   21163       UInt rot = INSN1(5,4);       // rotation amount in units of 8 bits
   21164       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   21165          IRTemp srcL = newTemp(Ity_I32);
   21166          IRTemp srcR = newTemp(Ity_I32);
   21167          IRTemp res  = newTemp(Ity_I32);
   21168          assign(srcR, getIRegT(rM));
   21169          assign(srcL, getIRegT(rN));
   21170          assign(res,  binop(Iop_Add32, // rN + extend16(ROR(rM, 8*rot))
   21171                             mkexpr(srcL),
   21172                             unop(isU ? Iop_16Uto32 : Iop_16Sto32,
   21173                                  unop(Iop_32to16,
   21174                                       genROR32(srcR, 8 * rot)))));
   21175          putIRegT(rD, mkexpr(res), condT);
   21176          DIP("%cxtah r%u, r%u, r%u, ror #%u\n",
   21177              isU ? 'u' : 's', rD, rN, rM, 8 * rot); // bits rotated, not field
   21178          goto decode_success;
   21179       }
   21180    }
   21181 
   21182    /* ------------------- (T1) SXTAB ------------------- */
   21183    /* ------------------- (T1) UXTAB ------------------- */
   21184    if ((INSN0(15,4) == 0xFA5      // UXTAB
   21185         || INSN0(15,4) == 0xFA4)  // SXTAB
   21186        && INSN1(15,12) == BITS4(1,1,1,1)
   21187        && INSN1(7,6) == BITS2(1,0)) {
   21188       Bool isU = INSN0(15,4) == 0xFA5;
   21189       UInt rN  = INSN0(3,0);
   21190       UInt rD  = INSN1(11,8);
   21191       UInt rM  = INSN1(3,0);
   21192       UInt rot = INSN1(5,4);       // rotation amount in units of 8 bits
   21193       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   21194          IRTemp srcL = newTemp(Ity_I32);
   21195          IRTemp srcR = newTemp(Ity_I32);
   21196          IRTemp res  = newTemp(Ity_I32);
   21197          assign(srcR, getIRegT(rM));
   21198          assign(srcL, getIRegT(rN));
   21199          assign(res,  binop(Iop_Add32, // rN + extend8(ROR(rM, 8*rot))
   21200                             mkexpr(srcL),
   21201                             unop(isU ? Iop_8Uto32 : Iop_8Sto32,
   21202                                  unop(Iop_32to8,
   21203                                       genROR32(srcR, 8 * rot)))));
   21204          putIRegT(rD, mkexpr(res), condT);
   21205          DIP("%cxtab r%u, r%u, r%u, ror #%u\n",
   21206              isU ? 'u' : 's', rD, rN, rM, 8 * rot); // bits rotated, not field
   21207          goto decode_success;
   21208       }
   21209    }
   21210 
   21211    /* ------------------- (T1) CLZ ------------------- */
   21212    if (INSN0(15,4) == 0xFAB
   21213        && INSN1(15,12) == BITS4(1,1,1,1)
   21214        && INSN1(7,4) == BITS4(1,0,0,0)) {
   21215       UInt rM1 = INSN0(3,0);
   21216       UInt rD  = INSN1(11,8);
   21217       UInt rM2 = INSN1(3,0);
   21218       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   21219          IRTemp arg = newTemp(Ity_I32);
   21220          IRTemp res = newTemp(Ity_I32);
   21221          assign(arg, getIRegT(rM1));
   21222          assign(res, IRExpr_ITE(
   21223                         binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
   21224                         mkU32(32),
   21225                         unop(Iop_Clz32, mkexpr(arg))
   21226          ));
   21227          putIRegT(rD, mkexpr(res), condT);
   21228          DIP("clz r%u, r%u\n", rD, rM1);
   21229          goto decode_success;
   21230       }
   21231    }
   21232 
   21233    /* ------------------- (T1) RBIT ------------------- */
   21234    if (INSN0(15,4) == 0xFA9
   21235        && INSN1(15,12) == BITS4(1,1,1,1)
   21236        && INSN1(7,4) == BITS4(1,0,1,0)) {
   21237       UInt rM1 = INSN0(3,0);
   21238       UInt rD  = INSN1(11,8);
   21239       UInt rM2 = INSN1(3,0);
   21240       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   21241          IRTemp arg = newTemp(Ity_I32);
   21242          assign(arg, getIRegT(rM1));
   21243          IRTemp res = gen_BITREV(arg);
   21244          putIRegT(rD, mkexpr(res), condT);
   21245          DIP("rbit r%u, r%u\n", rD, rM1);
   21246          goto decode_success;
   21247       }
   21248    }
   21249 
   21250    /* ------------------- (T2) REV   ------------------- */
   21251    /* ------------------- (T2) REV16 ------------------- */
   21252    if (INSN0(15,4) == 0xFA9
   21253        && INSN1(15,12) == BITS4(1,1,1,1)
   21254        && (   INSN1(7,4) == BITS4(1,0,0,0)     // REV
   21255            || INSN1(7,4) == BITS4(1,0,0,1))) { // REV16
   21256       UInt rM1   = INSN0(3,0);
   21257       UInt rD    = INSN1(11,8);
   21258       UInt rM2   = INSN1(3,0);
   21259       Bool isREV = INSN1(7,4) == BITS4(1,0,0,0);
   21260       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   21261          IRTemp arg = newTemp(Ity_I32);
   21262          assign(arg, getIRegT(rM1));
   21263          IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
   21264          putIRegT(rD, mkexpr(res), condT);
   21265          DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM1);
   21266          goto decode_success;
   21267       }
   21268    }
   21269 
   21270    /* ------------------- (T2) REVSH ------------------ */
   21271    if (INSN0(15,4) == 0xFA9
   21272        && INSN1(15,12) == BITS4(1,1,1,1)
   21273        && INSN1(7,4) == BITS4(1,0,1,1)) {
   21274       UInt rM1 = INSN0(3,0);
   21275       UInt rM2 = INSN1(3,0);
   21276       UInt rD  = INSN1(11,8);
   21277       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   21278          IRTemp irt_rM  = newTemp(Ity_I32);
   21279          IRTemp irt_hi  = newTemp(Ity_I32);
   21280          IRTemp irt_low = newTemp(Ity_I32);
   21281          IRTemp irt_res = newTemp(Ity_I32);
   21282          assign(irt_rM, getIRegT(rM1));
   21283          assign(irt_hi,
   21284                 binop(Iop_Sar32,
   21285                       binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
   21286                       mkU8(16)
   21287                 )
   21288          );
   21289          assign(irt_low,
   21290                 binop(Iop_And32,
   21291                       binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
   21292                       mkU32(0xFF)
   21293                 )
   21294          );
   21295          assign(irt_res,
   21296                 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
   21297          );
   21298          putIRegT(rD, mkexpr(irt_res), condT);
   21299          DIP("revsh r%u, r%u\n", rD, rM1);
   21300          goto decode_success;
   21301       }
   21302    }
   21303 
   21304    /* -------------- (T1) MSR apsr, reg -------------- */
   21305    if (INSN0(15,4) == 0xF38
   21306        && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(9,0) == 0x000) {
   21307       UInt rN          = INSN0(3,0);
   21308       UInt write_ge    = INSN1(10,10);
   21309       UInt write_nzcvq = INSN1(11,11);
   21310       if (!isBadRegT(rN) && (write_nzcvq || write_ge)) {
   21311          IRTemp rNt = newTemp(Ity_I32);
   21312          assign(rNt, getIRegT(rN));
   21313          desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
   21314          DIP("msr cpsr_%s%s, r%u\n",
   21315              write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
   21316          goto decode_success;
   21317       }
   21318    }
   21319 
   21320    /* -------------- (T1) MRS reg, apsr -------------- */
   21321    if (INSN0(15,0) == 0xF3EF
   21322        && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(7,0) == 0x00) {
   21323       UInt rD = INSN1(11,8);
   21324       if (!isBadRegT(rD)) {
   21325          IRTemp apsr = synthesise_APSR();
   21326          putIRegT( rD, mkexpr(apsr), condT );
   21327          DIP("mrs r%u, cpsr\n", rD);
   21328          goto decode_success;
   21329       }
   21330    }
   21331 
   21332    /* ----------------- (T1) LDREX ----------------- */
   21333    if (INSN0(15,4) == 0xE85 && INSN1(11,8) == BITS4(1,1,1,1)) {
   21334       UInt rN   = INSN0(3,0);
   21335       UInt rT   = INSN1(15,12);
   21336       UInt imm8 = INSN1(7,0);
   21337       if (!isBadRegT(rT) && rN != 15) {
   21338          IRTemp res;
   21339          // go uncond
   21340          mk_skip_over_T32_if_cond_is_false( condT );
   21341          // now uncond
   21342          res = newTemp(Ity_I32);
   21343          stmt( IRStmt_LLSC(Iend_LE,
   21344                            res,
   21345                            binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
   21346                            NULL/*this is a load*/ ));
   21347          putIRegT(rT, mkexpr(res), IRTemp_INVALID);
   21348          DIP("ldrex r%u, [r%u, #+%u]\n", rT, rN, imm8 * 4);
   21349          goto decode_success;
   21350       }
   21351    }
   21352 
   21353    /* --------------- (T1) LDREX{B,H} --------------- */
   21354    if (INSN0(15,4) == 0xE8D
   21355        && (INSN1(11,0) == 0xF4F || INSN1(11,0) == 0xF5F)) {
   21356       UInt rN  = INSN0(3,0);
   21357       UInt rT  = INSN1(15,12);
   21358       Bool isH = INSN1(11,0) == 0xF5F;
   21359       if (!isBadRegT(rT) && rN != 15) {
   21360          IRTemp res;
   21361          // go uncond
   21362          mk_skip_over_T32_if_cond_is_false( condT );
   21363          // now uncond
   21364          res = newTemp(isH ? Ity_I16 : Ity_I8);
   21365          stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
   21366                            NULL/*this is a load*/ ));
   21367          putIRegT(rT, unop(isH ? Iop_16Uto32 : Iop_8Uto32, mkexpr(res)),
   21368                       IRTemp_INVALID);
   21369          DIP("ldrex%c r%u, [r%u]\n", isH ? 'h' : 'b', rT, rN);
   21370          goto decode_success;
   21371       }
   21372    }
   21373 
   21374    /* --------------- (T1) LDREXD --------------- */
   21375    if (INSN0(15,4) == 0xE8D && INSN1(7,0) == 0x7F) {
   21376       UInt rN  = INSN0(3,0);
   21377       UInt rT  = INSN1(15,12);
   21378       UInt rT2 = INSN1(11,8);
   21379       if (!isBadRegT(rT) && !isBadRegT(rT2) && rT != rT2 && rN != 15) {
   21380          IRTemp res;
   21381          // go uncond
   21382          mk_skip_over_T32_if_cond_is_false( condT );
   21383          // now uncond
   21384          res = newTemp(Ity_I64);
   21385          // FIXME: assumes little-endian guest
   21386          stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
   21387                            NULL/*this is a load*/ ));
   21388          // FIXME: assumes little-endian guest
   21389          putIRegT(rT,  unop(Iop_64to32,   mkexpr(res)), IRTemp_INVALID);
   21390          putIRegT(rT2, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID);
   21391          DIP("ldrexd r%u, r%u, [r%u]\n", rT, rT2, rN);
   21392          goto decode_success;
   21393       }
   21394    }
   21395 
   21396    /* ----------------- (T1) STREX ----------------- */
   21397    if (INSN0(15,4) == 0xE84) {
   21398       UInt rN   = INSN0(3,0);
   21399       UInt rT   = INSN1(15,12);
   21400       UInt rD   = INSN1(11,8);
   21401       UInt imm8 = INSN1(7,0);
   21402       if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
   21403           && rD != rN && rD != rT) {
   21404          IRTemp resSC1, resSC32;
   21405          // go uncond
   21406          mk_skip_over_T32_if_cond_is_false( condT );
   21407          // now uncond
   21408          /* Ok, now we're unconditional.  Do the store. */
   21409          resSC1 = newTemp(Ity_I1);
   21410          stmt( IRStmt_LLSC(Iend_LE,
   21411                            resSC1,
   21412                            binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
   21413                            getIRegT(rT)) );
   21414          /* Set rD to 1 on failure, 0 on success.  Currently we have
   21415             resSC1 == 0 on failure, 1 on success. */
   21416          resSC32 = newTemp(Ity_I32);
   21417          assign(resSC32,
   21418                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   21419          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
   21420          DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
   21421          goto decode_success;
   21422       }
   21423    }
   21424 
   21425    /* --------------- (T1) STREX{B,H} --------------- */
   21426    if (INSN0(15,4) == 0xE8C
   21427        && (INSN1(11,4) == 0xF4 || INSN1(11,4) == 0xF5)) {
   21428       UInt rN  = INSN0(3,0);
   21429       UInt rT  = INSN1(15,12);
   21430       UInt rD  = INSN1(3,0);
   21431       Bool isH = INSN1(11,4) == 0xF5;
   21432       if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
   21433           && rD != rN && rD != rT) {
   21434          IRTemp resSC1, resSC32;
   21435          // go uncond
   21436          mk_skip_over_T32_if_cond_is_false( condT );
   21437          // now uncond
   21438          /* Ok, now we're unconditional.  Do the store. */
   21439          resSC1 = newTemp(Ity_I1);
   21440          stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN),
   21441                            unop(isH ? Iop_32to16 : Iop_32to8,
   21442                                 getIRegT(rT))) );
   21443          /* Set rD to 1 on failure, 0 on success.  Currently we have
   21444             resSC1 == 0 on failure, 1 on success. */
   21445          resSC32 = newTemp(Ity_I32);
   21446          assign(resSC32,
   21447                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   21448          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
   21449          DIP("strex%c r%u, r%u, [r%u]\n", isH ? 'h' : 'b', rD, rT, rN);
   21450          goto decode_success;
   21451       }
   21452    }
   21453 
   21454    /* ---------------- (T1) STREXD ---------------- */
            /* Store Register Exclusive Doubleword:
               strexd Rd, Rt, Rt2, [Rn]
               Store-conditionally writes the 64-bit value Rt2:Rt (Rt is the
               low word) to [Rn], then sets Rd to 0 if the store succeeded
               and to 1 if it failed.
            */
   21455    if (INSN0(15,4) == 0xE8C && INSN1(7,4) == BITS4(0,1,1,1)) {
   21456       UInt rN  = INSN0(3,0);
   21457       UInt rT  = INSN1(15,12);
   21458       UInt rT2 = INSN1(11,8);
   21459       UInt rD  = INSN1(3,0);
               /* rD receives the status result, so it must be distinct from
                  the base register rN and from BOTH data registers rT and
                  rT2.  Encodings that violate this are not decoded here. */
   21460       if (!isBadRegT(rD) && !isBadRegT(rT) && !isBadRegT(rT2)
   21461           && rN != 15 && rD != rN && rD != rT && rD != rT2) {
   21462          IRTemp resSC1, resSC32, data;
   21463          // go uncond
   21464          mk_skip_over_T32_if_cond_is_false( condT );
   21465          // now uncond
   21466          /* Ok, now we're unconditional.  Do the store. */
   21467          resSC1 = newTemp(Ity_I1);
   21468          data = newTemp(Ity_I64);
   21469          // FIXME: assumes little-endian guest
                  /* Build the doubleword with rT2 as the high half and rT as
                     the low half. */
   21470          assign(data, binop(Iop_32HLto64, getIRegT(rT2), getIRegT(rT)));
   21471          // FIXME: assumes little-endian guest
   21472          stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN), mkexpr(data)));
   21473          /* Set rD to 1 on failure, 0 on success.  Currently we have
   21474             resSC1 == 0 on failure, 1 on success. */
   21475          resSC32 = newTemp(Ity_I32);
   21476          assign(resSC32,
   21477                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   21478          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
   21479          DIP("strexd r%u, r%u, r%u, [r%u]\n", rD, rT, rT2, rN);
   21480          goto decode_success;
   21481       }
   21482    }
   21483 
   21484    /* -------------- v7 barrier insns -------------- */
   21485    if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF00) == 0x8F00) {
   21486       /* FIXME: should this be unconditional? */
   21487       /* XXX this isn't really right, is it?  The generated IR does
   21488          them unconditionally.  I guess it doesn't matter since it
   21489          doesn't do any harm to do them even when the guarding
   21490          condition is false -- it's just a performance loss. */
   21491       switch (INSN1(7,0)) {
   21492          case 0x4F: /* DSB sy */
   21493          case 0x4E: /* DSB st */
   21494          case 0x4B: /* DSB ish */
   21495          case 0x4A: /* DSB ishst */
   21496          case 0x47: /* DSB nsh */
   21497          case 0x46: /* DSB nshst */
   21498          case 0x43: /* DSB osh */
   21499          case 0x42: /* DSB oshst */
   21500             stmt( IRStmt_MBE(Imbe_Fence) );
   21501             DIP("DSB\n");
   21502             goto decode_success;
   21503          case 0x5F: /* DMB sy */
   21504          case 0x5E: /* DMB st */
   21505          case 0x5B: /* DMB ish */
   21506          case 0x5A: /* DMB ishst */
   21507          case 0x57: /* DMB nsh */
   21508          case 0x56: /* DMB nshst */
   21509          case 0x53: /* DMB osh */
   21510          case 0x52: /* DMB oshst */
   21511             stmt( IRStmt_MBE(Imbe_Fence) );
   21512             DIP("DMB\n");
   21513             goto decode_success;
   21514          case 0x6F: /* ISB */
   21515             stmt( IRStmt_MBE(Imbe_Fence) );
   21516             DIP("ISB\n");
   21517             goto decode_success;
   21518          default:
   21519             break;
   21520       }
   21521    }
   21522 
   21523    /* ---------------------- PLD{,W} ---------------------- */
   21524    if ((INSN0(15,4) & 0xFFD) == 0xF89 && INSN1(15,12) == 0xF) {
   21525       /* FIXME: should this be unconditional? */
   21526       /* PLD/PLDW immediate, encoding T1 */
   21527       UInt rN    = INSN0(3,0);
   21528       UInt bW    = INSN0(5,5);
   21529       UInt imm12 = INSN1(11,0);
   21530       DIP("pld%s [r%u, #%u]\n", bW ? "w" : "",  rN, imm12);
   21531       goto decode_success;
   21532    }
   21533 
   21534    if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,8) == 0xFC) {
   21535       /* FIXME: should this be unconditional? */
   21536       /* PLD/PLDW immediate, encoding T2 */
   21537       UInt rN    = INSN0(3,0);
   21538       UInt bW    = INSN0(5,5);
   21539       UInt imm8  = INSN1(7,0);
   21540       DIP("pld%s [r%u, #-%u]\n", bW ? "w" : "",  rN, imm8);
   21541       goto decode_success;
   21542    }
   21543 
   21544    if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,6) == 0x3C0) {
   21545       /* FIXME: should this be unconditional? */
   21546       /* PLD/PLDW register, encoding T1 */
   21547       UInt rN   = INSN0(3,0);
   21548       UInt rM   = INSN1(3,0);
   21549       UInt bW   = INSN0(5,5);
   21550       UInt imm2 = INSN1(5,4);
   21551       if (!isBadRegT(rM)) {
   21552          DIP("pld%s [r%u, r%u, lsl %d]\n", bW ? "w" : "", rN, rM, imm2);
   21553          goto decode_success;
   21554       }
   21555       /* fall through */
   21556    }
   21557 
   21558    /* -------------- read CP15 TPIDRURO register ------------- */
   21559    /* mrc     p15, 0,  r0, c13, c0, 3  up to
   21560       mrc     p15, 0, r14, c13, c0, 3
   21561    */
   21562    /* I don't know whether this is really v7-only.  But anyway, we
   21563       have to support it since arm-linux uses TPIDRURO as a thread
   21564       state register. */
   21565    if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F70)) {
   21566       /* FIXME: should this be unconditional? */
   21567       UInt rD = INSN1(15,12);
   21568       if (!isBadRegT(rD)) {
   21569          putIRegT(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32), IRTemp_INVALID);
   21570          DIP("mrc p15,0, r%u, c13, c0, 3\n", rD);
   21571          goto decode_success;
   21572       }
   21573       /* fall through */
   21574    }
   21575 
   21576    /* ------------------- CLREX ------------------ */
   21577    if (INSN0(15,0) == 0xF3BF && INSN1(15,0) == 0x8F2F) {
   21578       /* AFAICS, this simply cancels a (all?) reservations made by a
   21579          (any?) preceding LDREX(es).  Arrange to hand it through to
   21580          the back end. */
   21581       mk_skip_over_T32_if_cond_is_false( condT );
   21582       stmt( IRStmt_MBE(Imbe_CancelReservation) );
   21583       DIP("clrex\n");
   21584       goto decode_success;
   21585    }
   21586 
   21587    /* ------------------- NOP ------------------ */
   21588    if (INSN0(15,0) == 0xF3AF && INSN1(15,0) == 0x8000) {
   21589       DIP("nop\n");
   21590       goto decode_success;
   21591    }
   21592 
   21593    /* -------------- (T1) LDRT reg+#imm8 -------------- */
   21594    /* Load Register Unprivileged:
   21595       ldrt Rt, [Rn, #imm8]
   21596    */
   21597    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,1)
   21598        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21599       UInt rT    = INSN1(15,12);
   21600       UInt rN    = INSN0(3,0);
   21601       UInt imm8  = INSN1(7,0);
   21602       Bool valid = True;
   21603       if (rN == 15 || isBadRegT(rT)) valid = False;
   21604       if (valid) {
   21605          put_ITSTATE(old_itstate);
   21606          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21607          IRTemp newRt = newTemp(Ity_I32);
   21608          loadGuardedLE( newRt, ILGop_Ident32, ea, llGetIReg(rT), condT );
   21609          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21610          put_ITSTATE(new_itstate);
   21611          DIP("ldrt r%u, [r%u, #%u]\n", rT, rN, imm8);
   21612          goto decode_success;
   21613       }
   21614    }
   21615 
   21616    /* -------------- (T1) STRT reg+#imm8 -------------- */
   21617    /* Store Register Unprivileged:
   21618       strt Rt, [Rn, #imm8]
   21619    */
   21620    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,0)
   21621        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21622       UInt rT    = INSN1(15,12);
   21623       UInt rN    = INSN0(3,0);
   21624       UInt imm8  = INSN1(7,0);
   21625       Bool valid = True;
   21626       if (rN == 15 || isBadRegT(rT)) valid = False;
   21627       if (valid) {
   21628          put_ITSTATE(old_itstate);
   21629          IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21630          storeGuardedLE( address, llGetIReg(rT), condT );
   21631          put_ITSTATE(new_itstate);
   21632          DIP("strt r%u, [r%u, #%u]\n", rT, rN, imm8);
   21633          goto decode_success;
   21634       }
   21635    }
   21636 
   21637    /* -------------- (T1) STRBT reg+#imm8 -------------- */
   21638    /* Store Register Byte Unprivileged:
   21639       strbt Rt, [Rn, #imm8]
   21640    */
   21641    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,0)
   21642        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21643       UInt rT    = INSN1(15,12);
   21644       UInt rN    = INSN0(3,0);
   21645       UInt imm8  = INSN1(7,0);
   21646       Bool valid = True;
   21647       if (rN == 15 || isBadRegT(rT)) valid = False;
   21648       if (valid) {
   21649          put_ITSTATE(old_itstate);
   21650          IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21651          IRExpr* data = unop(Iop_32to8, llGetIReg(rT));
   21652          storeGuardedLE( address, data, condT );
   21653          put_ITSTATE(new_itstate);
   21654          DIP("strbt r%u, [r%u, #%u]\n", rT, rN, imm8);
   21655          goto decode_success;
   21656       }
   21657    }
   21658 
   21659    /* -------------- (T1) LDRHT reg+#imm8 -------------- */
   21660    /* Load Register Halfword Unprivileged:
   21661       ldrht Rt, [Rn, #imm8]
   21662    */
   21663    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,1)
   21664        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21665       UInt rN    = INSN0(3,0);
   21666       Bool valid = True;
   21667       if (rN == 15) {
   21668          /* In this case the instruction is in fact LDRH (literal),
   21669             which was already handled earlier, so we must not decode
   21670             it a second time here. */
   21671          valid = False;
   21672       }
   21673       UInt rT    = INSN1(15,12);
   21674       UInt imm8  = INSN1(7,0);
   21675       if (isBadRegT(rT)) valid = False;
   21676       if (valid) {
   21677          put_ITSTATE(old_itstate);
   21678          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21679          IRTemp newRt = newTemp(Ity_I32);
   21680          loadGuardedLE( newRt, ILGop_16Uto32, ea, llGetIReg(rT), condT );
   21681          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21682          put_ITSTATE(new_itstate);
   21683          DIP("ldrht r%u, [r%u, #%u]\n", rT, rN, imm8);
   21684          goto decode_success;
   21685       }
   21686    }
   21687 
   21688    /* -------------- (T1) LDRSHT reg+#imm8 -------------- */
   21689    /* Load Register Signed Halfword Unprivileged:
   21690       ldrsht Rt, [Rn, #imm8]
   21691    */
   21692    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(1,1)
   21693        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21694       UInt rN    = INSN0(3,0);
   21695       Bool valid = True;
   21696       if (rN == 15) {
   21697          /* In this case the instruction is in fact LDRSH (literal),
   21698             which was already handled earlier, so we must not decode
   21699             it a second time here. */
   21700          valid = False;
   21701       }
   21702       UInt rT    = INSN1(15,12);
   21703       UInt imm8  = INSN1(7,0);
   21704       if (isBadRegT(rT)) valid = False;
   21705       if (valid) {
   21706          put_ITSTATE(old_itstate);
   21707          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21708          IRTemp newRt = newTemp(Ity_I32);
   21709          loadGuardedLE( newRt, ILGop_16Sto32, ea, llGetIReg(rT), condT );
   21710          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21711          put_ITSTATE(new_itstate);
   21712          DIP("ldrsht r%u, [r%u, #%u]\n", rT, rN, imm8);
   21713          goto decode_success;
   21714       }
   21715    }
   21716 
   21717    /* -------------- (T1) STRHT reg+#imm8 -------------- */
   21718    /* Store Register Halfword Unprivileged:
   21719       strht Rt, [Rn, #imm8]
   21720    */
   21721    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,0)
   21722        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21723       UInt rT    = INSN1(15,12);
   21724       UInt rN    = INSN0(3,0);
   21725       UInt imm8  = INSN1(7,0);
   21726       Bool valid = True;
   21727       if (rN == 15 || isBadRegT(rT)) valid = False;
   21728       if (valid) {
   21729          put_ITSTATE(old_itstate);
   21730          IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21731          IRExpr* data = unop(Iop_32to16, llGetIReg(rT));
   21732          storeGuardedLE( address, data, condT );
   21733          put_ITSTATE(new_itstate);
   21734          DIP("strht r%u, [r%u, #%u]\n", rT, rN, imm8);
   21735          goto decode_success;
   21736       }
   21737    }
   21738 
   21739    /* -------------- (T1) LDRBT reg+#imm8 -------------- */
   21740    /* Load Register Byte Unprivileged:
   21741       ldrbt Rt, [Rn, #imm8]
   21742    */
   21743    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,1)
   21744        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21745       UInt rN    = INSN0(3,0);
   21746       UInt rT    = INSN1(15,12);
   21747       UInt imm8  = INSN1(7,0);
   21748       Bool valid = True;
   21749       if (rN == 15 /* insn is LDRB (literal) */) valid = False;
   21750       if (isBadRegT(rT)) valid = False;
   21751       if (valid) {
   21752          put_ITSTATE(old_itstate);
   21753          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21754          IRTemp newRt = newTemp(Ity_I32);
   21755          loadGuardedLE( newRt, ILGop_8Uto32, ea, llGetIReg(rT), condT );
   21756          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21757          put_ITSTATE(new_itstate);
   21758          DIP("ldrbt r%u, [r%u, #%u]\n", rT, rN, imm8);
   21759          goto decode_success;
   21760       }
   21761    }
   21762 
   21763    /* -------------- (T1) LDRSBT reg+#imm8 -------------- */
   21764    /* Load Register Signed Byte Unprivileged:
   21765       ldrsbt Rt, [Rn, #imm8]
   21766    */
   21767    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
   21768        && INSN1(11,8) == BITS4(1,1,1,0)) {
   21769       UInt rN    = INSN0(3,0);
   21770       Bool valid = True;
   21771       UInt rT    = INSN1(15,12);
   21772       UInt imm8  = INSN1(7,0);
   21773       if (rN == 15 /* insn is LDRSB (literal) */) valid = False;
   21774       if (isBadRegT(rT)) valid = False;
   21775       if (valid) {
   21776          put_ITSTATE(old_itstate);
   21777          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   21778          IRTemp newRt = newTemp(Ity_I32);
   21779          loadGuardedLE( newRt, ILGop_8Sto32, ea, llGetIReg(rT), condT );
   21780          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21781          put_ITSTATE(new_itstate);
   21782          DIP("ldrsbt r%u, [r%u, #%u]\n", rT, rN, imm8);
   21783          goto decode_success;
   21784       }
   21785    }
   21786 
   21787    /* -------------- (T1) PLI reg+#imm12 -------------- */
   21788    /* Preload Instruction:
   21789       pli [Rn, #imm12]
   21790    */
   21791    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,1,0) && INSN0(5,4) == BITS2(0,1)
   21792        && INSN1(15,12) == BITS4(1,1,1,1)) {
   21793       UInt rN    = INSN0(3,0);
   21794       UInt imm12 = INSN1(11,0);
   21795       if (rN != 15) {
   21796          DIP("pli [r%u, #%u]\n", rN, imm12);
   21797          goto decode_success;
   21798       }
   21799    }
   21800 
   21801    /* -------------- (T2) PLI reg-#imm8 -------------- */
   21802    /* Preload Instruction:
   21803       pli [Rn, #-imm8]
   21804    */
   21805    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
   21806        && INSN1(15,8) == BITS8(1,1,1,1,1,1,0,0)) {
   21807       UInt rN   = INSN0(3,0);
   21808       UInt imm8 = INSN1(7,0);
   21809       if (rN != 15) {
   21810          DIP("pli [r%u, #-%u]\n", rN, imm8);
   21811          goto decode_success;
   21812       }
   21813    }
   21814 
   21815    /* -------------- (T3) PLI PC+/-#imm12 -------------- */
   21816    /* Preload Instruction:
   21817       pli [PC, #+/-imm12]
   21818    */
   21819    if (INSN0(15,8) == BITS8(1,1,1,1,1,0,0,1)
   21820        && INSN0(6,0) == BITS7(0,0,1,1,1,1,1)
   21821        && INSN1(15,12) == BITS4(1,1,1,1)) {
   21822       UInt imm12 = INSN1(11,0);
   21823       UInt bU    = INSN0(7,7);
   21824       DIP("pli [pc, #%c%u]\n", bU == 1 ? '+' : '-', imm12);
   21825       goto decode_success;
   21826    }
   21827 
   21828    /* ----------------------------------------------------------- */
   21829    /* -- VFP (CP 10, CP 11) instructions (in Thumb mode)       -- */
   21830    /* ----------------------------------------------------------- */
   21831 
   21832    if (INSN0(15,12) == BITS4(1,1,1,0)) {
   21833       UInt insn28 = (INSN0(11,0) << 16) | INSN1(15,0);
   21834       Bool ok_vfp = decode_CP10_CP11_instruction (
   21835                        &dres, insn28, condT, ARMCondAL/*bogus*/,
   21836                        True/*isT*/
   21837                     );
   21838       if (ok_vfp)
   21839          goto decode_success;
   21840    }
   21841 
   21842    /* ----------------------------------------------------------- */
   21843    /* -- NEON instructions (in Thumb mode)                     -- */
   21844    /* ----------------------------------------------------------- */
   21845 
   21846    if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
   21847       UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
   21848       Bool ok_neon = decode_NEON_instruction(
   21849                         &dres, insn32, condT, True/*isT*/
   21850                      );
   21851       if (ok_neon)
   21852          goto decode_success;
   21853    }
   21854 
   21855    /* ----------------------------------------------------------- */
   21856    /* -- v6 media instructions (in Thumb mode)                 -- */
   21857    /* ----------------------------------------------------------- */
   21858 
   21859    { UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
   21860      Bool ok_v6m = decode_V6MEDIA_instruction(
   21861                       &dres, insn32, condT, ARMCondAL/*bogus*/,
   21862                       True/*isT*/
   21863                    );
   21864      if (ok_v6m)
   21865         goto decode_success;
   21866    }
   21867 
   21868    /* ----------------------------------------------------------- */
   21869    /* -- Undecodable                                           -- */
   21870    /* ----------------------------------------------------------- */
   21871 
   21872    goto decode_failure;
   21873    /*NOTREACHED*/
   21874 
   21875   decode_failure:
   21876    /* All decode failures end up here. */
   21877    if (sigill_diag)
   21878       vex_printf("disInstr(thumb): unhandled instruction: "
   21879                  "0x%04x 0x%04x\n", (UInt)insn0, (UInt)insn1);
   21880 
   21881    /* Back up ITSTATE to the initial value for this instruction.
   21882       If we don't do that, any subsequent restart of the instruction
   21883       will restart with the wrong value. */
   21884    if (old_itstate != IRTemp_INVALID)
   21885       put_ITSTATE(old_itstate);
   21886 
   21887    /* Tell the dispatcher that this insn cannot be decoded, and so has
   21888       not been executed, and (is currently) the next to be executed.
   21889       R15 should be up-to-date since it is made so at the start of each
   21890       insn, but nevertheless be paranoid and update it again right
   21891       now. */
   21892    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   21893    llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
   21894    dres.len         = 0;
   21895    dres.whatNext    = Dis_StopHere;
   21896    dres.jk_StopHere = Ijk_NoDecode;
   21897    dres.continueAt  = 0;
   21898    return dres;
   21899 
   21900   decode_success:
   21901    /* All decode successes end up here. */
   21902    vassert(dres.len == 4 || dres.len == 2 || dres.len == 20);
   21903    switch (dres.whatNext) {
   21904       case Dis_Continue:
   21905          llPutIReg(15, mkU32(dres.len + (guest_R15_curr_instr_notENC | 1)));
   21906          break;
   21907       case Dis_ResteerU:
   21908       case Dis_ResteerC:
   21909          llPutIReg(15, mkU32(dres.continueAt));
   21910          break;
   21911       case Dis_StopHere:
   21912          break;
   21913       default:
   21914          vassert(0);
   21915    }
   21916 
   21917    DIP("\n");
   21918 
   21919    return dres;
   21920 
   21921 #  undef INSN0
   21922 #  undef INSN1
   21923 }
   21924 
   21925 #undef DIP
   21926 #undef DIS
   21927 
   21928 
   21929 /* Helper table for figuring out how many insns an IT insn
   21930    conditionalises.
   21931 
   21932    An ITxyz instruction of the format "1011 1111 firstcond mask"
   21933    conditionalises some number of instructions, as indicated by the
   21934    following table.  A value of zero indicates the instruction is
   21935    invalid in some way.
   21936 
   21937    mask = 0 means this isn't an IT instruction
   21938    fc = 15 (NV) means unpredictable
   21939 
   21940    The line fc = 14 (AL) is different from the others; there are
   21941    additional constraints in this case.
   21942 
   21943           mask(0 ..                   15)
   21944         +--------------------------------
   21945    fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21946    ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21947         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21948         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21949         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21950         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21951         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21952         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21953         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21954         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21955         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21956         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21957         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21958         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21959         | 0 4 3 0 2 0 0 0 1 0 0 0 0 0 0 0
   21960    15)  | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
   21961 
   21962    To be conservative with the analysis, let's rule out the mask = 0
   21963    case, since that isn't an IT insn at all.  But for all the other
   21964    cases where the table contains zero, that means unpredictable, so
   21965    let's say 4 to be conservative.  Hence we have a safe value for any
   21966    IT (mask,fc) pair that the CPU would actually identify as an IT
   21967    instruction.  The final table is
   21968 
   21969           mask(0 ..                   15)
   21970         +--------------------------------
   21971    fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21972    ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21973         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21974         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21975         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21976         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21977         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21978         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21979         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21980         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21981         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21982         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21983         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21984         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   21985         | 0 4 3 4 2 4 4 4 1 4 4 4 4 4 4 4
   21986    15)  | 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
   21987 */
   21988 static const UChar it_length_table[256]
   21989    = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21990        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21991        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21992        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21993        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21994        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21995        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21996        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21997        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21998        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   21999        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22000        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22001        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22002        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
   22003        0, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
   22004        0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
   22005      };
   22006 
   22007 
   22008 /*------------------------------------------------------------*/
   22009 /*--- Top-level fn                                         ---*/
   22010 /*------------------------------------------------------------*/
   22011 
   22012 /* Disassemble a single instruction into IR.  The instruction
   22013    is located in host memory at &guest_code[delta]. */
   22014 
   22015 DisResult disInstr_ARM ( IRSB*        irsb_IN,
   22016                          Bool         (*resteerOkFn) ( void*, Addr ),
   22017                          Bool         resteerCisOk,
   22018                          void*        callback_opaque,
   22019                          const UChar* guest_code_IN,
   22020                          Long         delta_ENCODED,
   22021                          Addr         guest_IP_ENCODED,
   22022                          VexArch      guest_arch,
   22023                          const VexArchInfo* archinfo,
   22024                          const VexAbiInfo*  abiinfo,
   22025                          VexEndness   host_endness_IN,
   22026                          Bool         sigill_diag_IN )
   22027 {
   22028    DisResult dres;
   22029    Bool isThumb = (Bool)(guest_IP_ENCODED & 1);
   22030 
   22031    /* Set globals (see top of this file) */
   22032    vassert(guest_arch == VexArchARM);
   22033 
   22034    irsb            = irsb_IN;
   22035    host_endness    = host_endness_IN;
   22036    __curr_is_Thumb = isThumb;
   22037 
   22038    if (isThumb) {
   22039       guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED - 1;
   22040    } else {
   22041       guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED;
   22042    }
   22043 
   22044    if (isThumb) {
   22045       dres = disInstr_THUMB_WRK ( resteerOkFn,
   22046                                   resteerCisOk, callback_opaque,
   22047                                   &guest_code_IN[delta_ENCODED - 1],
   22048                                   archinfo, abiinfo, sigill_diag_IN );
   22049    } else {
   22050       dres = disInstr_ARM_WRK ( resteerOkFn,
   22051                                 resteerCisOk, callback_opaque,
   22052                                 &guest_code_IN[delta_ENCODED],
   22053                                 archinfo, abiinfo, sigill_diag_IN );
   22054    }
   22055 
   22056    return dres;
   22057 }
   22058 
   22059 /* Test program for the conversion of IRCmpF64Result values to VFP
   22060    nzcv values.  See handling of FCMPD et al above. */
   22061 /*
   22062 UInt foo ( UInt x )
   22063 {
   22064    UInt ix    = ((x >> 5) & 3) | (x & 1);
   22065    UInt termL = (((((ix ^ 1) << 30) - 1) >> 29) + 1);
   22066    UInt termR = (ix & (ix >> 1) & 1);
   22067    return termL  -  termR;
   22068 }
   22069 
   22070 void try ( char* s, UInt ir, UInt req )
   22071 {
   22072    UInt act = foo(ir);
   22073    printf("%s 0x%02x -> req %d%d%d%d act %d%d%d%d (0x%x)\n",
   22074           s, ir, (req >> 3) & 1, (req >> 2) & 1,
   22075                  (req >> 1) & 1, (req >> 0) & 1,
   22076                  (act >> 3) & 1, (act >> 2) & 1,
   22077                  (act >> 1) & 1, (act >> 0) & 1, act);
   22078 
   22079 }
   22080 
   22081 int main ( void )
   22082 {
   22083    printf("\n");
   22084    try("UN", 0x45, 0b0011);
   22085    try("LT", 0x01, 0b1000);
   22086    try("GT", 0x00, 0b0010);
   22087    try("EQ", 0x40, 0b0110);
   22088    printf("\n");
   22089    return 0;
   22090 }
   22091 */
   22092 
   22093 /* Spare code for doing reference implementations of various 64-bit
   22094    SIMD interleaves/deinterleaves/concatenation ops. */
   22095 /*
   22096 // Split a 64 bit value into 4 16 bit ones, in 32-bit IRTemps with
   22097 // the top halves guaranteed to be zero.
   22098 static void break64to16s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
   22099                            IRTemp* out0, IRTemp v64 )
   22100 {
   22101   if (out3) *out3 = newTemp(Ity_I32);
   22102   if (out2) *out2 = newTemp(Ity_I32);
   22103   if (out1) *out1 = newTemp(Ity_I32);
   22104   if (out0) *out0 = newTemp(Ity_I32);
   22105   IRTemp hi32 = newTemp(Ity_I32);
   22106   IRTemp lo32 = newTemp(Ity_I32);
   22107   assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
   22108   assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
   22109   if (out3) assign(*out3, binop(Iop_Shr32, mkexpr(hi32), mkU8(16)));
   22110   if (out2) assign(*out2, binop(Iop_And32, mkexpr(hi32), mkU32(0xFFFF)));
   22111   if (out1) assign(*out1, binop(Iop_Shr32, mkexpr(lo32), mkU8(16)));
   22112   if (out0) assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFFFF)));
   22113 }
   22114 
   22115 // Make a 64 bit value from 4 16 bit ones, each of which is in a 32 bit
   22116 // IRTemp.
   22117 static IRTemp mk64from16s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
   22118 {
   22119   IRTemp hi32 = newTemp(Ity_I32);
   22120   IRTemp lo32 = newTemp(Ity_I32);
   22121   assign(hi32,
   22122          binop(Iop_Or32,
   22123                binop(Iop_Shl32, mkexpr(in3), mkU8(16)),
   22124                binop(Iop_And32, mkexpr(in2), mkU32(0xFFFF))));
   22125   assign(lo32,
   22126          binop(Iop_Or32,
   22127                binop(Iop_Shl32, mkexpr(in1), mkU8(16)),
   22128                binop(Iop_And32, mkexpr(in0), mkU32(0xFFFF))));
   22129   IRTemp res = newTemp(Ity_I64);
   22130   assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
   22131   return res;
   22132 }
   22133 
   22134 static IRExpr* mk_InterleaveLO16x4 ( IRTemp a3210, IRTemp b3210 )
   22135 {
   22136   // returns a1 b1 a0 b0
   22137   IRTemp a1, a0, b1, b0;
   22138   break64to16s(NULL, NULL, &a1, &a0, a3210);
   22139   break64to16s(NULL, NULL, &b1, &b0, b3210);
   22140   return mkexpr(mk64from16s(a1, b1, a0, b0));
   22141 }
   22142 
   22143 static IRExpr* mk_InterleaveHI16x4 ( IRTemp a3210, IRTemp b3210 )
   22144 {
   22145   // returns a3 b3 a2 b2
   22146   IRTemp a3, a2, b3, b2;
   22147   break64to16s(&a3, &a2, NULL, NULL, a3210);
   22148   break64to16s(&b3, &b2, NULL, NULL, b3210);
   22149   return mkexpr(mk64from16s(a3, b3, a2, b2));
   22150 }
   22151 
   22152 static IRExpr* mk_CatEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   22153 {
   22154   // returns a2 a0 b2 b0
   22155   IRTemp a2, a0, b2, b0;
   22156   break64to16s(NULL, &a2, NULL, &a0, a3210);
   22157   break64to16s(NULL, &b2, NULL, &b0, b3210);
   22158   return mkexpr(mk64from16s(a2, a0, b2, b0));
   22159 }
   22160 
   22161 static IRExpr* mk_CatOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   22162 {
   22163   // returns a3 a1 b3 b1
   22164   IRTemp a3, a1, b3, b1;
   22165   break64to16s(&a3, NULL, &a1, NULL, a3210);
   22166   break64to16s(&b3, NULL, &b1, NULL, b3210);
   22167   return mkexpr(mk64from16s(a3, a1, b3, b1));
   22168 }
   22169 
   22170 static IRExpr* mk_InterleaveOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   22171 {
   22172   // returns a3 b3 a1 b1
   22173   IRTemp a3, b3, a1, b1;
   22174   break64to16s(&a3, NULL, &a1, NULL, a3210);
   22175   break64to16s(&b3, NULL, &b1, NULL, b3210);
   22176   return mkexpr(mk64from16s(a3, b3, a1, b1));
   22177 }
   22178 
   22179 static IRExpr* mk_InterleaveEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   22180 {
   22181   // returns a2 b2 a0 b0
   22182   IRTemp a2, b2, a0, b0;
   22183   break64to16s(NULL, &a2, NULL, &a0, a3210);
   22184   break64to16s(NULL, &b2, NULL, &b0, b3210);
   22185   return mkexpr(mk64from16s(a2, b2, a0, b0));
   22186 }
   22187 
   22188 static void break64to8s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
   22189                           IRTemp* out4, IRTemp* out3, IRTemp* out2,
   22190                           IRTemp* out1,IRTemp* out0, IRTemp v64 )
   22191 {
   22192   if (out7) *out7 = newTemp(Ity_I32);
   22193   if (out6) *out6 = newTemp(Ity_I32);
   22194   if (out5) *out5 = newTemp(Ity_I32);
   22195   if (out4) *out4 = newTemp(Ity_I32);
   22196   if (out3) *out3 = newTemp(Ity_I32);
   22197   if (out2) *out2 = newTemp(Ity_I32);
   22198   if (out1) *out1 = newTemp(Ity_I32);
   22199   if (out0) *out0 = newTemp(Ity_I32);
   22200   IRTemp hi32 = newTemp(Ity_I32);
   22201   IRTemp lo32 = newTemp(Ity_I32);
   22202   assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
   22203   assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
   22204   if (out7)
   22205     assign(*out7, binop(Iop_And32,
   22206                         binop(Iop_Shr32, mkexpr(hi32), mkU8(24)),
   22207                         mkU32(0xFF)));
   22208   if (out6)
   22209     assign(*out6, binop(Iop_And32,
   22210                         binop(Iop_Shr32, mkexpr(hi32), mkU8(16)),
   22211                         mkU32(0xFF)));
   22212   if (out5)
   22213     assign(*out5, binop(Iop_And32,
   22214                         binop(Iop_Shr32, mkexpr(hi32), mkU8(8)),
   22215                         mkU32(0xFF)));
   22216   if (out4)
   22217     assign(*out4, binop(Iop_And32, mkexpr(hi32), mkU32(0xFF)));
   22218   if (out3)
   22219     assign(*out3, binop(Iop_And32,
   22220                         binop(Iop_Shr32, mkexpr(lo32), mkU8(24)),
   22221                         mkU32(0xFF)));
   22222   if (out2)
   22223     assign(*out2, binop(Iop_And32,
   22224                         binop(Iop_Shr32, mkexpr(lo32), mkU8(16)),
   22225                         mkU32(0xFF)));
   22226   if (out1)
   22227     assign(*out1, binop(Iop_And32,
   22228                         binop(Iop_Shr32, mkexpr(lo32), mkU8(8)),
   22229                         mkU32(0xFF)));
   22230   if (out0)
   22231     assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFF)));
   22232 }
   22233 
   22234 static IRTemp mk64from8s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
   22235                            IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
   22236 {
   22237   IRTemp hi32 = newTemp(Ity_I32);
   22238   IRTemp lo32 = newTemp(Ity_I32);
   22239   assign(hi32,
   22240          binop(Iop_Or32,
   22241                binop(Iop_Or32,
   22242                      binop(Iop_Shl32,
   22243                            binop(Iop_And32, mkexpr(in7), mkU32(0xFF)),
   22244                            mkU8(24)),
   22245                      binop(Iop_Shl32,
   22246                            binop(Iop_And32, mkexpr(in6), mkU32(0xFF)),
   22247                            mkU8(16))),
   22248                binop(Iop_Or32,
   22249                      binop(Iop_Shl32,
   22250                            binop(Iop_And32, mkexpr(in5), mkU32(0xFF)), mkU8(8)),
   22251                      binop(Iop_And32,
   22252                            mkexpr(in4), mkU32(0xFF)))));
   22253   assign(lo32,
   22254          binop(Iop_Or32,
   22255                binop(Iop_Or32,
   22256                      binop(Iop_Shl32,
   22257                            binop(Iop_And32, mkexpr(in3), mkU32(0xFF)),
   22258                            mkU8(24)),
   22259                      binop(Iop_Shl32,
   22260                            binop(Iop_And32, mkexpr(in2), mkU32(0xFF)),
   22261                            mkU8(16))),
   22262                binop(Iop_Or32,
   22263                      binop(Iop_Shl32,
   22264                            binop(Iop_And32, mkexpr(in1), mkU32(0xFF)), mkU8(8)),
   22265                      binop(Iop_And32,
   22266                            mkexpr(in0), mkU32(0xFF)))));
   22267   IRTemp res = newTemp(Ity_I64);
   22268   assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
   22269   return res;
   22270 }
   22271 
   22272 static IRExpr* mk_InterleaveLO8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22273 {
   22274   // returns a3 b3 a2 b2 a1 b1 a0 b0
   22275   IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
   22276   break64to8s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
   22277   break64to8s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
   22278   return mkexpr(mk64from8s(a3, b3, a2, b2, a1, b1, a0, b0));
   22279 }
   22280 
   22281 static IRExpr* mk_InterleaveHI8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22282 {
   22283   // returns a7 b7 a6 b6 a5 b5 a4 b4
   22284   IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
   22285   break64to8s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
   22286   break64to8s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
   22287   return mkexpr(mk64from8s(a7, b7, a6, b6, a5, b5, a4, b4));
   22288 }
   22289 
   22290 static IRExpr* mk_CatEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22291 {
   22292   // returns a6 a4 a2 a0 b6 b4 b2 b0
   22293   IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
   22294   break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
   22295   break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
   22296   return mkexpr(mk64from8s(a6, a4, a2, a0, b6, b4, b2, b0));
   22297 }
   22298 
   22299 static IRExpr* mk_CatOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22300 {
   22301   // returns a7 a5 a3 a1 b7 b5 b3 b1
   22302   IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
   22303   break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
   22304   break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
   22305   return mkexpr(mk64from8s(a7, a5, a3, a1, b7, b5, b3, b1));
   22306 }
   22307 
   22308 static IRExpr* mk_InterleaveEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22309 {
   22310   // returns a6 b6 a4 b4 a2 b2 a0 b0
   22311   IRTemp a6, b6, a4, b4, a2, b2, a0, b0;
   22312   break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
   22313   break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
   22314   return mkexpr(mk64from8s(a6, b6, a4, b4, a2, b2, a0, b0));
   22315 }
   22316 
   22317 static IRExpr* mk_InterleaveOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   22318 {
   22319   // returns a7 b7 a5 b5 a3 b3 a1 b1
   22320   IRTemp a7, b7, a5, b5, a3, b3, a1, b1;
   22321   break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
   22322   break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
   22323   return mkexpr(mk64from8s(a7, b7, a5, b5, a3, b3, a1, b1));
   22324 }
   22325 
   22326 static IRExpr* mk_InterleaveLO32x2 ( IRTemp a10, IRTemp b10 )
   22327 {
   22328   // returns a0 b0
   22329   return binop(Iop_32HLto64, unop(Iop_64to32, mkexpr(a10)),
   22330                              unop(Iop_64to32, mkexpr(b10)));
   22331 }
   22332 
   22333 static IRExpr* mk_InterleaveHI32x2 ( IRTemp a10, IRTemp b10 )
   22334 {
   22335   // returns a1 b1
   22336   return binop(Iop_32HLto64, unop(Iop_64HIto32, mkexpr(a10)),
   22337                              unop(Iop_64HIto32, mkexpr(b10)));
   22338 }
   22339 */
   22340 
   22341 /*--------------------------------------------------------------------*/
   22342 /*--- end                                         guest_arm_toIR.c ---*/
   22343 /*--------------------------------------------------------------------*/
   22344