Home | History | Annotate | Download | only in priv
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- begin                                       guest_arm_toIR.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2017 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    NEON support is
     14    Copyright (C) 2010-2017 Samsung Electronics
     15    contributed by Dmitry Zhurikhin <zhur (at) ispras.ru>
     16               and Kirill Batuzov <batuzovk (at) ispras.ru>
     17 
     18    This program is free software; you can redistribute it and/or
     19    modify it under the terms of the GNU General Public License as
     20    published by the Free Software Foundation; either version 2 of the
     21    License, or (at your option) any later version.
     22 
     23    This program is distributed in the hope that it will be useful, but
     24    WITHOUT ANY WARRANTY; without even the implied warranty of
     25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     26    General Public License for more details.
     27 
     28    You should have received a copy of the GNU General Public License
     29    along with this program; if not, write to the Free Software
     30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     31    02110-1301, USA.
     32 
     33    The GNU General Public License is contained in the file COPYING.
     34 */
     35 
     36 /* XXXX thumb to check:
     37    that all cases where putIRegT writes r15, we generate a jump.
     38 
     39    All uses of newTemp assign to an IRTemp and not a UInt
     40 
     41    For all thumb loads and stores, including VFP ones, new-ITSTATE is
     42    backed out before the memory op, and restored afterwards.  This
     43    needs to happen even after we go uncond.  (and for sure it doesn't
     44    happen for VFP loads/stores right now).
     45 
     46    VFP on thumb: check that we exclude all r13/r15 cases that we
     47    should.
     48 
     49    XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
     50    taking into account the number of insns guarded by an IT.
     51 
   remove the nasty hack, in the spechelper, of looking for Or32(...,
   0xE0) as the first arg to armg_calculate_condition, and instead
   use Slice44 as specified in comments in the spechelper.
     55 
     56    add specialisations for armg_calculate_flag_c and _v, as they
     57    are moderately often needed in Thumb code.
     58 
     59    Correctness: ITSTATE handling in Thumb SVCs is wrong.
     60 
     61    Correctness (obscure): in m_transtab, when invalidating code
     62    address ranges, invalidate up to 18 bytes after the end of the
     63    range.  This is because the ITSTATE optimisation at the top of
     64    _THUMB_WRK below analyses up to 18 bytes before the start of any
     65    given instruction, and so might depend on the invalidated area.
     66 */
     67 
     68 /* Limitations, etc
     69 
     70    - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
     71      These instructions are non-restartable in the case where the
     72      transfer(s) fault.
     73 
     74    - SWP: the restart jump back is Ijk_Boring; it should be
     75      Ijk_NoRedir but that's expensive.  See comments on casLE() in
     76      guest_x86_toIR.c.
     77 */
     78 
     79 /* "Special" instructions.
     80 
     81    This instruction decoder can decode four special instructions
     82    which mean nothing natively (are no-ops as far as regs/mem are
     83    concerned) but have meaning for supporting Valgrind.  A special
     84    instruction is flagged by a 16-byte preamble:
     85 
     86       E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
     87       (mov r12, r12, ROR #3;   mov r12, r12, ROR #13;
     88        mov r12, r12, ROR #29;  mov r12, r12, ROR #19)
     89 
   Following that, one of the following 4 is allowed
   (standard interpretation in parentheses):
     92 
     93       E18AA00A (orr r10,r10,r10)   R3 = client_request ( R4 )
     94       E18BB00B (orr r11,r11,r11)   R3 = guest_NRADDR
     95       E18CC00C (orr r12,r12,r12)   branch-and-link-to-noredir R4
     96       E1899009 (orr r9,r9,r9)      IR injection
     97 
     98    Any other bytes following the 16-byte preamble are illegal and
     99    constitute a failure in instruction decoding.  This all assumes
    100    that the preamble will never occur except in specific code
    101    fragments designed for Valgrind to catch.
    102 */
    103 
    104 /* Translates ARM(v5) code to IR. */
    105 
    106 #include "libvex_basictypes.h"
    107 #include "libvex_ir.h"
    108 #include "libvex.h"
    109 #include "libvex_guest_arm.h"
    110 
    111 #include "main_util.h"
    112 #include "main_globals.h"
    113 #include "guest_generic_bb_to_IR.h"
    114 #include "guest_arm_defs.h"
    115 
    116 
    117 /*------------------------------------------------------------*/
    118 /*--- Globals                                              ---*/
    119 /*------------------------------------------------------------*/
    120 
/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means the
   value does not change during translation of the instruction.
*/
    125 
/* CONST: the host's endianness.  This has to do with float vs double
   register accesses on VFP, but it's complex and not properly
   thought out. */
static VexEndness host_endness;

/* CONST: The guest address of the instruction currently being
   translated.  This is the real, "decoded" address (not subject
   to the CPSR.T kludge).  getIRegA/getIRegT use it to synthesise the
   value read from r15 (this address + 8 in ARM mode, + 4 in Thumb
   mode) instead of reading the guest state. */
static Addr32 guest_R15_curr_instr_notENC;

/* CONST, FOR ASSERTIONS ONLY.  Indicates whether the insn currently
   being processed is Thumb (True) or ARM (False).  Consulted by the
   ASSERT_IS_THUMB and ASSERT_IS_ARM macros. */
static Bool __curr_is_Thumb;

/* MOD: The IRSB* into which we're generating code.  All statements
   emitted via stmt() are appended to it, and newTemp() allocates
   temporaries in its type environment. */
static IRSB* irsb;

/* These are to do with handling writes to r15.  They are initially
   set at the start of disInstr_ARM_WRK to indicate no update,
   possibly updated during the routine, and examined again at the end.
   If they have been set to indicate a r15 update then a jump is
   generated.  Note, "explicit" jumps (b, bx, etc) are generated
   directly, not using this mechanism -- this is intended to handle
   the implicit-style jumps resulting from (eg) assigning to r15 as
   the result of insns we wouldn't normally consider branchy. */

/* MOD.  Initially False; set to True iff abovementioned handling is
   required.  putIRegA sets it when asked to write r15. */
static Bool r15written;

/* MOD.  Initially IRTemp_INVALID.  If the r15 branch to be generated
   is conditional, this holds the gating IRTemp :: Ity_I32.  If the
   branch to be generated is unconditional, this remains
   IRTemp_INVALID. */
static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
    161 
    162 /* MOD.  Initially Ijk_Boring.  If an r15 branch is to be generated,
    163    this holds the jump kind. */
    164 static IRTemp r15kind;
    165 
    166 
    167 /*------------------------------------------------------------*/
    168 /*--- Debugging output                                     ---*/
    169 /*------------------------------------------------------------*/
    170 
    171 #define DIP(format, args...)           \
    172    if (vex_traceflags & VEX_TRACE_FE)  \
    173       vex_printf(format, ## args)
    174 
    175 #define DIS(buf, format, args...)      \
    176    if (vex_traceflags & VEX_TRACE_FE)  \
    177       vex_sprintf(buf, format, ## args)
    178 
    179 #define ASSERT_IS_THUMB \
    180    do { vassert(__curr_is_Thumb); } while (0)
    181 
    182 #define ASSERT_IS_ARM \
    183    do { vassert(! __curr_is_Thumb); } while (0)
    184 
    185 
    186 /*------------------------------------------------------------*/
    187 /*--- Helper bits and pieces for deconstructing the        ---*/
    188 /*--- arm insn stream.                                     ---*/
    189 /*------------------------------------------------------------*/
    190 
    191 /* Do a little-endian load of a 32-bit word, regardless of the
    192    endianness of the underlying host. */
    193 static inline UInt getUIntLittleEndianly ( const UChar* p )
    194 {
    195    UInt w = 0;
    196    w = (w << 8) | p[3];
    197    w = (w << 8) | p[2];
    198    w = (w << 8) | p[1];
    199    w = (w << 8) | p[0];
    200    return w;
    201 }
    202 
    203 /* Do a little-endian load of a 16-bit word, regardless of the
    204    endianness of the underlying host. */
    205 static inline UShort getUShortLittleEndianly ( const UChar* p )
    206 {
    207    UShort w = 0;
    208    w = (w << 8) | p[1];
    209    w = (w << 8) | p[0];
    210    return w;
    211 }
    212 
    213 static UInt ROR32 ( UInt x, UInt sh ) {
    214    vassert(sh >= 0 && sh < 32);
    215    if (sh == 0)
    216       return x;
    217    else
    218       return (x << (32-sh)) | (x >> sh);
    219 }
    220 
    221 static Int popcount32 ( UInt x )
    222 {
    223    Int res = 0, i;
    224    for (i = 0; i < 32; i++) {
    225       res += (x & 1);
    226       x >>= 1;
    227    }
    228    return res;
    229 }
    230 
    231 static UInt setbit32 ( UInt x, Int ix, UInt b )
    232 {
    233    UInt mask = 1 << ix;
    234    x &= ~mask;
    235    x |= ((b << ix) & mask);
    236    return x;
    237 }
    238 
    239 #define BITS2(_b1,_b0) \
    240    (((_b1) << 1) | (_b0))
    241 
    242 #define BITS3(_b2,_b1,_b0)                      \
    243   (((_b2) << 2) | ((_b1) << 1) | (_b0))
    244 
    245 #define BITS4(_b3,_b2,_b1,_b0) \
    246    (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
    247 
    248 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    249    ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    250     | BITS4((_b3),(_b2),(_b1),(_b0)))
    251 
    252 #define BITS5(_b4,_b3,_b2,_b1,_b0)  \
    253    (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
    254 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
    255    (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    256 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    257    (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    258 
    259 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)      \
    260    (((_b8) << 8) \
    261     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    262 
    263 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    264    (((_b9) << 9) | ((_b8) << 8)                                \
    265     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    266 
    267 #define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    268    ( ((_b10) << 10) | ((_b9) << 9) | ((_b8) << 8)              \
    269     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    270 
    271 #define BITS12(_b11,_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    272    ( ((_b11) << 11) | ((_b10) << 10) | ((_b9) << 9) | ((_b8) << 8) \
    273     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    274 
    275 /* produces _uint[_bMax:_bMin] */
    276 #define SLICE_UInt(_uint,_bMax,_bMin) \
    277    (( ((UInt)(_uint)) >> (_bMin)) \
    278     & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
    279 
    280 
    281 /*------------------------------------------------------------*/
    282 /*--- Helper bits and pieces for creating IR fragments.    ---*/
    283 /*------------------------------------------------------------*/
    284 
    285 static IRExpr* mkU64 ( ULong i )
    286 {
    287    return IRExpr_Const(IRConst_U64(i));
    288 }
    289 
    290 static IRExpr* mkU32 ( UInt i )
    291 {
    292    return IRExpr_Const(IRConst_U32(i));
    293 }
    294 
    295 static IRExpr* mkU8 ( UInt i )
    296 {
    297    vassert(i < 256);
    298    return IRExpr_Const(IRConst_U8( (UChar)i ));
    299 }
    300 
    301 static IRExpr* mkexpr ( IRTemp tmp )
    302 {
    303    return IRExpr_RdTmp(tmp);
    304 }
    305 
    306 static IRExpr* unop ( IROp op, IRExpr* a )
    307 {
    308    return IRExpr_Unop(op, a);
    309 }
    310 
    311 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
    312 {
    313    return IRExpr_Binop(op, a1, a2);
    314 }
    315 
    316 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
    317 {
    318    return IRExpr_Triop(op, a1, a2, a3);
    319 }
    320 
    321 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
    322 {
    323    return IRExpr_Load(Iend_LE, ty, addr);
    324 }
    325 
    326 /* Add a statement to the list held by "irbb". */
    327 static void stmt ( IRStmt* st )
    328 {
    329    addStmtToIRSB( irsb, st );
    330 }
    331 
    332 static void assign ( IRTemp dst, IRExpr* e )
    333 {
    334    stmt( IRStmt_WrTmp(dst, e) );
    335 }
    336 
    337 static void storeLE ( IRExpr* addr, IRExpr* data )
    338 {
    339    stmt( IRStmt_Store(Iend_LE, addr, data) );
    340 }
    341 
    342 static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
    343 {
    344    if (guardT == IRTemp_INVALID) {
    345       /* unconditional */
    346       storeLE(addr, data);
    347    } else {
    348       stmt( IRStmt_StoreG(Iend_LE, addr, data,
    349                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
    350    }
    351 }
    352 
    353 static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
    354                             IRExpr* addr, IRExpr* alt,
    355                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
    356 {
    357    if (guardT == IRTemp_INVALID) {
    358       /* unconditional */
    359       IRExpr* loaded = NULL;
    360       switch (cvt) {
    361          case ILGop_Ident32:
    362             loaded = loadLE(Ity_I32, addr); break;
    363          case ILGop_8Uto32:
    364             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
    365          case ILGop_8Sto32:
    366             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
    367          case ILGop_16Uto32:
    368             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
    369          case ILGop_16Sto32:
    370             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
    371          default:
    372             vassert(0);
    373       }
    374       vassert(loaded != NULL);
    375       assign(dst, loaded);
    376    } else {
    377       /* Generate a guarded load into 'dst', but apply 'cvt' to the
    378          loaded data before putting the data in 'dst'.  If the load
    379          does not take place, 'alt' is placed directly in 'dst'. */
    380       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
    381                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
    382    }
    383 }
    384 
    385 /* Generate a new temporary of the given type. */
    386 static IRTemp newTemp ( IRType ty )
    387 {
    388    vassert(isPlausibleIRType(ty));
    389    return newIRTemp( irsb->tyenv, ty );
    390 }
    391 
    392 /* Produces a value in 0 .. 3, which is encoded as per the type
    393    IRRoundingMode. */
    394 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
    395 {
    396    return mkU32(Irrm_NEAREST);
    397 }
    398 
    399 /* Generate an expression for SRC rotated right by ROT. */
    400 static IRExpr* genROR32( IRTemp src, Int rot )
    401 {
    402    vassert(rot >= 0 && rot < 32);
    403    if (rot == 0)
    404       return mkexpr(src);
    405    return
    406       binop(Iop_Or32,
    407             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
    408             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
    409 }
    410 
    411 static IRExpr* mkU128 ( ULong i )
    412 {
    413    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
    414 }
    415 
    416 /* Generate a 4-aligned version of the given expression if
    417    the given condition is true.  Else return it unchanged. */
    418 static IRExpr* align4if ( IRExpr* e, Bool b )
    419 {
    420    if (b)
    421       return binop(Iop_And32, e, mkU32(~3));
    422    else
    423       return e;
    424 }
    425 
    426 
    427 /*------------------------------------------------------------*/
    428 /*--- Helpers for accessing guest registers.               ---*/
    429 /*------------------------------------------------------------*/
    430 
/* Byte offsets (via offsetof) of individual fields within the guest
   state structure, VexGuestARMState. */

/* Integer registers r0 .. r14, plus guest_R15T for r15. */
#define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
#define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
#define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
#define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
#define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
#define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
#define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
#define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
#define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
#define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
#define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
#define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
#define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
#define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
#define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
#define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)

/* The guest_CC_* fields and guest_NRADDR. */
#define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
#define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)

/* D registers d0 .. d31.  The Q register offsets are derived from
   these too: quadGuestRegOffset maps Qn to the offset of D(2n). */
#define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
#define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
#define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
#define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
#define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
#define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
#define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
#define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
#define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
#define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
#define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
#define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
#define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
#define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
#define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
#define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
#define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
#define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
#define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
#define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
#define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
#define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
#define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
#define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
#define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
#define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
#define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
#define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
#define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
#define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
#define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
#define OFFB_D31      offsetof(VexGuestARMState,guest_D31)

/* Miscellaneous status fields: guest_FPSCR, guest_TPIDRURO,
   guest_ITSTATE, guest_QFLAG32 and guest_GEFLAG0 .. guest_GEFLAG3. */
#define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
#define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
#define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
#define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
#define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
#define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
#define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
#define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)

/* The guest_CMSTART and guest_CMLEN fields. */
#define OFFB_CMSTART  offsetof(VexGuestARMState,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARMState,guest_CMLEN)
    498 
    499 
    500 /* ---------------- Integer registers ---------------- */
    501 
    502 static Int integerGuestRegOffset ( UInt iregNo )
    503 {
    504    /* Do we care about endianness here?  We do if sub-parts of integer
    505       registers are accessed, but I don't think that ever happens on
    506       ARM. */
    507    switch (iregNo) {
    508       case 0:  return OFFB_R0;
    509       case 1:  return OFFB_R1;
    510       case 2:  return OFFB_R2;
    511       case 3:  return OFFB_R3;
    512       case 4:  return OFFB_R4;
    513       case 5:  return OFFB_R5;
    514       case 6:  return OFFB_R6;
    515       case 7:  return OFFB_R7;
    516       case 8:  return OFFB_R8;
    517       case 9:  return OFFB_R9;
    518       case 10: return OFFB_R10;
    519       case 11: return OFFB_R11;
    520       case 12: return OFFB_R12;
    521       case 13: return OFFB_R13;
    522       case 14: return OFFB_R14;
    523       case 15: return OFFB_R15T;
    524       default: vassert(0);
    525    }
    526 }
    527 
    528 /* Plain ("low level") read from a reg; no +8 offset magic for r15. */
    529 static IRExpr* llGetIReg ( UInt iregNo )
    530 {
    531    vassert(iregNo < 16);
    532    return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
    533 }
    534 
    535 /* Architected read from a reg in ARM mode.  This automagically adds 8
    536    to all reads of r15. */
    537 static IRExpr* getIRegA ( UInt iregNo )
    538 {
    539    IRExpr* e;
    540    ASSERT_IS_ARM;
    541    vassert(iregNo < 16);
    542    if (iregNo == 15) {
    543       /* If asked for r15, don't read the guest state value, as that
    544          may not be up to date in the case where loop unrolling has
    545          happened, because the first insn's write to the block is
    546          omitted; hence in the 2nd and subsequent unrollings we don't
    547          have a correct value in guest r15.  Instead produce the
    548          constant that we know would be produced at this point. */
    549       vassert(0 == (guest_R15_curr_instr_notENC & 3));
    550       e = mkU32(guest_R15_curr_instr_notENC + 8);
    551    } else {
    552       e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
    553    }
    554    return e;
    555 }
    556 
    557 /* Architected read from a reg in Thumb mode.  This automagically adds
    558    4 to all reads of r15. */
    559 static IRExpr* getIRegT ( UInt iregNo )
    560 {
    561    IRExpr* e;
    562    ASSERT_IS_THUMB;
    563    vassert(iregNo < 16);
    564    if (iregNo == 15) {
    565       /* Ditto comment in getIReg. */
    566       vassert(0 == (guest_R15_curr_instr_notENC & 1));
    567       e = mkU32(guest_R15_curr_instr_notENC + 4);
    568    } else {
    569       e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
    570    }
    571    return e;
    572 }
    573 
    574 /* Plain ("low level") write to a reg; no jump or alignment magic for
    575    r15. */
    576 static void llPutIReg ( UInt iregNo, IRExpr* e )
    577 {
    578    vassert(iregNo < 16);
    579    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
    580    stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
    581 }
    582 
    583 /* Architected write to an integer register in ARM mode.  If it is to
    584    r15, record info so at the end of this insn's translation, a branch
    585    to it can be made.  Also handles conditional writes to the
    586    register: if guardT == IRTemp_INVALID then the write is
    587    unconditional.  If writing r15, also 4-align it. */
    588 static void putIRegA ( UInt       iregNo,
    589                        IRExpr*    e,
    590                        IRTemp     guardT /* :: Ity_I32, 0 or 1 */,
    591                        IRJumpKind jk /* if a jump is generated */ )
    592 {
    593    /* if writing r15, force e to be 4-aligned. */
    594    // INTERWORKING FIXME.  this needs to be relaxed so that
    595    // puts caused by LDMxx which load r15 interwork right.
    596    // but is no aligned too relaxed?
    597    //if (iregNo == 15)
    598    //   e = binop(Iop_And32, e, mkU32(~3));
    599    ASSERT_IS_ARM;
    600    /* So, generate either an unconditional or a conditional write to
    601       the reg. */
    602    if (guardT == IRTemp_INVALID) {
    603       /* unconditional write */
    604       llPutIReg( iregNo, e );
    605    } else {
    606       llPutIReg( iregNo,
    607                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    608                              e, llGetIReg(iregNo) ));
    609    }
    610    if (iregNo == 15) {
    611       // assert against competing r15 updates.  Shouldn't
    612       // happen; should be ruled out by the instr matching
    613       // logic.
    614       vassert(r15written == False);
    615       vassert(r15guard   == IRTemp_INVALID);
    616       vassert(r15kind    == Ijk_Boring);
    617       r15written = True;
    618       r15guard   = guardT;
    619       r15kind    = jk;
    620    }
    621 }
    622 
    623 
    624 /* Architected write to an integer register in Thumb mode.  Writes to
    625    r15 are not allowed.  Handles conditional writes to the register:
    626    if guardT == IRTemp_INVALID then the write is unconditional. */
    627 static void putIRegT ( UInt       iregNo,
    628                        IRExpr*    e,
    629                        IRTemp     guardT /* :: Ity_I32, 0 or 1 */ )
    630 {
    631    /* So, generate either an unconditional or a conditional write to
    632       the reg. */
    633    ASSERT_IS_THUMB;
    634    vassert(iregNo >= 0 && iregNo <= 14);
    635    if (guardT == IRTemp_INVALID) {
    636       /* unconditional write */
    637       llPutIReg( iregNo, e );
    638    } else {
    639       llPutIReg( iregNo,
    640                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    641                              e, llGetIReg(iregNo) ));
    642    }
    643 }
    644 
    645 
    646 /* Thumb16 and Thumb32 only.
    647    Returns true if reg is 13 or 15.  Implements the BadReg
    648    predicate in the ARM ARM. */
    649 static Bool isBadRegT ( UInt r )
    650 {
    651    vassert(r <= 15);
    652    ASSERT_IS_THUMB;
    653    return r == 13 || r == 15;
    654 }
    655 
    656 
    657 /* ---------------- Double registers ---------------- */
    658 
    659 static Int doubleGuestRegOffset ( UInt dregNo )
    660 {
    661    /* Do we care about endianness here?  Probably do if we ever get
    662       into the situation of dealing with the single-precision VFP
    663       registers. */
    664    switch (dregNo) {
    665       case 0:  return OFFB_D0;
    666       case 1:  return OFFB_D1;
    667       case 2:  return OFFB_D2;
    668       case 3:  return OFFB_D3;
    669       case 4:  return OFFB_D4;
    670       case 5:  return OFFB_D5;
    671       case 6:  return OFFB_D6;
    672       case 7:  return OFFB_D7;
    673       case 8:  return OFFB_D8;
    674       case 9:  return OFFB_D9;
    675       case 10: return OFFB_D10;
    676       case 11: return OFFB_D11;
    677       case 12: return OFFB_D12;
    678       case 13: return OFFB_D13;
    679       case 14: return OFFB_D14;
    680       case 15: return OFFB_D15;
    681       case 16: return OFFB_D16;
    682       case 17: return OFFB_D17;
    683       case 18: return OFFB_D18;
    684       case 19: return OFFB_D19;
    685       case 20: return OFFB_D20;
    686       case 21: return OFFB_D21;
    687       case 22: return OFFB_D22;
    688       case 23: return OFFB_D23;
    689       case 24: return OFFB_D24;
    690       case 25: return OFFB_D25;
    691       case 26: return OFFB_D26;
    692       case 27: return OFFB_D27;
    693       case 28: return OFFB_D28;
    694       case 29: return OFFB_D29;
    695       case 30: return OFFB_D30;
    696       case 31: return OFFB_D31;
    697       default: vassert(0);
    698    }
    699 }
    700 
    701 /* Plain ("low level") read from a VFP Dreg. */
    702 static IRExpr* llGetDReg ( UInt dregNo )
    703 {
    704    vassert(dregNo < 32);
    705    return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
    706 }
    707 
    708 /* Architected read from a VFP Dreg. */
    709 static IRExpr* getDReg ( UInt dregNo ) {
    710    return llGetDReg( dregNo );
    711 }
    712 
    713 /* Plain ("low level") write to a VFP Dreg. */
    714 static void llPutDReg ( UInt dregNo, IRExpr* e )
    715 {
    716    vassert(dregNo < 32);
    717    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
    718    stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
    719 }
    720 
    721 /* Architected write to a VFP Dreg.  Handles conditional writes to the
    722    register: if guardT == IRTemp_INVALID then the write is
    723    unconditional. */
    724 static void putDReg ( UInt    dregNo,
    725                       IRExpr* e,
    726                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    727 {
    728    /* So, generate either an unconditional or a conditional write to
    729       the reg. */
    730    if (guardT == IRTemp_INVALID) {
    731       /* unconditional write */
    732       llPutDReg( dregNo, e );
    733    } else {
    734       llPutDReg( dregNo,
    735                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    736                              e, llGetDReg(dregNo) ));
    737    }
    738 }
    739 
    740 /* And now exactly the same stuff all over again, but this time
    741    taking/returning I64 rather than F64, to support 64-bit Neon
    742    ops. */
    743 
    744 /* Plain ("low level") read from a Neon Integer Dreg. */
    745 static IRExpr* llGetDRegI64 ( UInt dregNo )
    746 {
    747    vassert(dregNo < 32);
    748    return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
    749 }
    750 
    751 /* Architected read from a Neon Integer Dreg. */
    752 static IRExpr* getDRegI64 ( UInt dregNo ) {
    753    return llGetDRegI64( dregNo );
    754 }
    755 
    756 /* Plain ("low level") write to a Neon Integer Dreg. */
    757 static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
    758 {
    759    vassert(dregNo < 32);
    760    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
    761    stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
    762 }
    763 
    764 /* Architected write to a Neon Integer Dreg.  Handles conditional
    765    writes to the register: if guardT == IRTemp_INVALID then the write
    766    is unconditional. */
    767 static void putDRegI64 ( UInt    dregNo,
    768                          IRExpr* e,
    769                          IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    770 {
    771    /* So, generate either an unconditional or a conditional write to
    772       the reg. */
    773    if (guardT == IRTemp_INVALID) {
    774       /* unconditional write */
    775       llPutDRegI64( dregNo, e );
    776    } else {
    777       llPutDRegI64( dregNo,
    778                     IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    779                                 e, llGetDRegI64(dregNo) ));
    780    }
    781 }
    782 
    783 /* ---------------- Quad registers ---------------- */
    784 
    785 static Int quadGuestRegOffset ( UInt qregNo )
    786 {
    787    /* Do we care about endianness here?  Probably do if we ever get
    788       into the situation of dealing with the 64 bit Neon registers. */
    789    switch (qregNo) {
    790       case 0:  return OFFB_D0;
    791       case 1:  return OFFB_D2;
    792       case 2:  return OFFB_D4;
    793       case 3:  return OFFB_D6;
    794       case 4:  return OFFB_D8;
    795       case 5:  return OFFB_D10;
    796       case 6:  return OFFB_D12;
    797       case 7:  return OFFB_D14;
    798       case 8:  return OFFB_D16;
    799       case 9:  return OFFB_D18;
    800       case 10: return OFFB_D20;
    801       case 11: return OFFB_D22;
    802       case 12: return OFFB_D24;
    803       case 13: return OFFB_D26;
    804       case 14: return OFFB_D28;
    805       case 15: return OFFB_D30;
    806       default: vassert(0);
    807    }
    808 }
    809 
    810 /* Plain ("low level") read from a Neon Qreg. */
    811 static IRExpr* llGetQReg ( UInt qregNo )
    812 {
    813    vassert(qregNo < 16);
    814    return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
    815 }
    816 
    817 /* Architected read from a Neon Qreg. */
    818 static IRExpr* getQReg ( UInt qregNo ) {
    819    return llGetQReg( qregNo );
    820 }
    821 
    822 /* Plain ("low level") write to a Neon Qreg. */
    823 static void llPutQReg ( UInt qregNo, IRExpr* e )
    824 {
    825    vassert(qregNo < 16);
    826    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
    827    stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
    828 }
    829 
    830 /* Architected write to a Neon Qreg.  Handles conditional writes to the
    831    register: if guardT == IRTemp_INVALID then the write is
    832    unconditional. */
    833 static void putQReg ( UInt    qregNo,
    834                       IRExpr* e,
    835                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    836 {
    837    /* So, generate either an unconditional or a conditional write to
    838       the reg. */
    839    if (guardT == IRTemp_INVALID) {
    840       /* unconditional write */
    841       llPutQReg( qregNo, e );
    842    } else {
    843       llPutQReg( qregNo,
    844                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    845                              e, llGetQReg(qregNo) ));
    846    }
    847 }
    848 
    849 
    850 /* ---------------- Float registers ---------------- */
    851 
    852 static Int floatGuestRegOffset ( UInt fregNo )
    853 {
    854    /* Start with the offset of the containing double, and then correct
    855       for endianness.  Actually this is completely bogus and needs
    856       careful thought. */
    857    Int off;
    858    /* NB! Limit is 64, not 32, because we might be pulling F32 bits
    859       out of SIMD registers, and there are 16 SIMD registers each of
    860       128 bits (4 x F32). */
    861    vassert(fregNo < 64);
    862    off = doubleGuestRegOffset(fregNo >> 1);
    863    if (host_endness == VexEndnessLE) {
    864       if (fregNo & 1)
    865          off += 4;
    866    } else {
    867       vassert(0);
    868    }
    869    return off;
    870 }
    871 
    872 /* Plain ("low level") read from a VFP Freg. */
    873 static IRExpr* llGetFReg ( UInt fregNo )
    874 {
    875    vassert(fregNo < 32);
    876    return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
    877 }
    878 
    879 static IRExpr* llGetFReg_up_to_64 ( UInt fregNo )
    880 {
    881    vassert(fregNo < 64);
    882    return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
    883 }
    884 
    885 /* Architected read from a VFP Freg. */
    886 static IRExpr* getFReg ( UInt fregNo ) {
    887    return llGetFReg( fregNo );
    888 }
    889 
    890 /* Plain ("low level") write to a VFP Freg. */
    891 static void llPutFReg ( UInt fregNo, IRExpr* e )
    892 {
    893    vassert(fregNo < 32);
    894    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
    895    stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
    896 }
    897 
    898 static void llPutFReg_up_to_64 ( UInt fregNo, IRExpr* e )
    899 {
    900    vassert(fregNo < 64);
    901    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
    902    stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
    903 }
    904 
    905 /* Architected write to a VFP Freg.  Handles conditional writes to the
    906    register: if guardT == IRTemp_INVALID then the write is
    907    unconditional. */
    908 static void putFReg ( UInt    fregNo,
    909                       IRExpr* e,
    910                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    911 {
    912    /* So, generate either an unconditional or a conditional write to
    913       the reg. */
    914    if (guardT == IRTemp_INVALID) {
    915       /* unconditional write */
    916       llPutFReg( fregNo, e );
    917    } else {
    918       llPutFReg( fregNo,
    919                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    920                              e, llGetFReg(fregNo) ));
    921    }
    922 }
    923 
    924 
    925 /* ---------------- Misc registers ---------------- */
    926 
    927 static void putMiscReg32 ( UInt    gsoffset,
    928                            IRExpr* e, /* :: Ity_I32 */
    929                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    930 {
    931    switch (gsoffset) {
    932       case OFFB_FPSCR:   break;
    933       case OFFB_QFLAG32: break;
    934       case OFFB_GEFLAG0: break;
    935       case OFFB_GEFLAG1: break;
    936       case OFFB_GEFLAG2: break;
    937       case OFFB_GEFLAG3: break;
    938       default: vassert(0); /* awaiting more cases */
    939    }
    940    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
    941 
    942    if (guardT == IRTemp_INVALID) {
    943       /* unconditional write */
    944       stmt(IRStmt_Put(gsoffset, e));
    945    } else {
    946       stmt(IRStmt_Put(
    947          gsoffset,
    948          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
    949                      e, IRExpr_Get(gsoffset, Ity_I32) )
    950       ));
    951    }
    952 }
    953 
    954 static IRTemp get_ITSTATE ( void )
    955 {
    956    ASSERT_IS_THUMB;
    957    IRTemp t = newTemp(Ity_I32);
    958    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
    959    return t;
    960 }
    961 
    962 static void put_ITSTATE ( IRTemp t )
    963 {
    964    ASSERT_IS_THUMB;
    965    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
    966 }
    967 
    968 static IRTemp get_QFLAG32 ( void )
    969 {
    970    IRTemp t = newTemp(Ity_I32);
    971    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
    972    return t;
    973 }
    974 
    975 static void put_QFLAG32 ( IRTemp t, IRTemp condT )
    976 {
    977    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
    978 }
    979 
    980 /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
    981    Status Register) to indicate that overflow or saturation occurred.
    982    Nb: t must be zero to denote no saturation, and any nonzero
    983    value to indicate saturation. */
    984 static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
    985 {
    986    IRTemp old = get_QFLAG32();
    987    IRTemp nyu = newTemp(Ity_I32);
    988    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
    989    put_QFLAG32(nyu, condT);
    990 }
    991 
    992 /* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
    993    flagNo: which flag bit to set [3...0]
    994    lowbits_to_ignore:  0 = look at all 32 bits
    995                        8 = look at top 24 bits only
    996                       16 = look at top 16 bits only
    997                       31 = look at the top bit only
    998    e: input value to be evaluated.
    999    The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
   1000    masked out.  If the resulting value is zero then the GE flag is
   1001    set to 0; any other value sets the flag to 1. */
   1002 static void put_GEFLAG32 ( Int flagNo,            /* 0, 1, 2 or 3 */
   1003                            Int lowbits_to_ignore, /* 0, 8, 16 or 31   */
   1004                            IRExpr* e,             /* Ity_I32 */
   1005                            IRTemp condT )
   1006 {
   1007    vassert( flagNo >= 0 && flagNo <= 3 );
   1008    vassert( lowbits_to_ignore == 0  ||
   1009             lowbits_to_ignore == 8  ||
   1010             lowbits_to_ignore == 16 ||
   1011             lowbits_to_ignore == 31 );
   1012    IRTemp masked = newTemp(Ity_I32);
   1013    assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
   1014 
   1015    switch (flagNo) {
   1016       case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
   1017       case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
   1018       case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
   1019       case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
   1020       default: vassert(0);
   1021    }
   1022 }
   1023 
   1024 /* Return the (32-bit, zero-or-nonzero representation scheme) of
   1025    the specified GE flag. */
   1026 static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
   1027 {
   1028    switch (flagNo) {
   1029       case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
   1030       case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
   1031       case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
   1032       case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
   1033       default: vassert(0);
   1034    }
   1035 }
   1036 
   1037 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
   1038    2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
   1039    15 of the value.  All other bits are ignored. */
   1040 static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
   1041 {
   1042    IRTemp ge10 = newTemp(Ity_I32);
   1043    IRTemp ge32 = newTemp(Ity_I32);
   1044    assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
   1045    assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
   1046    put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
   1047    put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
   1048    put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
   1049    put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
   1050 }
   1051 
   1052 
   1053 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3
   1054    from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
   1055    bit 7.  All other bits are ignored. */
   1056 static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
   1057 {
   1058    IRTemp ge0 = newTemp(Ity_I32);
   1059    IRTemp ge1 = newTemp(Ity_I32);
   1060    IRTemp ge2 = newTemp(Ity_I32);
   1061    IRTemp ge3 = newTemp(Ity_I32);
   1062    assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
   1063    assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
   1064    assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
   1065    assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
   1066    put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
   1067    put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
   1068    put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
   1069    put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
   1070 }
   1071 
   1072 
   1073 /* ---------------- FPSCR stuff ---------------- */
   1074 
   1075 /* Generate IR to get hold of the rounding mode bits in FPSCR, and
   1076    convert them to IR format.  Bind the final result to the
   1077    returned temp. */
   1078 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
   1079 {
   1080    /* The ARMvfp encoding for rounding mode bits is:
   1081          00  to nearest
   1082          01  to +infinity
   1083          10  to -infinity
   1084          11  to zero
   1085       We need to convert that to the IR encoding:
   1086          00  to nearest (the default)
   1087          10  to +infinity
   1088          01  to -infinity
   1089          11  to zero
   1090       Which can be done by swapping bits 0 and 1.
   1091       The rmode bits are at 23:22 in FPSCR.
   1092    */
   1093    IRTemp armEncd = newTemp(Ity_I32);
   1094    IRTemp swapped = newTemp(Ity_I32);
   1095    /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
   1096       we don't zero out bits 24 and above, since the assignment to
   1097       'swapped' will mask them out anyway. */
   1098    assign(armEncd,
   1099           binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
   1100    /* Now swap them. */
   1101    assign(swapped,
   1102           binop(Iop_Or32,
   1103                 binop(Iop_And32,
   1104                       binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
   1105                       mkU32(2)),
   1106                 binop(Iop_And32,
   1107                       binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
   1108                       mkU32(1))
   1109          ));
   1110    return swapped;
   1111 }
   1112 
   1113 
   1114 /*------------------------------------------------------------*/
   1115 /*--- Helpers for flag handling and conditional insns      ---*/
   1116 /*------------------------------------------------------------*/
   1117 
   1118 static const HChar* name_ARMCondcode ( ARMCondcode cond )
   1119 {
   1120    switch (cond) {
   1121       case ARMCondEQ:  return "{eq}";
   1122       case ARMCondNE:  return "{ne}";
   1123       case ARMCondHS:  return "{hs}";  // or 'cs'
   1124       case ARMCondLO:  return "{lo}";  // or 'cc'
   1125       case ARMCondMI:  return "{mi}";
   1126       case ARMCondPL:  return "{pl}";
   1127       case ARMCondVS:  return "{vs}";
   1128       case ARMCondVC:  return "{vc}";
   1129       case ARMCondHI:  return "{hi}";
   1130       case ARMCondLS:  return "{ls}";
   1131       case ARMCondGE:  return "{ge}";
   1132       case ARMCondLT:  return "{lt}";
   1133       case ARMCondGT:  return "{gt}";
   1134       case ARMCondLE:  return "{le}";
   1135       case ARMCondAL:  return ""; // {al}: is the default
   1136       case ARMCondNV:  return "{nv}";
   1137       default: vpanic("name_ARMCondcode");
   1138    }
   1139 }
   1140 /* and a handy shorthand for it */
   1141 static const HChar* nCC ( ARMCondcode cond ) {
   1142    return name_ARMCondcode(cond);
   1143 }
   1144 
   1145 
   1146 /* Build IR to calculate some particular condition from stored
   1147    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   1148    Ity_I32, suitable for narrowing.  Although the return type is
   1149    Ity_I32, the returned value is either 0 or 1.  'cond' must be
   1150    :: Ity_I32 and must denote the condition to compute in
   1151    bits 7:4, and be zero everywhere else.
   1152 */
   1153 static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
   1154 {
   1155    vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
   1156    /* And 'cond' had better produce a value in which only bits 7:4 are
   1157       nonzero.  However, obviously we can't assert for that. */
   1158 
   1159    /* So what we're constructing for the first argument is
   1160       "(cond << 4) | stored-operation".
   1161       However, as per comments above, 'cond' must be supplied
   1162       pre-shifted to this function.
   1163 
   1164       This pairing scheme requires that the ARM_CC_OP_ values all fit
   1165       in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
   1166       8 bits of the first argument. */
   1167    IRExpr** args
   1168       = mkIRExprVec_4(
   1169            binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
   1170            IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1171            IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1172            IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
   1173         );
   1174    IRExpr* call
   1175       = mkIRExprCCall(
   1176            Ity_I32,
   1177            0/*regparm*/,
   1178            "armg_calculate_condition", &armg_calculate_condition,
   1179            args
   1180         );
   1181 
   1182    /* Exclude the requested condition, OP and NDEP from definedness
   1183       checking.  We're only interested in DEP1 and DEP2. */
   1184    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1185    return call;
   1186 }
   1187 
   1188 
   1189 /* Build IR to calculate some particular condition from stored
   1190    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   1191    Ity_I32, suitable for narrowing.  Although the return type is
   1192    Ity_I32, the returned value is either 0 or 1.
   1193 */
   1194 static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
   1195 {
   1196   /* First arg is "(cond << 4) | condition".  This requires that the
   1197      ARM_CC_OP_ values all fit in 4 bits.  Hence we are passing a
   1198      (COND, OP) pair in the lowest 8 bits of the first argument. */
   1199    vassert(cond >= 0 && cond <= 15);
   1200    return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
   1201 }
   1202 
   1203 
   1204 /* Build IR to calculate just the carry flag from stored
   1205    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1206    Ity_I32. */
   1207 static IRExpr* mk_armg_calculate_flag_c ( void )
   1208 {
   1209    IRExpr** args
   1210       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1211                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1212                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1213                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1214    IRExpr* call
   1215       = mkIRExprCCall(
   1216            Ity_I32,
   1217            0/*regparm*/,
   1218            "armg_calculate_flag_c", &armg_calculate_flag_c,
   1219            args
   1220         );
   1221    /* Exclude OP and NDEP from definedness checking.  We're only
   1222       interested in DEP1 and DEP2. */
   1223    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1224    return call;
   1225 }
   1226 
   1227 
   1228 /* Build IR to calculate just the overflow flag from stored
   1229    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1230    Ity_I32. */
   1231 static IRExpr* mk_armg_calculate_flag_v ( void )
   1232 {
   1233    IRExpr** args
   1234       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1235                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1236                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1237                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1238    IRExpr* call
   1239       = mkIRExprCCall(
   1240            Ity_I32,
   1241            0/*regparm*/,
   1242            "armg_calculate_flag_v", &armg_calculate_flag_v,
   1243            args
   1244         );
   1245    /* Exclude OP and NDEP from definedness checking.  We're only
   1246       interested in DEP1 and DEP2. */
   1247    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1248    return call;
   1249 }
   1250 
   1251 
   1252 /* Build IR to calculate N Z C V in bits 31:28 of the
   1253    returned word. */
   1254 static IRExpr* mk_armg_calculate_flags_nzcv ( void )
   1255 {
   1256    IRExpr** args
   1257       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1258                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1259                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1260                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1261    IRExpr* call
   1262       = mkIRExprCCall(
   1263            Ity_I32,
   1264            0/*regparm*/,
   1265            "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
   1266            args
   1267         );
   1268    /* Exclude OP and NDEP from definedness checking.  We're only
   1269       interested in DEP1 and DEP2. */
   1270    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1271    return call;
   1272 }
   1273 
   1274 static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
   1275 {
   1276    IRExpr** args1;
   1277    IRExpr** args2;
   1278    IRExpr *call1, *call2, *res;
   1279 
   1280    if (Q) {
   1281       args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
   1282                               binop(Iop_GetElem32x4, resL, mkU8(1)),
   1283                               binop(Iop_GetElem32x4, resR, mkU8(0)),
   1284                               binop(Iop_GetElem32x4, resR, mkU8(1)) );
   1285       args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
   1286                               binop(Iop_GetElem32x4, resL, mkU8(3)),
   1287                               binop(Iop_GetElem32x4, resR, mkU8(2)),
   1288                               binop(Iop_GetElem32x4, resR, mkU8(3)) );
   1289    } else {
   1290       args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
   1291                               binop(Iop_GetElem32x2, resL, mkU8(1)),
   1292                               binop(Iop_GetElem32x2, resR, mkU8(0)),
   1293                               binop(Iop_GetElem32x2, resR, mkU8(1)) );
   1294    }
   1295 
   1296    call1 = mkIRExprCCall(
   1297              Ity_I32,
   1298              0/*regparm*/,
   1299              "armg_calculate_flag_qc", &armg_calculate_flag_qc,
   1300              args1
   1301           );
   1302    if (Q) {
   1303       call2 = mkIRExprCCall(
   1304                 Ity_I32,
   1305                 0/*regparm*/,
   1306                 "armg_calculate_flag_qc", &armg_calculate_flag_qc,
   1307                 args2
   1308              );
   1309    }
   1310    if (Q) {
   1311       res = binop(Iop_Or32, call1, call2);
   1312    } else {
   1313       res = call1;
   1314    }
   1315    return res;
   1316 }
   1317 
   1318 // FIXME: this is named wrongly .. looks like a sticky set of
   1319 // QC, not a write to it.
   1320 static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
   1321                          IRTemp condT )
   1322 {
   1323    putMiscReg32 (OFFB_FPSCR,
   1324                  binop(Iop_Or32,
   1325                        IRExpr_Get(OFFB_FPSCR, Ity_I32),
   1326                        binop(Iop_Shl32,
   1327                              mk_armg_calculate_flag_qc(resL, resR, Q),
   1328                              mkU8(27))),
   1329                  condT);
   1330 }
   1331 
   1332 /* Build IR to conditionally set the flags thunk.  As with putIReg, if
   1333    guard is IRTemp_INVALID then it's unconditional, else it holds a
   1334    condition :: Ity_I32. */
   1335 static
   1336 void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
   1337                          IRTemp t_dep2, IRTemp t_ndep,
   1338                          IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1339 {
   1340    vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
   1341    vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
   1342    vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
   1343    vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
   1344    if (guardT == IRTemp_INVALID) {
   1345       /* unconditional */
   1346       stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(cc_op) ));
   1347       stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   1348       stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   1349       stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
   1350    } else {
   1351       /* conditional */
   1352       IRTemp c1 = newTemp(Ity_I1);
   1353       assign( c1, binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)) );
   1354       stmt( IRStmt_Put(
   1355                OFFB_CC_OP,
   1356                IRExpr_ITE( mkexpr(c1),
   1357                            mkU32(cc_op),
   1358                            IRExpr_Get(OFFB_CC_OP, Ity_I32) ) ));
   1359       stmt( IRStmt_Put(
   1360                OFFB_CC_DEP1,
   1361                IRExpr_ITE( mkexpr(c1),
   1362                            mkexpr(t_dep1),
   1363                            IRExpr_Get(OFFB_CC_DEP1, Ity_I32) ) ));
   1364       stmt( IRStmt_Put(
   1365                OFFB_CC_DEP2,
   1366                IRExpr_ITE( mkexpr(c1),
   1367                            mkexpr(t_dep2),
   1368                            IRExpr_Get(OFFB_CC_DEP2, Ity_I32) ) ));
   1369       stmt( IRStmt_Put(
   1370                OFFB_CC_NDEP,
   1371                IRExpr_ITE( mkexpr(c1),
   1372                            mkexpr(t_ndep),
   1373                            IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ) ));
   1374    }
   1375 }
   1376 
   1377 
   1378 /* Minor variant of the above that sets NDEP to zero (if it
   1379    sets it at all) */
   1380 static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
   1381                              IRTemp t_dep2,
   1382                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1383 {
   1384    IRTemp z32 = newTemp(Ity_I32);
   1385    assign( z32, mkU32(0) );
   1386    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
   1387 }
   1388 
   1389 
   1390 /* Minor variant of the above that sets DEP2 to zero (if it
   1391    sets it at all) */
   1392 static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
   1393                              IRTemp t_ndep,
   1394                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1395 {
   1396    IRTemp z32 = newTemp(Ity_I32);
   1397    assign( z32, mkU32(0) );
   1398    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
   1399 }
   1400 
   1401 
   1402 /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
   1403    sets them at all) */
   1404 static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
   1405                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1406 {
   1407    IRTemp z32 = newTemp(Ity_I32);
   1408    assign( z32, mkU32(0) );
   1409    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
   1410 }
   1411 
   1412 
   1413 /* ARM only */
   1414 /* Generate a side-exit to the next instruction, if the given guard
   1415    expression :: Ity_I32 is 0 (note!  the side exit is taken if the
   1416    condition is false!)  This is used to skip over conditional
   1417    instructions which we can't generate straight-line code for, either
   1418    because they are too complex or (more likely) they potentially
   1419    generate exceptions.
   1420 */
   1421 static void mk_skip_over_A32_if_cond_is_false (
   1422                IRTemp guardT /* :: Ity_I32, 0 or 1 */
   1423             )
   1424 {
   1425    ASSERT_IS_ARM;
   1426    vassert(guardT != IRTemp_INVALID);
   1427    vassert(0 == (guest_R15_curr_instr_notENC & 3));
   1428    stmt( IRStmt_Exit(
   1429             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
   1430             Ijk_Boring,
   1431             IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
   1432             OFFB_R15T
   1433        ));
   1434 }
   1435 
   1436 /* Thumb16 only */
   1437 /* ditto, but jump over a 16-bit thumb insn */
   1438 static void mk_skip_over_T16_if_cond_is_false (
   1439                IRTemp guardT /* :: Ity_I32, 0 or 1 */
   1440             )
   1441 {
   1442    ASSERT_IS_THUMB;
   1443    vassert(guardT != IRTemp_INVALID);
   1444    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   1445    stmt( IRStmt_Exit(
   1446             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
   1447             Ijk_Boring,
   1448             IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
   1449             OFFB_R15T
   1450        ));
   1451 }
   1452 
   1453 
   1454 /* Thumb32 only */
   1455 /* ditto, but jump over a 32-bit thumb insn */
   1456 static void mk_skip_over_T32_if_cond_is_false (
   1457                IRTemp guardT /* :: Ity_I32, 0 or 1 */
   1458             )
   1459 {
   1460    ASSERT_IS_THUMB;
   1461    vassert(guardT != IRTemp_INVALID);
   1462    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   1463    stmt( IRStmt_Exit(
   1464             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
   1465             Ijk_Boring,
   1466             IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
   1467             OFFB_R15T
   1468        ));
   1469 }
   1470 
   1471 
   1472 /* Thumb16 and Thumb32 only
   1473    Generate a SIGILL followed by a restart of the current instruction
   1474    if the given temp is nonzero. */
   1475 static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
   1476 {
   1477    ASSERT_IS_THUMB;
   1478    vassert(t != IRTemp_INVALID);
   1479    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   1480    stmt(
   1481       IRStmt_Exit(
   1482          binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
   1483          Ijk_NoDecode,
   1484          IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
   1485          OFFB_R15T
   1486       )
   1487    );
   1488 }
   1489 
   1490 
   1491 /* Inspect the old_itstate, and generate a SIGILL if it indicates that
   1492    we are currently in an IT block and are not the last in the block.
   1493    This also rolls back guest_ITSTATE to its old value before the exit
   1494    and restores it to its new value afterwards.  This is so that if
   1495    the exit is taken, we have an up to date version of ITSTATE
   1496    available.  Without doing that, we have no hope of making precise
   1497    exceptions work. */
   1498 static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
   1499                IRTemp old_itstate /* :: Ity_I32 */,
   1500                IRTemp new_itstate /* :: Ity_I32 */
   1501             )
   1502 {
   1503    ASSERT_IS_THUMB;
   1504    put_ITSTATE(old_itstate); // backout
   1505    IRTemp guards_for_next3 = newTemp(Ity_I32);
   1506    assign(guards_for_next3,
   1507           binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   1508    gen_SIGILL_T_if_nonzero(guards_for_next3);
   1509    put_ITSTATE(new_itstate); //restore
   1510 }
   1511 
   1512 
   1513 /* Simpler version of the above, which generates a SIGILL if
   1514    we're anywhere within an IT block. */
   1515 static void gen_SIGILL_T_if_in_ITBlock (
   1516                IRTemp old_itstate /* :: Ity_I32 */,
   1517                IRTemp new_itstate /* :: Ity_I32 */
   1518             )
   1519 {
   1520    put_ITSTATE(old_itstate); // backout
   1521    gen_SIGILL_T_if_nonzero(old_itstate);
   1522    put_ITSTATE(new_itstate); //restore
   1523 }
   1524 
   1525 
   1526 /* Generate an APSR value, from the NZCV thunk, and
   1527    from QFLAG32 and GEFLAG0 .. GEFLAG3. */
   1528 static IRTemp synthesise_APSR ( void )
   1529 {
   1530    IRTemp res1 = newTemp(Ity_I32);
   1531    // Get NZCV
   1532    assign( res1, mk_armg_calculate_flags_nzcv() );
   1533    // OR in the Q value
   1534    IRTemp res2 = newTemp(Ity_I32);
   1535    assign(
   1536       res2,
   1537       binop(Iop_Or32,
   1538             mkexpr(res1),
   1539             binop(Iop_Shl32,
   1540                   unop(Iop_1Uto32,
   1541                        binop(Iop_CmpNE32,
   1542                              mkexpr(get_QFLAG32()),
   1543                              mkU32(0))),
   1544                   mkU8(ARMG_CC_SHIFT_Q)))
   1545    );
   1546    // OR in GE0 .. GE3
   1547    IRExpr* ge0
   1548       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
   1549    IRExpr* ge1
   1550       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
   1551    IRExpr* ge2
   1552       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
   1553    IRExpr* ge3
   1554       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
   1555    IRTemp res3 = newTemp(Ity_I32);
   1556    assign(res3,
   1557           binop(Iop_Or32,
   1558                 mkexpr(res2),
   1559                 binop(Iop_Or32,
   1560                       binop(Iop_Or32,
   1561                             binop(Iop_Shl32, ge0, mkU8(16)),
   1562                             binop(Iop_Shl32, ge1, mkU8(17))),
   1563                       binop(Iop_Or32,
   1564                             binop(Iop_Shl32, ge2, mkU8(18)),
   1565                             binop(Iop_Shl32, ge3, mkU8(19))) )));
   1566    return res3;
   1567 }
   1568 
   1569 
   1570 /* and the inverse transformation: given an APSR value,
   1571    set the NZCV thunk, the Q flag, and the GE flags. */
   1572 static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
   1573                                 IRTemp apsrT, IRTemp condT )
   1574 {
   1575    vassert(write_nzcvq || write_ge);
   1576    if (write_nzcvq) {
   1577       // Do NZCV
   1578       IRTemp immT = newTemp(Ity_I32);
   1579       assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
   1580       setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
   1581       // Do Q
   1582       IRTemp qnewT = newTemp(Ity_I32);
   1583       assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
   1584       put_QFLAG32(qnewT, condT);
   1585    }
   1586    if (write_ge) {
   1587       // Do GE3..0
   1588       put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
   1589                    condT);
   1590       put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
   1591                    condT);
   1592       put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
   1593                    condT);
   1594       put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
   1595                    condT);
   1596    }
   1597 }
   1598 
   1599 
   1600 /*------------------------------------------------------------*/
   1601 /*--- Helpers for saturation                               ---*/
   1602 /*------------------------------------------------------------*/
   1603 
   1604 /* FIXME: absolutely the only diff. between (a) armUnsignedSatQ and
   1605    (b) armSignedSatQ is that in (a) the floor is set to 0, whereas in
   1606    (b) the floor is computed from the value of imm5.  these two fns
   1607    should be commoned up. */
   1608 
   1609 /* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
   1610    Optionally return flag resQ saying whether saturation occurred.
   1611    See definition in manual, section A2.2.1, page 41
   1612    (bits(N), boolean) UnsignedSatQ( integer i, integer N )
   1613    {
   1614      if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
   1615      elsif ( i < 0 )    { result = 0; saturated = TRUE; }
   1616      else               { result = i; saturated = FALSE; }
   1617      return ( result<N-1:0>, saturated );
   1618    }
   1619 */
   1620 static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
   1621                              IRTemp* resQ, /* OUT - Ity_I32  */
   1622                              IRTemp regT,  /* value to clamp - Ity_I32 */
   1623                              UInt imm5 )   /* saturation ceiling */
   1624 {
   1625    ULong ceil64  = (1ULL << imm5) - 1;    // (2^imm5)-1
   1626    UInt  ceil    = (UInt)ceil64;
   1627    UInt  floor   = 0;
   1628 
   1629    IRTemp nd0 = newTemp(Ity_I32);
   1630    IRTemp nd1 = newTemp(Ity_I32);
   1631    IRTemp nd2 = newTemp(Ity_I1);
   1632    IRTemp nd3 = newTemp(Ity_I32);
   1633    IRTemp nd4 = newTemp(Ity_I32);
   1634    IRTemp nd5 = newTemp(Ity_I1);
   1635    IRTemp nd6 = newTemp(Ity_I32);
   1636 
   1637    assign( nd0, mkexpr(regT) );
   1638    assign( nd1, mkU32(ceil) );
   1639    assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
   1640    assign( nd3, IRExpr_ITE(mkexpr(nd2), mkexpr(nd1), mkexpr(nd0)) );
   1641    assign( nd4, mkU32(floor) );
   1642    assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
   1643    assign( nd6, IRExpr_ITE(mkexpr(nd5), mkexpr(nd4), mkexpr(nd3)) );
   1644    assign( *res, mkexpr(nd6) );
   1645 
   1646    /* if saturation occurred, then resQ is set to some nonzero value
   1647       if sat did not occur, resQ is guaranteed to be zero. */
   1648    if (resQ) {
   1649       assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   1650    }
   1651 }
   1652 
   1653 
   1654 /* SignedSatQ(): 'clamp' each value so it lies between  -2^N <= x <= (2^N) - 1
   1655    Optionally return flag resQ saying whether saturation occurred.
   1656    - see definition in manual, section A2.2.1, page 41
   1657    (bits(N), boolean ) SignedSatQ( integer i, integer N )
   1658    {
   1659      if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
   1660      elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = FALSE; }
   1661      else                      { result = i;           saturated = FALSE; }
   1662      return ( result[N-1:0], saturated );
   1663    }
   1664 */
   1665 static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
   1666                            UInt imm5,      /* saturation ceiling */
   1667                            IRTemp* res,    /* OUT - Ity_I32 */
   1668                            IRTemp* resQ )  /* OUT - Ity_I32  */
   1669 {
   1670    Long ceil64  =  (1LL << (imm5-1)) - 1;  //  (2^(imm5-1))-1
   1671    Long floor64 = -(1LL << (imm5-1));      // -(2^(imm5-1))
   1672    Int  ceil    = (Int)ceil64;
   1673    Int  floor   = (Int)floor64;
   1674 
   1675    IRTemp nd0 = newTemp(Ity_I32);
   1676    IRTemp nd1 = newTemp(Ity_I32);
   1677    IRTemp nd2 = newTemp(Ity_I1);
   1678    IRTemp nd3 = newTemp(Ity_I32);
   1679    IRTemp nd4 = newTemp(Ity_I32);
   1680    IRTemp nd5 = newTemp(Ity_I1);
   1681    IRTemp nd6 = newTemp(Ity_I32);
   1682 
   1683    assign( nd0, mkexpr(regT) );
   1684    assign( nd1, mkU32(ceil) );
   1685    assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
   1686    assign( nd3, IRExpr_ITE( mkexpr(nd2), mkexpr(nd1), mkexpr(nd0) ) );
   1687    assign( nd4, mkU32(floor) );
   1688    assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
   1689    assign( nd6, IRExpr_ITE( mkexpr(nd5), mkexpr(nd4), mkexpr(nd3) ) );
   1690    assign( *res, mkexpr(nd6) );
   1691 
   1692    /* if saturation occurred, then resQ is set to some nonzero value
   1693       if sat did not occur, resQ is guaranteed to be zero. */
   1694    if (resQ) {
   1695      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   1696    }
   1697 }
   1698 
   1699 
   1700 /* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
   1701    overflow occurred for 32-bit addition.  Needs both args and the
   1702    result.  HD p27. */
   1703 static
   1704 IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
   1705                                       IRTemp argL, IRTemp argR )
   1706 {
   1707    IRTemp res = newTemp(Ity_I32);
   1708    assign(res, resE);
   1709    return
   1710       binop( Iop_Shr32,
   1711              binop( Iop_And32,
   1712                     binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
   1713                     binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
   1714              mkU8(31) );
   1715 }
   1716 
   1717 /* Similarly .. also from HD p27 .. */
   1718 static
   1719 IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
   1720                                       IRTemp argL, IRTemp argR )
   1721 {
   1722    IRTemp res = newTemp(Ity_I32);
   1723    assign(res, resE);
   1724    return
   1725       binop( Iop_Shr32,
   1726              binop( Iop_And32,
   1727                     binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
   1728                     binop( Iop_Xor32, mkexpr(res),  mkexpr(argL) )),
   1729              mkU8(31) );
   1730 }
   1731 
   1732 
   1733 /*------------------------------------------------------------*/
   1734 /*--- Larger helpers                                       ---*/
   1735 /*------------------------------------------------------------*/
   1736 
   1737 /* Compute both the result and new C flag value for a LSL by an imm5
   1738    or by a register operand.  May generate reads of the old C value
   1739    (hence only safe to use before any writes to guest state happen).
   1740    Are factored out so can be used by both ARM and Thumb.
   1741 
   1742    Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
   1743    "res" (the result)  is a.k.a. "shop", shifter operand
   1744    "newC" (the new C)  is a.k.a. "shco", shifter carry out
   1745 
   1746    The calling convention for res and newC is a bit funny.  They could
   1747    be passed by value, but instead are passed by ref.
   1748 
   1749    The C (shco) value computed must be zero in bits 31:1, as the IR
   1750    optimisations for flag handling (guest_arm_spechelper) rely on
   1751    that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
   1752    for it.  Same applies to all these functions that compute shco
   1753    after a shift or rotate, not just this one.
   1754 */
   1755 
   1756 static void compute_result_and_C_after_LSL_by_imm5 (
   1757                /*OUT*/HChar* buf,
   1758                IRTemp* res,
   1759                IRTemp* newC,
   1760                IRTemp rMt, UInt shift_amt, /* operands */
   1761                UInt rM      /* only for debug printing */
   1762             )
   1763 {
   1764    if (shift_amt == 0) {
   1765       if (newC) {
   1766          assign( *newC, mk_armg_calculate_flag_c() );
   1767       }
   1768       assign( *res, mkexpr(rMt) );
   1769       DIS(buf, "r%u", rM);
   1770    } else {
   1771       vassert(shift_amt >= 1 && shift_amt <= 31);
   1772       if (newC) {
   1773          assign( *newC,
   1774                  binop(Iop_And32,
   1775                        binop(Iop_Shr32, mkexpr(rMt),
   1776                                         mkU8(32 - shift_amt)),
   1777                        mkU32(1)));
   1778       }
   1779       assign( *res,
   1780               binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
   1781       DIS(buf, "r%u, LSL #%u", rM, shift_amt);
   1782    }
   1783 }
   1784 
   1785 
   1786 static void compute_result_and_C_after_LSL_by_reg (
   1787                /*OUT*/HChar* buf,
   1788                IRTemp* res,
   1789                IRTemp* newC,
   1790                IRTemp rMt, IRTemp rSt,  /* operands */
   1791                UInt rM,    UInt rS      /* only for debug printing */
   1792             )
   1793 {
   1794    // shift left in range 0 .. 255
   1795    // amt  = rS & 255
   1796    // res  = amt < 32 ?  Rm << amt  : 0
   1797    // newC = amt == 0     ? oldC  :
   1798    //        amt in 1..32 ?  Rm[32-amt]  : 0
   1799    IRTemp amtT = newTemp(Ity_I32);
   1800    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   1801    if (newC) {
   1802       /* mux0X(amt == 0,
   1803                mux0X(amt < 32,
   1804                      0,
   1805                      Rm[(32-amt) & 31]),
   1806                oldC)
   1807       */
   1808       /* About the best you can do is pray that iropt is able
   1809          to nuke most or all of the following junk. */
   1810       IRTemp oldC = newTemp(Ity_I32);
   1811       assign(oldC, mk_armg_calculate_flag_c() );
   1812       assign(
   1813          *newC,
   1814          IRExpr_ITE(
   1815             binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
   1816             mkexpr(oldC),
   1817             IRExpr_ITE(
   1818                binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
   1819                binop(Iop_And32,
   1820                      binop(Iop_Shr32,
   1821                            mkexpr(rMt),
   1822                            unop(Iop_32to8,
   1823                                 binop(Iop_And32,
   1824                                       binop(Iop_Sub32,
   1825                                             mkU32(32),
   1826                                             mkexpr(amtT)),
   1827                                       mkU32(31)
   1828                                 )
   1829                            )
   1830                      ),
   1831                      mkU32(1)
   1832                      ),
   1833                mkU32(0)
   1834             )
   1835          )
   1836       );
   1837    }
   1838    // (Rm << (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
   1839    // Lhs of the & limits the shift to 31 bits, so as to
   1840    // give known IR semantics.  Rhs of the & is all 1s for
   1841    // Rs <= 31 and all 0s for Rs >= 32.
   1842    assign(
   1843       *res,
   1844       binop(
   1845          Iop_And32,
   1846          binop(Iop_Shl32,
   1847                mkexpr(rMt),
   1848                unop(Iop_32to8,
   1849                     binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
   1850          binop(Iop_Sar32,
   1851                binop(Iop_Sub32,
   1852                      mkexpr(amtT),
   1853                      mkU32(32)),
   1854                mkU8(31))));
   1855     DIS(buf, "r%u, LSL r%u", rM, rS);
   1856 }
   1857 
   1858 
   1859 static void compute_result_and_C_after_LSR_by_imm5 (
   1860                /*OUT*/HChar* buf,
   1861                IRTemp* res,
   1862                IRTemp* newC,
   1863                IRTemp rMt, UInt shift_amt, /* operands */
   1864                UInt rM      /* only for debug printing */
   1865             )
   1866 {
   1867    if (shift_amt == 0) {
   1868       // conceptually a 32-bit shift, however:
   1869       // res  = 0
   1870       // newC = Rm[31]
   1871       if (newC) {
   1872          assign( *newC,
   1873                  binop(Iop_And32,
   1874                        binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
   1875                        mkU32(1)));
   1876       }
   1877       assign( *res, mkU32(0) );
   1878       DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
   1879    } else {
   1880       // shift in range 1..31
   1881       // res  = Rm >>u shift_amt
   1882       // newC = Rm[shift_amt - 1]
   1883       vassert(shift_amt >= 1 && shift_amt <= 31);
   1884       if (newC) {
   1885          assign( *newC,
   1886                  binop(Iop_And32,
   1887                        binop(Iop_Shr32, mkexpr(rMt),
   1888                                         mkU8(shift_amt - 1)),
   1889                        mkU32(1)));
   1890       }
   1891       assign( *res,
   1892               binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
   1893       DIS(buf, "r%u, LSR #%u", rM, shift_amt);
   1894    }
   1895 }
   1896 
   1897 
   1898 static void compute_result_and_C_after_LSR_by_reg (
   1899                /*OUT*/HChar* buf,
   1900                IRTemp* res,
   1901                IRTemp* newC,
   1902                IRTemp rMt, IRTemp rSt,  /* operands */
   1903                UInt rM,    UInt rS      /* only for debug printing */
   1904             )
   1905 {
   1906    // shift right in range 0 .. 255
   1907    // amt = rS & 255
   1908    // res  = amt < 32 ?  Rm >>u amt  : 0
   1909    // newC = amt == 0     ? oldC  :
   1910    //        amt in 1..32 ?  Rm[amt-1]  : 0
   1911    IRTemp amtT = newTemp(Ity_I32);
   1912    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   1913    if (newC) {
   1914       /* mux0X(amt == 0,
   1915                mux0X(amt < 32,
   1916                      0,
   1917                      Rm[(amt-1) & 31]),
   1918                oldC)
   1919       */
   1920       IRTemp oldC = newTemp(Ity_I32);
   1921       assign(oldC, mk_armg_calculate_flag_c() );
   1922       assign(
   1923          *newC,
   1924          IRExpr_ITE(
   1925             binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
   1926             mkexpr(oldC),
   1927             IRExpr_ITE(
   1928                binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
   1929                binop(Iop_And32,
   1930                      binop(Iop_Shr32,
   1931                            mkexpr(rMt),
   1932                            unop(Iop_32to8,
   1933                                 binop(Iop_And32,
   1934                                       binop(Iop_Sub32,
   1935                                             mkexpr(amtT),
   1936                                             mkU32(1)),
   1937                                       mkU32(31)
   1938                                 )
   1939                            )
   1940                      ),
   1941                      mkU32(1)
   1942                      ),
   1943                mkU32(0)
   1944             )
   1945          )
   1946       );
   1947    }
   1948    // (Rm >>u (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
   1949    // Lhs of the & limits the shift to 31 bits, so as to
   1950    // give known IR semantics.  Rhs of the & is all 1s for
   1951    // Rs <= 31 and all 0s for Rs >= 32.
   1952    assign(
   1953       *res,
   1954       binop(
   1955          Iop_And32,
   1956          binop(Iop_Shr32,
   1957                mkexpr(rMt),
   1958                unop(Iop_32to8,
   1959                     binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
   1960          binop(Iop_Sar32,
   1961                binop(Iop_Sub32,
   1962                      mkexpr(amtT),
   1963                      mkU32(32)),
   1964                mkU8(31))));
   1965     DIS(buf, "r%u, LSR r%u", rM, rS);
   1966 }
   1967 
   1968 
   1969 static void compute_result_and_C_after_ASR_by_imm5 (
   1970                /*OUT*/HChar* buf,
   1971                IRTemp* res,
   1972                IRTemp* newC,
   1973                IRTemp rMt, UInt shift_amt, /* operands */
   1974                UInt rM      /* only for debug printing */
   1975             )
   1976 {
   1977    if (shift_amt == 0) {
   1978       // conceptually a 32-bit shift, however:
   1979       // res  = Rm >>s 31
   1980       // newC = Rm[31]
   1981       if (newC) {
   1982          assign( *newC,
   1983                  binop(Iop_And32,
   1984                        binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
   1985                        mkU32(1)));
   1986       }
   1987       assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
   1988       DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
   1989    } else {
   1990       // shift in range 1..31
   1991       // res = Rm >>s shift_amt
   1992       // newC = Rm[shift_amt - 1]
   1993       vassert(shift_amt >= 1 && shift_amt <= 31);
   1994       if (newC) {
   1995          assign( *newC,
   1996                  binop(Iop_And32,
   1997                        binop(Iop_Shr32, mkexpr(rMt),
   1998                                         mkU8(shift_amt - 1)),
   1999                        mkU32(1)));
   2000       }
   2001       assign( *res,
   2002               binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
   2003       DIS(buf, "r%u, ASR #%u", rM, shift_amt);
   2004    }
   2005 }
   2006 
   2007 
   2008 static void compute_result_and_C_after_ASR_by_reg (
   2009                /*OUT*/HChar* buf,
   2010                IRTemp* res,
   2011                IRTemp* newC,
   2012                IRTemp rMt, IRTemp rSt,  /* operands */
   2013                UInt rM,    UInt rS      /* only for debug printing */
   2014             )
   2015 {
   2016    // arithmetic shift right in range 0 .. 255
   2017    // amt = rS & 255
   2018    // res  = amt < 32 ?  Rm >>s amt  : Rm >>s 31
   2019    // newC = amt == 0     ? oldC  :
   2020    //        amt in 1..32 ?  Rm[amt-1]  : Rm[31]
   2021    IRTemp amtT = newTemp(Ity_I32);
   2022    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   2023    if (newC) {
   2024       /* mux0X(amt == 0,
   2025                mux0X(amt < 32,
   2026                      Rm[31],
   2027                      Rm[(amt-1) & 31])
   2028                oldC)
   2029       */
   2030       IRTemp oldC = newTemp(Ity_I32);
   2031       assign(oldC, mk_armg_calculate_flag_c() );
   2032       assign(
   2033          *newC,
   2034          IRExpr_ITE(
   2035             binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
   2036             mkexpr(oldC),
   2037             IRExpr_ITE(
   2038                binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
   2039                binop(Iop_And32,
   2040                      binop(Iop_Shr32,
   2041                            mkexpr(rMt),
   2042                            unop(Iop_32to8,
   2043                                 binop(Iop_And32,
   2044                                       binop(Iop_Sub32,
   2045                                             mkexpr(amtT),
   2046                                             mkU32(1)),
   2047                                       mkU32(31)
   2048                                 )
   2049                            )
   2050                      ),
   2051                      mkU32(1)
   2052                      ),
   2053                binop(Iop_And32,
   2054                      binop(Iop_Shr32,
   2055                            mkexpr(rMt),
   2056                            mkU8(31)
   2057                      ),
   2058                      mkU32(1)
   2059                )
   2060             )
   2061          )
   2062       );
   2063    }
   2064    // (Rm >>s (amt <u 32 ? amt : 31))
   2065    assign(
   2066       *res,
   2067       binop(
   2068          Iop_Sar32,
   2069          mkexpr(rMt),
   2070          unop(
   2071             Iop_32to8,
   2072             IRExpr_ITE(
   2073                binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32)),
   2074                mkexpr(amtT),
   2075                mkU32(31)))));
   2076     DIS(buf, "r%u, ASR r%u", rM, rS);
   2077 }
   2078 
   2079 
   2080 static void compute_result_and_C_after_ROR_by_reg (
   2081                /*OUT*/HChar* buf,
   2082                IRTemp* res,
   2083                IRTemp* newC,
   2084                IRTemp rMt, IRTemp rSt,  /* operands */
   2085                UInt rM,    UInt rS      /* only for debug printing */
   2086             )
   2087 {
   2088    // rotate right in range 0 .. 255
   2089    // amt = rS & 255
   2090    // shop =  Rm `ror` (amt & 31)
   2091    // shco =  amt == 0 ? oldC : Rm[(amt-1) & 31]
   2092    IRTemp amtT = newTemp(Ity_I32);
   2093    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   2094    IRTemp amt5T = newTemp(Ity_I32);
   2095    assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
   2096    IRTemp oldC = newTemp(Ity_I32);
   2097    assign(oldC, mk_armg_calculate_flag_c() );
   2098    if (newC) {
   2099       assign(
   2100          *newC,
   2101          IRExpr_ITE(
   2102             binop(Iop_CmpNE32, mkexpr(amtT), mkU32(0)),
   2103             binop(Iop_And32,
   2104                   binop(Iop_Shr32,
   2105                         mkexpr(rMt),
   2106                         unop(Iop_32to8,
   2107                              binop(Iop_And32,
   2108                                    binop(Iop_Sub32,
   2109                                          mkexpr(amtT),
   2110                                          mkU32(1)
   2111                                    ),
   2112                                    mkU32(31)
   2113                              )
   2114                         )
   2115                   ),
   2116                   mkU32(1)
   2117             ),
   2118             mkexpr(oldC)
   2119          )
   2120       );
   2121    }
   2122    assign(
   2123       *res,
   2124       IRExpr_ITE(
   2125          binop(Iop_CmpNE32, mkexpr(amt5T), mkU32(0)),
   2126          binop(Iop_Or32,
   2127                binop(Iop_Shr32,
   2128                      mkexpr(rMt),
   2129                      unop(Iop_32to8, mkexpr(amt5T))
   2130                ),
   2131                binop(Iop_Shl32,
   2132                      mkexpr(rMt),
   2133                      unop(Iop_32to8,
   2134                           binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
   2135                      )
   2136                )
   2137                ),
   2138          mkexpr(rMt)
   2139       )
   2140    );
   2141    DIS(buf, "r%u, ROR r#%u", rM, rS);
   2142 }
   2143 
   2144 
   2145 /* Generate an expression corresponding to the immediate-shift case of
   2146    a shifter operand.  This is used both for ARM and Thumb2.
   2147 
   2148    Bind it to a temporary, and return that via *res.  If newC is
   2149    non-NULL, also compute a value for the shifter's carry out (in the
   2150    LSB of a word), bind it to a temporary, and return that via *shco.
   2151 
   2152    Generates GETs from the guest state and is therefore not safe to
   2153    use once we start doing PUTs to it, for any given instruction.
   2154 
   2155    'how' is encoded thusly:
   2156       00b LSL,  01b LSR,  10b ASR,  11b ROR
   2157    Most but not all ARM and Thumb integer insns use this encoding.
   2158    Be careful to ensure the right value is passed here.
   2159 */
   2160 static void compute_result_and_C_after_shift_by_imm5 (
   2161                /*OUT*/HChar* buf,
   2162                /*OUT*/IRTemp* res,
   2163                /*OUT*/IRTemp* newC,
   2164                IRTemp  rMt,       /* reg to shift */
   2165                UInt    how,       /* what kind of shift */
   2166                UInt    shift_amt, /* shift amount (0..31) */
   2167                UInt    rM         /* only for debug printing */
   2168             )
   2169 {
   2170    vassert(shift_amt < 32);
   2171    vassert(how < 4);
   2172 
   2173    switch (how) {
   2174 
   2175       case 0:
   2176          compute_result_and_C_after_LSL_by_imm5(
   2177             buf, res, newC, rMt, shift_amt, rM
   2178          );
   2179          break;
   2180 
   2181       case 1:
   2182          compute_result_and_C_after_LSR_by_imm5(
   2183             buf, res, newC, rMt, shift_amt, rM
   2184          );
   2185          break;
   2186 
   2187       case 2:
   2188          compute_result_and_C_after_ASR_by_imm5(
   2189             buf, res, newC, rMt, shift_amt, rM
   2190          );
   2191          break;
   2192 
   2193       case 3:
   2194          if (shift_amt == 0) {
   2195             IRTemp oldcT = newTemp(Ity_I32);
   2196             // rotate right 1 bit through carry (?)
   2197             // RRX -- described at ARM ARM A5-17
   2198             // res  = (oldC << 31) | (Rm >>u 1)
   2199             // newC = Rm[0]
   2200             if (newC) {
   2201                assign( *newC,
   2202                        binop(Iop_And32, mkexpr(rMt), mkU32(1)));
   2203             }
   2204             assign( oldcT, mk_armg_calculate_flag_c() );
   2205             assign( *res,
   2206                     binop(Iop_Or32,
   2207                           binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
   2208                           binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
   2209             DIS(buf, "r%u, RRX", rM);
   2210          } else {
   2211             // rotate right in range 1..31
   2212             // res  = Rm `ror` shift_amt
   2213             // newC = Rm[shift_amt - 1]
   2214             vassert(shift_amt >= 1 && shift_amt <= 31);
   2215             if (newC) {
   2216                assign( *newC,
   2217                        binop(Iop_And32,
   2218                              binop(Iop_Shr32, mkexpr(rMt),
   2219                                               mkU8(shift_amt - 1)),
   2220                              mkU32(1)));
   2221             }
   2222             assign( *res,
   2223                     binop(Iop_Or32,
   2224                           binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
   2225                           binop(Iop_Shl32, mkexpr(rMt),
   2226                                            mkU8(32-shift_amt))));
   2227             DIS(buf, "r%u, ROR #%u", rM, shift_amt);
   2228          }
   2229          break;
   2230 
   2231       default:
   2232          /*NOTREACHED*/
   2233          vassert(0);
   2234    }
   2235 }
   2236 
   2237 
   2238 /* Generate an expression corresponding to the register-shift case of
   2239    a shifter operand.  This is used both for ARM and Thumb2.
   2240 
   2241    Bind it to a temporary, and return that via *res.  If newC is
   2242    non-NULL, also compute a value for the shifter's carry out (in the
   2243    LSB of a word), bind it to a temporary, and return that via *shco.
   2244 
   2245    Generates GETs from the guest state and is therefore not safe to
   2246    use once we start doing PUTs to it, for any given instruction.
   2247 
   2248    'how' is encoded thusly:
   2249       00b LSL,  01b LSR,  10b ASR,  11b ROR
   2250    Most but not all ARM and Thumb integer insns use this encoding.
   2251    Be careful to ensure the right value is passed here.
   2252 */
   2253 static void compute_result_and_C_after_shift_by_reg (
   2254                /*OUT*/HChar*  buf,
   2255                /*OUT*/IRTemp* res,
   2256                /*OUT*/IRTemp* newC,
   2257                IRTemp  rMt,       /* reg to shift */
   2258                UInt    how,       /* what kind of shift */
   2259                IRTemp  rSt,       /* shift amount */
   2260                UInt    rM,        /* only for debug printing */
   2261                UInt    rS         /* only for debug printing */
   2262             )
   2263 {
   2264    vassert(how < 4);
   2265    switch (how) {
   2266       case 0: { /* LSL */
   2267          compute_result_and_C_after_LSL_by_reg(
   2268             buf, res, newC, rMt, rSt, rM, rS
   2269          );
   2270          break;
   2271       }
   2272       case 1: { /* LSR */
   2273          compute_result_and_C_after_LSR_by_reg(
   2274             buf, res, newC, rMt, rSt, rM, rS
   2275          );
   2276          break;
   2277       }
   2278       case 2: { /* ASR */
   2279          compute_result_and_C_after_ASR_by_reg(
   2280             buf, res, newC, rMt, rSt, rM, rS
   2281          );
   2282          break;
   2283       }
   2284       case 3: { /* ROR */
   2285          compute_result_and_C_after_ROR_by_reg(
   2286              buf, res, newC, rMt, rSt, rM, rS
   2287          );
   2288          break;
   2289       }
   2290       default:
   2291          /*NOTREACHED*/
   2292          vassert(0);
   2293    }
   2294 }
   2295 
   2296 
   2297 /* Generate an expression corresponding to a shifter_operand, bind it
   2298    to a temporary, and return that via *shop.  If shco is non-NULL,
   2299    also compute a value for the shifter's carry out (in the LSB of a
   2300    word), bind it to a temporary, and return that via *shco.
   2301 
   2302    If for some reason we can't come up with a shifter operand (missing
   2303    case?  not really a shifter operand?) return False.
   2304 
   2305    Generates GETs from the guest state and is therefore not safe to
   2306    use once we start doing PUTs to it, for any given instruction.
   2307 
   2308    For ARM insns only; not for Thumb.
   2309 */
   2310 static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
   2311                                  /*OUT*/IRTemp* shop,
   2312                                  /*OUT*/IRTemp* shco,
   2313                                  /*OUT*/HChar* buf )
   2314 {
   2315    UInt insn_4 = (insn_11_0 >> 4) & 1;
   2316    UInt insn_7 = (insn_11_0 >> 7) & 1;
   2317    vassert(insn_25 <= 0x1);
   2318    vassert(insn_11_0 <= 0xFFF);
   2319 
   2320    vassert(shop && *shop == IRTemp_INVALID);
   2321    *shop = newTemp(Ity_I32);
   2322 
   2323    if (shco) {
   2324       vassert(*shco == IRTemp_INVALID);
   2325       *shco = newTemp(Ity_I32);
   2326    }
   2327 
   2328    /* 32-bit immediate */
   2329 
   2330    if (insn_25 == 1) {
   2331       /* immediate: (7:0) rotated right by 2 * (11:8) */
   2332       UInt imm = (insn_11_0 >> 0) & 0xFF;
   2333       UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
   2334       vassert(rot <= 30);
   2335       imm = ROR32(imm, rot);
   2336       if (shco) {
   2337          if (rot == 0) {
   2338             assign( *shco, mk_armg_calculate_flag_c() );
   2339          } else {
   2340             assign( *shco, mkU32( (imm >> 31) & 1 ) );
   2341          }
   2342       }
   2343       DIS(buf, "#0x%x", imm);
   2344       assign( *shop, mkU32(imm) );
   2345       return True;
   2346    }
   2347 
   2348    /* Shift/rotate by immediate */
   2349 
   2350    if (insn_25 == 0 && insn_4 == 0) {
   2351       /* Rm (3:0) shifted (6:5) by immediate (11:7) */
   2352       UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
   2353       UInt rM        = (insn_11_0 >> 0) & 0xF;
   2354       UInt how       = (insn_11_0 >> 5) & 3;
   2355       /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
   2356       IRTemp rMt = newTemp(Ity_I32);
   2357       assign(rMt, getIRegA(rM));
   2358 
   2359       vassert(shift_amt <= 31);
   2360 
   2361       compute_result_and_C_after_shift_by_imm5(
   2362          buf, shop, shco, rMt, how, shift_amt, rM
   2363       );
   2364       return True;
   2365    }
   2366 
   2367    /* Shift/rotate by register */
   2368    if (insn_25 == 0 && insn_4 == 1) {
   2369       /* Rm (3:0) shifted (6:5) by Rs (11:8) */
   2370       UInt rM  = (insn_11_0 >> 0) & 0xF;
   2371       UInt rS  = (insn_11_0 >> 8) & 0xF;
   2372       UInt how = (insn_11_0 >> 5) & 3;
   2373       /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
   2374       IRTemp rMt = newTemp(Ity_I32);
   2375       IRTemp rSt = newTemp(Ity_I32);
   2376 
   2377       if (insn_7 == 1)
   2378          return False; /* not really a shifter operand */
   2379 
   2380       assign(rMt, getIRegA(rM));
   2381       assign(rSt, getIRegA(rS));
   2382 
   2383       compute_result_and_C_after_shift_by_reg(
   2384          buf, shop, shco, rMt, how, rSt, rM, rS
   2385       );
   2386       return True;
   2387    }
   2388 
   2389    vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
   2390    return False;
   2391 }
   2392 
   2393 
   2394 /* ARM only */
   2395 static
   2396 IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
   2397                                     /*OUT*/HChar* buf )
   2398 {
   2399    vassert(rN < 16);
   2400    vassert(bU < 2);
   2401    vassert(imm12 < 0x1000);
   2402    HChar opChar = bU == 1 ? '+' : '-';
   2403    DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
   2404    return
   2405       binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
   2406              getIRegA(rN),
   2407              mkU32(imm12) );
   2408 }
   2409 
   2410 
   2411 /* ARM only.
   2412    NB: This is "DecodeImmShift" in newer versions of the the ARM ARM.
   2413 */
   2414 static
   2415 IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
   2416                                           UInt sh2, UInt imm5,
   2417                                           /*OUT*/HChar* buf )
   2418 {
   2419    vassert(rN < 16);
   2420    vassert(bU < 2);
   2421    vassert(rM < 16);
   2422    vassert(sh2 < 4);
   2423    vassert(imm5 < 32);
   2424    HChar   opChar = bU == 1 ? '+' : '-';
   2425    IRExpr* index  = NULL;
   2426    switch (sh2) {
   2427       case 0: /* LSL */
   2428          /* imm5 can be in the range 0 .. 31 inclusive. */
   2429          index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
   2430          DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
   2431          break;
   2432       case 1: /* LSR */
   2433          if (imm5 == 0) {
   2434             index = mkU32(0);
   2435             vassert(0); // ATC
   2436          } else {
   2437             index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
   2438          }
   2439          DIS(buf, "[r%u, %cr%u, LSR #%u]",
   2440                   rN, opChar, rM, imm5 == 0 ? 32 : imm5);
   2441          break;
   2442       case 2: /* ASR */
   2443          /* Doesn't this just mean that the behaviour with imm5 == 0
   2444             is the same as if it had been 31 ? */
   2445          if (imm5 == 0) {
   2446             index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
   2447             vassert(0); // ATC
   2448          } else {
   2449             index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
   2450          }
   2451          DIS(buf, "[r%u, %cr%u, ASR #%u]",
   2452                   rN, opChar, rM, imm5 == 0 ? 32 : imm5);
   2453          break;
   2454       case 3: /* ROR or RRX */
   2455          if (imm5 == 0) {
   2456             IRTemp rmT    = newTemp(Ity_I32);
   2457             IRTemp cflagT = newTemp(Ity_I32);
   2458             assign(rmT, getIRegA(rM));
   2459             assign(cflagT, mk_armg_calculate_flag_c());
   2460             index = binop(Iop_Or32,
   2461                           binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
   2462                           binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
   2463             DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
   2464          } else {
   2465             IRTemp rmT = newTemp(Ity_I32);
   2466             assign(rmT, getIRegA(rM));
   2467             vassert(imm5 >= 1 && imm5 <= 31);
   2468             index = binop(Iop_Or32,
   2469                           binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
   2470                           binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
   2471             DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
   2472          }
   2473          break;
   2474       default:
   2475          vassert(0);
   2476    }
   2477    vassert(index);
   2478    return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   2479                 getIRegA(rN), index);
   2480 }
   2481 
   2482 
   2483 /* ARM only */
   2484 static
   2485 IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
   2486                                    /*OUT*/HChar* buf )
   2487 {
   2488    vassert(rN < 16);
   2489    vassert(bU < 2);
   2490    vassert(imm8 < 0x100);
   2491    HChar opChar = bU == 1 ? '+' : '-';
   2492    DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
   2493    return
   2494       binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
   2495              getIRegA(rN),
   2496              mkU32(imm8) );
   2497 }
   2498 
   2499 
   2500 /* ARM only */
   2501 static
   2502 IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
   2503                                   /*OUT*/HChar* buf )
   2504 {
   2505    vassert(rN < 16);
   2506    vassert(bU < 2);
   2507    vassert(rM < 16);
   2508    HChar   opChar = bU == 1 ? '+' : '-';
   2509    IRExpr* index  = getIRegA(rM);
   2510    DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
   2511    return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   2512                 getIRegA(rN), index);
   2513 }
   2514 
   2515 
   2516 /* irRes :: Ity_I32 holds a floating point comparison result encoded
   2517    as an IRCmpF64Result.  Generate code to convert it to an
   2518    ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
   2519    Assign a new temp to hold that value, and return the temp. */
   2520 static
   2521 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
   2522 {
   2523    IRTemp ix       = newTemp(Ity_I32);
   2524    IRTemp termL    = newTemp(Ity_I32);
   2525    IRTemp termR    = newTemp(Ity_I32);
   2526    IRTemp nzcv     = newTemp(Ity_I32);
   2527 
   2528    /* This is where the fun starts.  We have to convert 'irRes' from
   2529       an IR-convention return result (IRCmpF64Result) to an
   2530       ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
   2531       4 bits of 'nzcv'. */
   2532    /* Map compare result from IR to ARM(nzcv) */
   2533    /*
   2534       FP cmp result | IR   | ARM(nzcv)
   2535       --------------------------------
   2536       UN              0x45   0011
   2537       LT              0x01   1000
   2538       GT              0x00   0010
   2539       EQ              0x40   0110
   2540    */
   2541    /* Now since you're probably wondering WTF ..
   2542 
   2543       ix fishes the useful bits out of the IR value, bits 6 and 0, and
   2544       places them side by side, giving a number which is 0, 1, 2 or 3.
   2545 
   2546       termL is a sequence cooked up by GNU superopt.  It converts ix
   2547          into an almost correct value NZCV value (incredibly), except
   2548          for the case of UN, where it produces 0100 instead of the
   2549          required 0011.
   2550 
   2551       termR is therefore a correction term, also computed from ix.  It
   2552          is 1 in the UN case and 0 for LT, GT and UN.  Hence, to get
   2553          the final correct value, we subtract termR from termL.
   2554 
   2555       Don't take my word for it.  There's a test program at the bottom
   2556       of this file, to try this out with.
   2557    */
   2558    assign(
   2559       ix,
   2560       binop(Iop_Or32,
   2561             binop(Iop_And32,
   2562                   binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
   2563                   mkU32(3)),
   2564             binop(Iop_And32, mkexpr(irRes), mkU32(1))));
   2565 
   2566    assign(
   2567       termL,
   2568       binop(Iop_Add32,
   2569             binop(Iop_Shr32,
   2570                   binop(Iop_Sub32,
   2571                         binop(Iop_Shl32,
   2572                               binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
   2573                               mkU8(30)),
   2574                         mkU32(1)),
   2575                   mkU8(29)),
   2576             mkU32(1)));
   2577 
   2578    assign(
   2579       termR,
   2580       binop(Iop_And32,
   2581             binop(Iop_And32,
   2582                   mkexpr(ix),
   2583                   binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
   2584             mkU32(1)));
   2585 
   2586    assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
   2587    return nzcv;
   2588 }
   2589 
   2590 
   2591 /* Thumb32 only.  This is "ThumbExpandImm" in the ARM ARM.  If
   2592    updatesC is non-NULL, a boolean is written to it indicating whether
   2593    or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
   2594 */
   2595 static UInt thumbExpandImm ( Bool* updatesC,
   2596                              UInt imm1, UInt imm3, UInt imm8 )
   2597 {
   2598    vassert(imm1 < (1<<1));
   2599    vassert(imm3 < (1<<3));
   2600    vassert(imm8 < (1<<8));
   2601    UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
   2602    UInt abcdefgh = imm8;
   2603    UInt lbcdefgh = imm8 | 0x80;
   2604    if (updatesC) {
   2605       *updatesC = i_imm3_a >= 8;
   2606    }
   2607    switch (i_imm3_a) {
   2608       case 0: case 1:
   2609          return abcdefgh;
   2610       case 2: case 3:
   2611          return (abcdefgh << 16) | abcdefgh;
   2612       case 4: case 5:
   2613          return (abcdefgh << 24) | (abcdefgh << 8);
   2614       case 6: case 7:
   2615          return (abcdefgh << 24) | (abcdefgh << 16)
   2616                 | (abcdefgh << 8) | abcdefgh;
   2617       case 8 ... 31:
   2618          return lbcdefgh << (32 - i_imm3_a);
   2619       default:
   2620          break;
   2621    }
   2622    /*NOTREACHED*/vassert(0);
   2623 }
   2624 
   2625 
   2626 /* Version of thumbExpandImm where we simply feed it the
   2627    instruction halfwords (the lowest addressed one is I0). */
   2628 static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
   2629                                         UShort i0s, UShort i1s )
   2630 {
   2631    UInt i0    = (UInt)i0s;
   2632    UInt i1    = (UInt)i1s;
   2633    UInt imm1  = SLICE_UInt(i0,10,10);
   2634    UInt imm3  = SLICE_UInt(i1,14,12);
   2635    UInt imm8  = SLICE_UInt(i1,7,0);
   2636    return thumbExpandImm(updatesC, imm1, imm3, imm8);
   2637 }
   2638 
   2639 
   2640 /* Thumb16 only.  Given the firstcond and mask fields from an IT
   2641    instruction, compute the 32-bit ITSTATE value implied, as described
   2642    in libvex_guest_arm.h.  This is not the ARM ARM representation.
   2643    Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
   2644    disassembly printing.  Returns False if firstcond or mask
   2645    denote something invalid.
   2646 
   2647    The number and conditions for the instructions to be
   2648    conditionalised depend on firstcond and mask:
   2649 
   2650    mask      cond 1    cond 2      cond 3      cond 4
   2651 
   2652    1000      fc[3:0]
   2653    x100      fc[3:0]   fc[3:1]:x
   2654    xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
   2655    xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z
   2656 
   2657    The condition fields are assembled in *itstate backwards (cond 4 at
   2658    the top, cond 1 at the bottom).  Conditions are << 4'd and then
   2659    ^0xE'd, and those fields that correspond to instructions in the IT
   2660    block are tagged with a 1 bit.
   2661 */
   2662 static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
   2663                               /*OUT*/HChar* ch1,
   2664                               /*OUT*/HChar* ch2,
   2665                               /*OUT*/HChar* ch3,
   2666                               UInt firstcond, UInt mask )
   2667 {
   2668    vassert(firstcond <= 0xF);
   2669    vassert(mask <= 0xF);
   2670    *itstate = 0;
   2671    *ch1 = *ch2 = *ch3 = '.';
   2672    if (mask == 0)
   2673       return False; /* the logic below actually ensures this anyway,
   2674                        but clearer to make it explicit. */
   2675    if (firstcond == 0xF)
   2676       return False; /* NV is not allowed */
   2677    if (firstcond == 0xE && popcount32(mask) != 1)
   2678       return False; /* if firstcond is AL then all the rest must be too */
   2679 
   2680    UInt m3 = (mask >> 3) & 1;
   2681    UInt m2 = (mask >> 2) & 1;
   2682    UInt m1 = (mask >> 1) & 1;
   2683    UInt m0 = (mask >> 0) & 1;
   2684 
   2685    UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
   2686    UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;
   2687 
   2688    if (m3 == 1 && (m2|m1|m0) == 0) {
   2689       *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
   2690       *itstate ^= 0xE0E0E0E0;
   2691       return True;
   2692    }
   2693 
   2694    if (m2 == 1 && (m1|m0) == 0) {
   2695       *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
   2696       *itstate ^= 0xE0E0E0E0;
   2697       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
   2698       return True;
   2699    }
   2700 
   2701    if (m1 == 1 && m0 == 0) {
   2702       *itstate = (ni << 24)
   2703                  | (setbit32(fc, 4, m2) << 16)
   2704                  | (setbit32(fc, 4, m3) << 8) | fc;
   2705       *itstate ^= 0xE0E0E0E0;
   2706       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
   2707       *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
   2708       return True;
   2709    }
   2710 
   2711    if (m0 == 1) {
   2712       *itstate = (setbit32(fc, 4, m1) << 24)
   2713                  | (setbit32(fc, 4, m2) << 16)
   2714                  | (setbit32(fc, 4, m3) << 8) | fc;
   2715       *itstate ^= 0xE0E0E0E0;
   2716       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
   2717       *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
   2718       *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
   2719       return True;
   2720    }
   2721 
   2722    return False;
   2723 }
   2724 
   2725 
   2726 /* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
   2727    Chapter 7 Section 1. */
   2728 static IRTemp gen_BITREV ( IRTemp x0 )
   2729 {
   2730    IRTemp x1 = newTemp(Ity_I32);
   2731    IRTemp x2 = newTemp(Ity_I32);
   2732    IRTemp x3 = newTemp(Ity_I32);
   2733    IRTemp x4 = newTemp(Ity_I32);
   2734    IRTemp x5 = newTemp(Ity_I32);
   2735    UInt   c1 = 0x55555555;
   2736    UInt   c2 = 0x33333333;
   2737    UInt   c3 = 0x0F0F0F0F;
   2738    UInt   c4 = 0x00FF00FF;
   2739    UInt   c5 = 0x0000FFFF;
   2740    assign(x1,
   2741           binop(Iop_Or32,
   2742                 binop(Iop_Shl32,
   2743                       binop(Iop_And32, mkexpr(x0), mkU32(c1)),
   2744                       mkU8(1)),
   2745                 binop(Iop_Shr32,
   2746                       binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
   2747                       mkU8(1))
   2748    ));
   2749    assign(x2,
   2750           binop(Iop_Or32,
   2751                 binop(Iop_Shl32,
   2752                       binop(Iop_And32, mkexpr(x1), mkU32(c2)),
   2753                       mkU8(2)),
   2754                 binop(Iop_Shr32,
   2755                       binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
   2756                       mkU8(2))
   2757    ));
   2758    assign(x3,
   2759           binop(Iop_Or32,
   2760                 binop(Iop_Shl32,
   2761                       binop(Iop_And32, mkexpr(x2), mkU32(c3)),
   2762                       mkU8(4)),
   2763                 binop(Iop_Shr32,
   2764                       binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
   2765                       mkU8(4))
   2766    ));
   2767    assign(x4,
   2768           binop(Iop_Or32,
   2769                 binop(Iop_Shl32,
   2770                       binop(Iop_And32, mkexpr(x3), mkU32(c4)),
   2771                       mkU8(8)),
   2772                 binop(Iop_Shr32,
   2773                       binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
   2774                       mkU8(8))
   2775    ));
   2776    assign(x5,
   2777           binop(Iop_Or32,
   2778                 binop(Iop_Shl32,
   2779                       binop(Iop_And32, mkexpr(x4), mkU32(c5)),
   2780                       mkU8(16)),
   2781                 binop(Iop_Shr32,
   2782                       binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
   2783                       mkU8(16))
   2784    ));
   2785    return x5;
   2786 }
   2787 
   2788 
   2789 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   2790    0:1:2:3 (aka byte-swap). */
   2791 static IRTemp gen_REV ( IRTemp arg )
   2792 {
   2793    IRTemp res = newTemp(Ity_I32);
   2794    assign(res,
   2795           binop(Iop_Or32,
   2796                 binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
   2797           binop(Iop_Or32,
   2798                 binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
   2799                                  mkU32(0x00FF0000)),
   2800           binop(Iop_Or32,
   2801                 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
   2802                                        mkU32(0x0000FF00)),
   2803                 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
   2804                                        mkU32(0x000000FF) )
   2805    ))));
   2806    return res;
   2807 }
   2808 
   2809 
   2810 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   2811    2:3:0:1 (swap within lo and hi halves). */
   2812 static IRTemp gen_REV16 ( IRTemp arg )
   2813 {
   2814    IRTemp res = newTemp(Ity_I32);
   2815    assign(res,
   2816           binop(Iop_Or32,
   2817                 binop(Iop_And32,
   2818                       binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
   2819                       mkU32(0xFF00FF00)),
   2820                 binop(Iop_And32,
   2821                       binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
   2822                       mkU32(0x00FF00FF))));
   2823    return res;
   2824 }
   2825 
   2826 
   2827 /*------------------------------------------------------------*/
   2828 /*--- Advanced SIMD (NEON) instructions                    ---*/
   2829 /*------------------------------------------------------------*/
   2830 
   2831 /*------------------------------------------------------------*/
   2832 /*--- NEON data processing                                 ---*/
   2833 /*------------------------------------------------------------*/
   2834 
   2835 /* For all NEON DP ops, we use the normal scheme to handle conditional
   2836    writes to registers -- pass in condT and hand that on to the
   2837    put*Reg functions.  In ARM mode condT is always IRTemp_INVALID
   2838    since NEON is unconditional for ARM.  In Thumb mode condT is
   2839    derived from the ITSTATE shift register in the normal way. */
   2840 
   2841 static
   2842 UInt get_neon_d_regno(UInt theInstr)
   2843 {
   2844    UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   2845    if (theInstr & 0x40) {
   2846       if (x & 1) {
   2847          x = x + 0x100;
   2848       } else {
   2849          x = x >> 1;
   2850       }
   2851    }
   2852    return x;
   2853 }
   2854 
   2855 static
   2856 UInt get_neon_n_regno(UInt theInstr)
   2857 {
   2858    UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
   2859    if (theInstr & 0x40) {
   2860       if (x & 1) {
   2861          x = x + 0x100;
   2862       } else {
   2863          x = x >> 1;
   2864       }
   2865    }
   2866    return x;
   2867 }
   2868 
   2869 static
   2870 UInt get_neon_m_regno(UInt theInstr)
   2871 {
   2872    UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   2873    if (theInstr & 0x40) {
   2874       if (x & 1) {
   2875          x = x + 0x100;
   2876       } else {
   2877          x = x >> 1;
   2878       }
   2879    }
   2880    return x;
   2881 }
   2882 
   2883 static
   2884 Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
   2885 {
   2886    UInt dreg = get_neon_d_regno(theInstr);
   2887    UInt mreg = get_neon_m_regno(theInstr);
   2888    UInt nreg = get_neon_n_regno(theInstr);
   2889    UInt imm4 = (theInstr >> 8) & 0xf;
   2890    UInt Q = (theInstr >> 6) & 1;
   2891    HChar reg_t = Q ? 'q' : 'd';
   2892 
   2893    if (Q) {
   2894       putQReg(dreg, triop(Iop_SliceV128, /*hiV128*/getQReg(mreg),
   2895                           /*loV128*/getQReg(nreg), mkU8(imm4)), condT);
   2896    } else {
   2897       putDRegI64(dreg, triop(Iop_Slice64, /*hiI64*/getDRegI64(mreg),
   2898                              /*loI64*/getDRegI64(nreg), mkU8(imm4)), condT);
   2899    }
   2900    DIP("vext.8 %c%u, %c%u, %c%u, #%u\n", reg_t, dreg, reg_t, nreg,
   2901                                          reg_t, mreg, imm4);
   2902    return True;
   2903 }
   2904 
   2905 /* Generate specific vector FP binary ops, possibly with a fake
   2906    rounding mode as required by the primop. */
   2907 static
   2908 IRExpr* binop_w_fake_RM ( IROp op, IRExpr* argL, IRExpr* argR )
   2909 {
   2910    switch (op) {
   2911       case Iop_Add32Fx4:
   2912       case Iop_Sub32Fx4:
   2913       case Iop_Mul32Fx4:
   2914          return triop(op, get_FAKE_roundingmode(), argL, argR );
   2915       case Iop_Add32x4: case Iop_Add16x8:
   2916       case Iop_Sub32x4: case Iop_Sub16x8:
   2917       case Iop_Mul32x4: case Iop_Mul16x8:
   2918       case Iop_Mul32x2: case Iop_Mul16x4:
   2919       case Iop_Add32Fx2:
   2920       case Iop_Sub32Fx2:
   2921       case Iop_Mul32Fx2:
   2922       case Iop_PwAdd32Fx2:
   2923          return binop(op, argL, argR);
   2924       default:
   2925         ppIROp(op);
   2926         vassert(0);
   2927    }
   2928 }
   2929 
   2930 /* VTBL, VTBX */
   2931 static
   2932 Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
   2933 {
   2934    UInt op = (theInstr >> 6) & 1;
   2935    UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
   2936    UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
   2937    UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
   2938    UInt len = (theInstr >> 8) & 3;
   2939    Int i;
   2940    IROp cmp;
   2941    ULong imm;
   2942    IRTemp arg_l;
   2943    IRTemp old_mask, new_mask, cur_mask;
   2944    IRTemp old_res, new_res;
   2945    IRTemp old_arg, new_arg;
   2946 
   2947    if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
   2948       return False;
   2949    if (nreg + len > 31)
   2950       return False;
   2951 
   2952    cmp = Iop_CmpGT8Ux8;
   2953 
   2954    old_mask = newTemp(Ity_I64);
   2955    old_res = newTemp(Ity_I64);
   2956    old_arg = newTemp(Ity_I64);
   2957    assign(old_mask, mkU64(0));
   2958    assign(old_res, mkU64(0));
   2959    assign(old_arg, getDRegI64(mreg));
   2960    imm = 8;
   2961    imm = (imm <<  8) | imm;
   2962    imm = (imm << 16) | imm;
   2963    imm = (imm << 32) | imm;
   2964 
   2965    for (i = 0; i <= len; i++) {
   2966       arg_l = newTemp(Ity_I64);
   2967       new_mask = newTemp(Ity_I64);
   2968       cur_mask = newTemp(Ity_I64);
   2969       new_res = newTemp(Ity_I64);
   2970       new_arg = newTemp(Ity_I64);
   2971       assign(arg_l, getDRegI64(nreg+i));
   2972       assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
   2973       assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
   2974       assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
   2975       assign(new_res, binop(Iop_Or64,
   2976                             mkexpr(old_res),
   2977                             binop(Iop_And64,
   2978                                   binop(Iop_Perm8x8,
   2979                                         mkexpr(arg_l),
   2980                                         binop(Iop_And64,
   2981                                               mkexpr(old_arg),
   2982                                               mkexpr(cur_mask))),
   2983                                   mkexpr(cur_mask))));
   2984 
   2985       old_arg = new_arg;
   2986       old_mask = new_mask;
   2987       old_res = new_res;
   2988    }
   2989    if (op) {
   2990       new_res = newTemp(Ity_I64);
   2991       assign(new_res, binop(Iop_Or64,
   2992                             binop(Iop_And64,
   2993                                   getDRegI64(dreg),
   2994                                   unop(Iop_Not64, mkexpr(old_mask))),
   2995                             mkexpr(old_res)));
   2996       old_res = new_res;
   2997    }
   2998 
   2999    putDRegI64(dreg, mkexpr(old_res), condT);
   3000    DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
   3001    if (len > 0) {
   3002       DIP("d%u-d%u", nreg, nreg + len);
   3003    } else {
   3004       DIP("d%u", nreg);
   3005    }
   3006    DIP("}, d%u\n", mreg);
   3007    return True;
   3008 }
   3009 
   3010 /* VDUP (scalar)  */
   3011 static
   3012 Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
   3013 {
   3014    UInt Q = (theInstr >> 6) & 1;
   3015    UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   3016    UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   3017    UInt imm4 = (theInstr >> 16) & 0xF;
   3018    UInt index;
   3019    UInt size;
   3020    IRTemp arg_m;
   3021    IRTemp res;
   3022    IROp op, op2;
   3023 
   3024    if ((imm4 == 0) || (imm4 == 8))
   3025       return False;
   3026    if ((Q == 1) && ((dreg & 1) == 1))
   3027       return False;
   3028    if (Q)
   3029       dreg >>= 1;
   3030    arg_m = newTemp(Ity_I64);
   3031    assign(arg_m, getDRegI64(mreg));
   3032    if (Q)
   3033       res = newTemp(Ity_V128);
   3034    else
   3035       res = newTemp(Ity_I64);
   3036    if ((imm4 & 1) == 1) {
   3037       op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
   3038       op2 = Iop_GetElem8x8;
   3039       index = imm4 >> 1;
   3040       size = 8;
   3041    } else if ((imm4 & 3) == 2) {
   3042       op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
   3043       op2 = Iop_GetElem16x4;
   3044       index = imm4 >> 2;
   3045       size = 16;
   3046    } else if ((imm4 & 7) == 4) {
   3047       op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
   3048       op2 = Iop_GetElem32x2;
   3049       index = imm4 >> 3;
   3050       size = 32;
   3051    } else {
   3052       return False; // can this ever happen?
   3053    }
   3054    assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
   3055    if (Q) {
   3056       putQReg(dreg, mkexpr(res), condT);
   3057    } else {
   3058       putDRegI64(dreg, mkexpr(res), condT);
   3059    }
   3060    DIP("vdup.%u %c%u, d%u[%u]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
   3061    return True;
   3062 }
   3063 
   3064 /* A7.4.1 Three registers of the same length */
   3065 static
   3066 Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
   3067 {
   3068    /* In paths where this returns False, indicating a non-decodable
   3069       instruction, there may still be some IR assignments to temporaries
   3070       generated.  This is inconvenient but harmless, and the post-front-end
   3071       IR optimisation pass will just remove them anyway.  So there's no
   3072       effort made here to tidy it up.
   3073    */
   3074    UInt Q = (theInstr >> 6) & 1;
   3075    UInt dreg = get_neon_d_regno(theInstr);
   3076    UInt nreg = get_neon_n_regno(theInstr);
   3077    UInt mreg = get_neon_m_regno(theInstr);
   3078    UInt A = (theInstr >> 8) & 0xF;
   3079    UInt B = (theInstr >> 4) & 1;
   3080    UInt C = (theInstr >> 20) & 0x3;
   3081    UInt U = (theInstr >> 24) & 1;
   3082    UInt size = C;
   3083 
   3084    IRTemp arg_n;
   3085    IRTemp arg_m;
   3086    IRTemp res;
   3087 
   3088    if (Q) {
   3089       arg_n = newTemp(Ity_V128);
   3090       arg_m = newTemp(Ity_V128);
   3091       res = newTemp(Ity_V128);
   3092       assign(arg_n, getQReg(nreg));
   3093       assign(arg_m, getQReg(mreg));
   3094    } else {
   3095       arg_n = newTemp(Ity_I64);
   3096       arg_m = newTemp(Ity_I64);
   3097       res = newTemp(Ity_I64);
   3098       assign(arg_n, getDRegI64(nreg));
   3099       assign(arg_m, getDRegI64(mreg));
   3100    }
   3101 
   3102    switch(A) {
   3103       case 0:
   3104          if (B == 0) {
   3105             /* VHADD */
   3106             ULong imm = 0;
   3107             IRExpr *imm_val;
   3108             IROp addOp;
   3109             IROp andOp;
   3110             IROp shOp;
   3111             HChar regType = Q ? 'q' : 'd';
   3112 
   3113             if (size == 3)
   3114                return False;
   3115             switch(size) {
   3116                case 0: imm = 0x101010101010101LL; break;
   3117                case 1: imm = 0x1000100010001LL; break;
   3118                case 2: imm = 0x100000001LL; break;
   3119                default: vassert(0);
   3120             }
   3121             if (Q) {
   3122                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   3123                andOp = Iop_AndV128;
   3124             } else {
   3125                imm_val = mkU64(imm);
   3126                andOp = Iop_And64;
   3127             }
   3128             if (U) {
   3129                switch(size) {
   3130                   case 0:
   3131                      addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
   3132                      shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3133                      break;
   3134                   case 1:
   3135                      addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
   3136                      shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3137                      break;
   3138                   case 2:
   3139                      addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
   3140                      shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3141                      break;
   3142                   default:
   3143                      vassert(0);
   3144                }
   3145             } else {
   3146                switch(size) {
   3147                   case 0:
   3148                      addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
   3149                      shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
   3150                      break;
   3151                   case 1:
   3152                      addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
   3153                      shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
   3154                      break;
   3155                   case 2:
   3156                      addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
   3157                      shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
   3158                      break;
   3159                   default:
   3160                      vassert(0);
   3161                }
   3162             }
   3163             assign(res,
   3164                    binop(addOp,
   3165                          binop(addOp,
   3166                                binop(shOp, mkexpr(arg_m), mkU8(1)),
   3167                                binop(shOp, mkexpr(arg_n), mkU8(1))),
   3168                          binop(shOp,
   3169                                binop(addOp,
   3170                                      binop(andOp, mkexpr(arg_m), imm_val),
   3171                                      binop(andOp, mkexpr(arg_n), imm_val)),
   3172                                mkU8(1))));
   3173             DIP("vhadd.%c%d %c%u, %c%u, %c%u\n",
   3174                 U ? 'u' : 's', 8 << size, regType,
   3175                 dreg, regType, nreg, regType, mreg);
   3176          } else {
   3177             /* VQADD */
   3178             IROp op, op2;
   3179             IRTemp tmp;
   3180             HChar reg_t = Q ? 'q' : 'd';
   3181             if (Q) {
   3182                switch (size) {
   3183                   case 0:
   3184                      op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
   3185                      op2 = Iop_Add8x16;
   3186                      break;
   3187                   case 1:
   3188                      op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
   3189                      op2 = Iop_Add16x8;
   3190                      break;
   3191                   case 2:
   3192                      op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
   3193                      op2 = Iop_Add32x4;
   3194                      break;
   3195                   case 3:
   3196                      op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
   3197                      op2 = Iop_Add64x2;
   3198                      break;
   3199                   default:
   3200                      vassert(0);
   3201                }
   3202             } else {
   3203                switch (size) {
   3204                   case 0:
   3205                      op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
   3206                      op2 = Iop_Add8x8;
   3207                      break;
   3208                   case 1:
   3209                      op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
   3210                      op2 = Iop_Add16x4;
   3211                      break;
   3212                   case 2:
   3213                      op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
   3214                      op2 = Iop_Add32x2;
   3215                      break;
   3216                   case 3:
   3217                      op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
   3218                      op2 = Iop_Add64;
   3219                      break;
   3220                   default:
   3221                      vassert(0);
   3222                }
   3223             }
   3224             if (Q) {
   3225                tmp = newTemp(Ity_V128);
   3226             } else {
   3227                tmp = newTemp(Ity_I64);
   3228             }
   3229             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   3230             assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
   3231             setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
   3232             DIP("vqadd.%c%d %c%u %c%u, %c%u\n",
   3233                 U ? 'u' : 's',
   3234                 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3235          }
   3236          break;
   3237       case 1:
   3238          if (B == 0) {
   3239             /* VRHADD */
   3240             /* VRHADD C, A, B ::=
   3241                  C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
   3242             IROp shift_op, add_op;
   3243             IRTemp cc;
   3244             ULong one = 1;
   3245             HChar reg_t = Q ? 'q' : 'd';
   3246             switch (size) {
   3247                case 0: one = (one <<  8) | one; /* fall through */
   3248                case 1: one = (one << 16) | one; /* fall through */
   3249                case 2: one = (one << 32) | one; break;
   3250                case 3: return False;
   3251                default: vassert(0);
   3252             }
   3253             if (Q) {
   3254                switch (size) {
   3255                   case 0:
   3256                      shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
   3257                      add_op = Iop_Add8x16;
   3258                      break;
   3259                   case 1:
   3260                      shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   3261                      add_op = Iop_Add16x8;
   3262                      break;
   3263                   case 2:
   3264                      shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   3265                      add_op = Iop_Add32x4;
   3266                      break;
   3267                   case 3:
   3268                      return False;
   3269                   default:
   3270                      vassert(0);
   3271                }
   3272             } else {
   3273                switch (size) {
   3274                   case 0:
   3275                      shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
   3276                      add_op = Iop_Add8x8;
   3277                      break;
   3278                   case 1:
   3279                      shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
   3280                      add_op = Iop_Add16x4;
   3281                      break;
   3282                   case 2:
   3283                      shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
   3284                      add_op = Iop_Add32x2;
   3285                      break;
   3286                   case 3:
   3287                      return False;
   3288                   default:
   3289                      vassert(0);
   3290                }
   3291             }
   3292             if (Q) {
   3293                cc = newTemp(Ity_V128);
   3294                assign(cc, binop(shift_op,
   3295                                 binop(add_op,
   3296                                       binop(add_op,
   3297                                             binop(Iop_AndV128,
   3298                                                   mkexpr(arg_n),
   3299                                                   binop(Iop_64HLtoV128,
   3300                                                         mkU64(one),
   3301                                                         mkU64(one))),
   3302                                             binop(Iop_AndV128,
   3303                                                   mkexpr(arg_m),
   3304                                                   binop(Iop_64HLtoV128,
   3305                                                         mkU64(one),
   3306                                                         mkU64(one)))),
   3307                                       binop(Iop_64HLtoV128,
   3308                                             mkU64(one),
   3309                                             mkU64(one))),
   3310                                 mkU8(1)));
   3311                assign(res, binop(add_op,
   3312                                  binop(add_op,
   3313                                        binop(shift_op,
   3314                                              mkexpr(arg_n),
   3315                                              mkU8(1)),
   3316                                        binop(shift_op,
   3317                                              mkexpr(arg_m),
   3318                                              mkU8(1))),
   3319                                  mkexpr(cc)));
   3320             } else {
   3321                cc = newTemp(Ity_I64);
   3322                assign(cc, binop(shift_op,
   3323                                 binop(add_op,
   3324                                       binop(add_op,
   3325                                             binop(Iop_And64,
   3326                                                   mkexpr(arg_n),
   3327                                                   mkU64(one)),
   3328                                             binop(Iop_And64,
   3329                                                   mkexpr(arg_m),
   3330                                                   mkU64(one))),
   3331                                       mkU64(one)),
   3332                                 mkU8(1)));
   3333                assign(res, binop(add_op,
   3334                                  binop(add_op,
   3335                                        binop(shift_op,
   3336                                              mkexpr(arg_n),
   3337                                              mkU8(1)),
   3338                                        binop(shift_op,
   3339                                              mkexpr(arg_m),
   3340                                              mkU8(1))),
   3341                                  mkexpr(cc)));
   3342             }
   3343             DIP("vrhadd.%c%d %c%u, %c%u, %c%u\n",
   3344                 U ? 'u' : 's',
   3345                 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3346          } else {
   3347             if (U == 0)  {
   3348                switch(C) {
   3349                   case 0: {
   3350                      /* VAND  */
   3351                      HChar reg_t = Q ? 'q' : 'd';
   3352                      if (Q) {
   3353                         assign(res, binop(Iop_AndV128, mkexpr(arg_n),
   3354                                                        mkexpr(arg_m)));
   3355                      } else {
   3356                         assign(res, binop(Iop_And64, mkexpr(arg_n),
   3357                                                      mkexpr(arg_m)));
   3358                      }
   3359                      DIP("vand %c%u, %c%u, %c%u\n",
   3360                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3361                      break;
   3362                   }
   3363                   case 1: {
   3364                      /* VBIC  */
   3365                      HChar reg_t = Q ? 'q' : 'd';
   3366                      if (Q) {
   3367                         assign(res, binop(Iop_AndV128,mkexpr(arg_n),
   3368                                unop(Iop_NotV128, mkexpr(arg_m))));
   3369                      } else {
   3370                         assign(res, binop(Iop_And64, mkexpr(arg_n),
   3371                                unop(Iop_Not64, mkexpr(arg_m))));
   3372                      }
   3373                      DIP("vbic %c%u, %c%u, %c%u\n",
   3374                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3375                      break;
   3376                   }
   3377                   case 2:
   3378                      if ( nreg != mreg) {
   3379                         /* VORR  */
   3380                         HChar reg_t = Q ? 'q' : 'd';
   3381                         if (Q) {
   3382                            assign(res, binop(Iop_OrV128, mkexpr(arg_n),
   3383                                                          mkexpr(arg_m)));
   3384                         } else {
   3385                            assign(res, binop(Iop_Or64, mkexpr(arg_n),
   3386                                                        mkexpr(arg_m)));
   3387                         }
   3388                         DIP("vorr %c%u, %c%u, %c%u\n",
   3389                             reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3390                      } else {
   3391                         /* VMOV  */
   3392                         HChar reg_t = Q ? 'q' : 'd';
   3393                         assign(res, mkexpr(arg_m));
   3394                         DIP("vmov %c%u, %c%u\n", reg_t, dreg, reg_t, mreg);
   3395                      }
   3396                      break;
   3397                   case 3:{
   3398                      /* VORN  */
   3399                      HChar reg_t = Q ? 'q' : 'd';
   3400                      if (Q) {
   3401                         assign(res, binop(Iop_OrV128,mkexpr(arg_n),
   3402                                unop(Iop_NotV128, mkexpr(arg_m))));
   3403                      } else {
   3404                         assign(res, binop(Iop_Or64, mkexpr(arg_n),
   3405                                unop(Iop_Not64, mkexpr(arg_m))));
   3406                      }
   3407                      DIP("vorn %c%u, %c%u, %c%u\n",
   3408                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3409                      break;
   3410                   }
   3411                   default:
   3412                      vassert(0);
   3413                }
   3414             } else {
   3415                switch(C) {
   3416                   case 0:
   3417                      /* VEOR (XOR)  */
   3418                      if (Q) {
   3419                         assign(res, binop(Iop_XorV128, mkexpr(arg_n),
   3420                                                        mkexpr(arg_m)));
   3421                      } else {
   3422                         assign(res, binop(Iop_Xor64, mkexpr(arg_n),
   3423                                                      mkexpr(arg_m)));
   3424                      }
   3425                      DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   3426                            Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3427                      break;
   3428                   case 1:
   3429                      /* VBSL  */
   3430                      if (Q) {
   3431                         IRTemp reg_d = newTemp(Ity_V128);
   3432                         assign(reg_d, getQReg(dreg));
   3433                         assign(res,
   3434                                binop(Iop_OrV128,
   3435                                      binop(Iop_AndV128, mkexpr(arg_n),
   3436                                                         mkexpr(reg_d)),
   3437                                      binop(Iop_AndV128,
   3438                                            mkexpr(arg_m),
   3439                                            unop(Iop_NotV128,
   3440                                                  mkexpr(reg_d)) ) ) );
   3441                      } else {
   3442                         IRTemp reg_d = newTemp(Ity_I64);
   3443                         assign(reg_d, getDRegI64(dreg));
   3444                         assign(res,
   3445                                binop(Iop_Or64,
   3446                                      binop(Iop_And64, mkexpr(arg_n),
   3447                                                       mkexpr(reg_d)),
   3448                                      binop(Iop_And64,
   3449                                            mkexpr(arg_m),
   3450                                            unop(Iop_Not64, mkexpr(reg_d)))));
   3451                      }
   3452                      DIP("vbsl %c%u, %c%u, %c%u\n",
   3453                          Q ? 'q' : 'd', dreg,
   3454                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3455                      break;
   3456                   case 2:
   3457                      /* VBIT  */
   3458                      if (Q) {
   3459                         IRTemp reg_d = newTemp(Ity_V128);
   3460                         assign(reg_d, getQReg(dreg));
   3461                         assign(res,
   3462                                binop(Iop_OrV128,
   3463                                      binop(Iop_AndV128, mkexpr(arg_n),
   3464                                                         mkexpr(arg_m)),
   3465                                      binop(Iop_AndV128,
   3466                                            mkexpr(reg_d),
   3467                                            unop(Iop_NotV128, mkexpr(arg_m)))));
   3468                      } else {
   3469                         IRTemp reg_d = newTemp(Ity_I64);
   3470                         assign(reg_d, getDRegI64(dreg));
   3471                         assign(res,
   3472                                binop(Iop_Or64,
   3473                                      binop(Iop_And64, mkexpr(arg_n),
   3474                                                       mkexpr(arg_m)),
   3475                                      binop(Iop_And64,
   3476                                            mkexpr(reg_d),
   3477                                            unop(Iop_Not64, mkexpr(arg_m)))));
   3478                      }
   3479                      DIP("vbit %c%u, %c%u, %c%u\n",
   3480                          Q ? 'q' : 'd', dreg,
   3481                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3482                      break;
   3483                   case 3:
   3484                      /* VBIF  */
   3485                      if (Q) {
   3486                         IRTemp reg_d = newTemp(Ity_V128);
   3487                         assign(reg_d, getQReg(dreg));
   3488                         assign(res,
   3489                                binop(Iop_OrV128,
   3490                                      binop(Iop_AndV128, mkexpr(reg_d),
   3491                                                         mkexpr(arg_m)),
   3492                                      binop(Iop_AndV128,
   3493                                            mkexpr(arg_n),
   3494                                            unop(Iop_NotV128, mkexpr(arg_m)))));
   3495                      } else {
   3496                         IRTemp reg_d = newTemp(Ity_I64);
   3497                         assign(reg_d, getDRegI64(dreg));
   3498                         assign(res,
   3499                                binop(Iop_Or64,
   3500                                      binop(Iop_And64, mkexpr(reg_d),
   3501                                                       mkexpr(arg_m)),
   3502                                      binop(Iop_And64,
   3503                                            mkexpr(arg_n),
   3504                                            unop(Iop_Not64, mkexpr(arg_m)))));
   3505                      }
   3506                      DIP("vbif %c%u, %c%u, %c%u\n",
   3507                          Q ? 'q' : 'd', dreg,
   3508                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3509                      break;
   3510                   default:
   3511                      vassert(0);
   3512                }
   3513             }
   3514          }
   3515          break;
   3516       case 2:
   3517          if (B == 0) {
   3518             /* VHSUB */
   3519             /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1)   */
   3520             ULong imm = 0;
   3521             IRExpr *imm_val;
   3522             IROp subOp;
   3523             IROp notOp;
   3524             IROp andOp;
   3525             IROp shOp;
   3526             if (size == 3)
   3527                return False;
   3528             switch(size) {
   3529                case 0: imm = 0x101010101010101LL; break;
   3530                case 1: imm = 0x1000100010001LL; break;
   3531                case 2: imm = 0x100000001LL; break;
   3532                default: vassert(0);
   3533             }
   3534             if (Q) {
   3535                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   3536                andOp = Iop_AndV128;
   3537                notOp = Iop_NotV128;
   3538             } else {
   3539                imm_val = mkU64(imm);
   3540                andOp = Iop_And64;
   3541                notOp = Iop_Not64;
   3542             }
   3543             if (U) {
   3544                switch(size) {
   3545                   case 0:
   3546                      subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3547                      shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3548                      break;
   3549                   case 1:
   3550                      subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3551                      shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3552                      break;
   3553                   case 2:
   3554                      subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3555                      shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3556                      break;
   3557                   default:
   3558                      vassert(0);
   3559                }
   3560             } else {
   3561                switch(size) {
   3562                   case 0:
   3563                      subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3564                      shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
   3565                      break;
   3566                   case 1:
   3567                      subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3568                      shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
   3569                      break;
   3570                   case 2:
   3571                      subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3572                      shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
   3573                      break;
   3574                   default:
   3575                      vassert(0);
   3576                }
   3577             }
   3578             assign(res,
   3579                    binop(subOp,
   3580                          binop(subOp,
   3581                                binop(shOp, mkexpr(arg_n), mkU8(1)),
   3582                                binop(shOp, mkexpr(arg_m), mkU8(1))),
   3583                          binop(andOp,
   3584                                binop(andOp,
   3585                                      unop(notOp, mkexpr(arg_n)),
   3586                                      mkexpr(arg_m)),
   3587                                imm_val)));
   3588             DIP("vhsub.%c%d %c%u, %c%u, %c%u\n",
   3589                 U ? 'u' : 's', 8 << size,
   3590                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3591                 mreg);
   3592          } else {
   3593             /* VQSUB */
   3594             IROp op, op2;
   3595             IRTemp tmp;
   3596             if (Q) {
   3597                switch (size) {
   3598                   case 0:
   3599                      op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
   3600                      op2 = Iop_Sub8x16;
   3601                      break;
   3602                   case 1:
   3603                      op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
   3604                      op2 = Iop_Sub16x8;
   3605                      break;
   3606                   case 2:
   3607                      op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
   3608                      op2 = Iop_Sub32x4;
   3609                      break;
   3610                   case 3:
   3611                      op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
   3612                      op2 = Iop_Sub64x2;
   3613                      break;
   3614                   default:
   3615                      vassert(0);
   3616                }
   3617             } else {
   3618                switch (size) {
   3619                   case 0:
   3620                      op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
   3621                      op2 = Iop_Sub8x8;
   3622                      break;
   3623                   case 1:
   3624                      op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
   3625                      op2 = Iop_Sub16x4;
   3626                      break;
   3627                   case 2:
   3628                      op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
   3629                      op2 = Iop_Sub32x2;
   3630                      break;
   3631                   case 3:
   3632                      op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
   3633                      op2 = Iop_Sub64;
   3634                      break;
   3635                   default:
   3636                      vassert(0);
   3637                }
   3638             }
   3639             if (Q)
   3640                tmp = newTemp(Ity_V128);
   3641             else
   3642                tmp = newTemp(Ity_I64);
   3643             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   3644             assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
   3645             setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
   3646             DIP("vqsub.%c%d %c%u, %c%u, %c%u\n",
   3647                 U ? 'u' : 's', 8 << size,
   3648                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3649                 mreg);
   3650          }
   3651          break;
   3652       case 3: {
   3653             IROp op;
   3654             if (Q) {
   3655                switch (size) {
   3656                   case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
   3657                   case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
   3658                   case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
   3659                   case 3: return False;
   3660                   default: vassert(0);
   3661                }
   3662             } else {
   3663                switch (size) {
   3664                   case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
   3665                   case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
   3666                   case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
   3667                   case 3: return False;
   3668                   default: vassert(0);
   3669                }
   3670             }
   3671             if (B == 0) {
   3672                /* VCGT  */
   3673                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   3674                DIP("vcgt.%c%d %c%u, %c%u, %c%u\n",
   3675                    U ? 'u' : 's', 8 << size,
   3676                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3677                    mreg);
   3678             } else {
   3679                /* VCGE  */
   3680                /* VCGE res, argn, argm
   3681                     is equal to
   3682                   VCGT tmp, argm, argn
   3683                   VNOT res, tmp */
   3684                assign(res,
   3685                       unop(Q ? Iop_NotV128 : Iop_Not64,
   3686                            binop(op, mkexpr(arg_m), mkexpr(arg_n))));
   3687                DIP("vcge.%c%d %c%u, %c%u, %c%u\n",
   3688                    U ? 'u' : 's', 8 << size,
   3689                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3690                    mreg);
   3691             }
   3692          }
   3693          break;
   3694       case 4:
   3695          if (B == 0) {
   3696             /* VSHL */
   3697             IROp op = Iop_INVALID, sub_op = Iop_INVALID;
   3698             IRTemp tmp = IRTemp_INVALID;
   3699             if (U) {
   3700                switch (size) {
   3701                   case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
   3702                   case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
   3703                   case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
   3704                   case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
   3705                   default: vassert(0);
   3706                }
   3707             } else {
   3708                tmp = newTemp(Q ? Ity_V128 : Ity_I64);
   3709                switch (size) {
   3710                   case 0:
   3711                      op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   3712                      sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3713                      break;
   3714                   case 1:
   3715                      op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   3716                      sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3717                      break;
   3718                   case 2:
   3719                      op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   3720                      sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3721                      break;
   3722                   case 3:
   3723                      op = Q ? Iop_Sar64x2 : Iop_Sar64;
   3724                      sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
   3725                      break;
   3726                   default:
   3727                      vassert(0);
   3728                }
   3729             }
   3730             if (U) {
   3731                if (!Q && (size == 3))
   3732                   assign(res, binop(op, mkexpr(arg_m),
   3733                                         unop(Iop_64to8, mkexpr(arg_n))));
   3734                else
   3735                   assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
   3736             } else {
   3737                if (Q)
   3738                   assign(tmp, binop(sub_op,
   3739                                     binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
   3740                                     mkexpr(arg_n)));
   3741                else
   3742                   assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
   3743                if (!Q && (size == 3))
   3744                   assign(res, binop(op, mkexpr(arg_m),
   3745                                         unop(Iop_64to8, mkexpr(tmp))));
   3746                else
   3747                   assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
   3748             }
   3749             DIP("vshl.%c%d %c%u, %c%u, %c%u\n",
   3750                 U ? 'u' : 's', 8 << size,
   3751                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3752                 nreg);
   3753          } else {
   3754             /* VQSHL */
   3755             IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
   3756             IRTemp tmp, shval, mask, old_shval;
   3757             UInt i;
   3758             ULong esize;
   3759             cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
   3760             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3761             if (U) {
   3762                switch (size) {
   3763                   case 0:
   3764                      op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
   3765                      op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
   3766                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3767                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3768                      break;
   3769                   case 1:
   3770                      op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
   3771                      op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
   3772                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3773                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3774                      break;
   3775                   case 2:
   3776                      op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
   3777                      op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
   3778                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3779                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3780                      break;
   3781                   case 3:
   3782                      op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
   3783                      op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
   3784                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3785                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3786                      break;
   3787                   default:
   3788                      vassert(0);
   3789                }
   3790             } else {
   3791                switch (size) {
   3792                   case 0:
   3793                      op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
   3794                      op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   3795                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3796                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3797                      break;
   3798                   case 1:
   3799                      op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
   3800                      op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   3801                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3802                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3803                      break;
   3804                   case 2:
   3805                      op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
   3806                      op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   3807                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3808                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3809                      break;
   3810                   case 3:
   3811                      op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
   3812                      op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
   3813                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3814                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3815                      break;
   3816                   default:
   3817                      vassert(0);
   3818                }
   3819             }
   3820             if (Q) {
   3821                tmp = newTemp(Ity_V128);
   3822                shval = newTemp(Ity_V128);
   3823                mask = newTemp(Ity_V128);
   3824             } else {
   3825                tmp = newTemp(Ity_I64);
   3826                shval = newTemp(Ity_I64);
   3827                mask = newTemp(Ity_I64);
   3828             }
   3829             assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
   3830             /* Only least significant byte from second argument is used.
   3831                Copy this byte to the whole vector element. */
   3832             assign(shval, binop(op_shrn,
   3833                                 binop(op_shln,
   3834                                        mkexpr(arg_n),
   3835                                        mkU8((8 << size) - 8)),
   3836                                 mkU8((8 << size) - 8)));
   3837             for(i = 0; i < size; i++) {
   3838                old_shval = shval;
   3839                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   3840                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   3841                                    mkexpr(old_shval),
   3842                                    binop(op_shln,
   3843                                          mkexpr(old_shval),
   3844                                          mkU8(8 << i))));
   3845             }
   3846             /* If shift is greater or equal to the element size and
   3847                element is non-zero, then QC flag should be set. */
   3848             esize = (8 << size) - 1;
   3849             esize = (esize <<  8) | esize;
   3850             esize = (esize << 16) | esize;
   3851             esize = (esize << 32) | esize;
   3852             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   3853                              binop(cmp_gt, mkexpr(shval),
   3854                                            Q ? mkU128(esize) : mkU64(esize)),
   3855                              unop(cmp_neq, mkexpr(arg_m))),
   3856                        Q ? mkU128(0) : mkU64(0),
   3857                        Q, condT);
   3858             /* Otherwise QC flag should be set if shift value is positive and
   3859                the result, being right-shifted by the same value, is not equal
   3860                to the left argument. */
   3861             assign(mask, binop(cmp_gt, mkexpr(shval),
   3862                                        Q ? mkU128(0) : mkU64(0)));
   3863             if (!Q && size == 3)
   3864                assign(tmp, binop(op_rev, mkexpr(res),
   3865                                          unop(Iop_64to8, mkexpr(arg_n))));
   3866             else
   3867                assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
   3868             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   3869                              mkexpr(tmp), mkexpr(mask)),
   3870                        binop(Q ? Iop_AndV128 : Iop_And64,
   3871                              mkexpr(arg_m), mkexpr(mask)),
   3872                        Q, condT);
   3873             DIP("vqshl.%c%d %c%u, %c%u, %c%u\n",
   3874                 U ? 'u' : 's', 8 << size,
   3875                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3876                 nreg);
   3877          }
   3878          break;
   3879       case 5:
   3880          if (B == 0) {
   3881             /* VRSHL */
   3882             IROp op, op_shrn, op_shln, cmp_gt, op_add;
   3883             IRTemp shval, old_shval, imm_val, round;
   3884             UInt i;
   3885             ULong imm;
   3886             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3887             imm = 1L;
   3888             switch (size) {
   3889                case 0: imm = (imm <<  8) | imm; /* fall through */
   3890                case 1: imm = (imm << 16) | imm; /* fall through */
   3891                case 2: imm = (imm << 32) | imm; /* fall through */
   3892                case 3: break;
   3893                default: vassert(0);
   3894             }
   3895             imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
   3896             round = newTemp(Q ? Ity_V128 : Ity_I64);
   3897             assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
   3898             if (U) {
   3899                switch (size) {
   3900                   case 0:
   3901                      op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
   3902                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   3903                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3904                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3905                      break;
   3906                   case 1:
   3907                      op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
   3908                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   3909                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3910                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3911                      break;
   3912                   case 2:
   3913                      op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
   3914                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   3915                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3916                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3917                      break;
   3918                   case 3:
   3919                      op = Q ? Iop_Shl64x2 : Iop_Shl64;
   3920                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   3921                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3922                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3923                      break;
   3924                   default:
   3925                      vassert(0);
   3926                }
   3927             } else {
   3928                switch (size) {
   3929                   case 0:
   3930                      op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
   3931                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   3932                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3933                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3934                      break;
   3935                   case 1:
   3936                      op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
   3937                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   3938                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3939                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3940                      break;
   3941                   case 2:
   3942                      op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
   3943                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   3944                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3945                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3946                      break;
   3947                   case 3:
   3948                      op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
   3949                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   3950                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3951                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3952                      break;
   3953                   default:
   3954                      vassert(0);
   3955                }
   3956             }
   3957             if (Q) {
   3958                shval = newTemp(Ity_V128);
   3959             } else {
   3960                shval = newTemp(Ity_I64);
   3961             }
   3962             /* Only least significant byte from second argument is used.
   3963                Copy this byte to the whole vector element. */
   3964             assign(shval, binop(op_shrn,
   3965                                 binop(op_shln,
   3966                                        mkexpr(arg_n),
   3967                                        mkU8((8 << size) - 8)),
   3968                                 mkU8((8 << size) - 8)));
   3969             for (i = 0; i < size; i++) {
   3970                old_shval = shval;
   3971                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   3972                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   3973                                    mkexpr(old_shval),
   3974                                    binop(op_shln,
   3975                                          mkexpr(old_shval),
   3976                                          mkU8(8 << i))));
   3977             }
   3978             /* Compute the result */
   3979             if (!Q && size == 3 && U) {
   3980                assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   3981                                    binop(op,
   3982                                          mkexpr(arg_m),
   3983                                          unop(Iop_64to8,
   3984                                               binop(op_add,
   3985                                                     mkexpr(arg_n),
   3986                                                     mkexpr(imm_val)))),
   3987                                    binop(Q ? Iop_AndV128 : Iop_And64,
   3988                                          mkexpr(imm_val),
   3989                                          binop(cmp_gt,
   3990                                                Q ? mkU128(0) : mkU64(0),
   3991                                                mkexpr(arg_n)))));
   3992                assign(res, binop(op_add,
   3993                                  binop(op,
   3994                                        mkexpr(arg_m),
   3995                                        unop(Iop_64to8, mkexpr(arg_n))),
   3996                                  mkexpr(round)));
   3997             } else {
   3998                assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   3999                                    binop(op,
   4000                                          mkexpr(arg_m),
   4001                                          binop(op_add,
   4002                                                mkexpr(arg_n),
   4003                                                mkexpr(imm_val))),
   4004                                    binop(Q ? Iop_AndV128 : Iop_And64,
   4005                                          mkexpr(imm_val),
   4006                                          binop(cmp_gt,
   4007                                                Q ? mkU128(0) : mkU64(0),
   4008                                                mkexpr(arg_n)))));
   4009                assign(res, binop(op_add,
   4010                                  binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   4011                                  mkexpr(round)));
   4012             }
   4013             DIP("vrshl.%c%d %c%u, %c%u, %c%u\n",
   4014                 U ? 'u' : 's', 8 << size,
   4015                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   4016                 nreg);
   4017          } else {
   4018             /* VQRSHL */
   4019             IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
   4020             IRTemp tmp, shval, mask, old_shval, imm_val, round;
   4021             UInt i;
   4022             ULong esize, imm;
   4023             cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
   4024             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   4025             imm = 1L;
   4026             switch (size) {
   4027                case 0: imm = (imm <<  8) | imm; /* fall through */
   4028                case 1: imm = (imm << 16) | imm; /* fall through */
   4029                case 2: imm = (imm << 32) | imm; /* fall through */
   4030                case 3: break;
   4031                default: vassert(0);
   4032             }
   4033             imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
   4034             round = newTemp(Q ? Ity_V128 : Ity_I64);
   4035             assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
   4036             if (U) {
   4037                switch (size) {
   4038                   case 0:
   4039                      op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
   4040                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   4041                      op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
   4042                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   4043                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   4044                      break;
   4045                   case 1:
   4046                      op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
   4047                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   4048                      op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
   4049                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   4050                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   4051                      break;
   4052                   case 2:
   4053                      op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
   4054                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   4055                      op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
   4056                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   4057                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   4058                      break;
   4059                   case 3:
   4060                      op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
   4061                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   4062                      op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
   4063                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   4064                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   4065                      break;
   4066                   default:
   4067                      vassert(0);
   4068                }
   4069             } else {
   4070                switch (size) {
   4071                   case 0:
   4072                      op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
   4073                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   4074                      op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   4075                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   4076                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   4077                      break;
   4078                   case 1:
   4079                      op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
   4080                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   4081                      op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   4082                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   4083                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   4084                      break;
   4085                   case 2:
   4086                      op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
   4087                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   4088                      op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   4089                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   4090                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   4091                      break;
   4092                   case 3:
   4093                      op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
   4094                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   4095                      op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
   4096                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   4097                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   4098                      break;
   4099                   default:
   4100                      vassert(0);
   4101                }
   4102             }
   4103             if (Q) {
   4104                tmp = newTemp(Ity_V128);
   4105                shval = newTemp(Ity_V128);
   4106                mask = newTemp(Ity_V128);
   4107             } else {
   4108                tmp = newTemp(Ity_I64);
   4109                shval = newTemp(Ity_I64);
   4110                mask = newTemp(Ity_I64);
   4111             }
   4112             /* Only least significant byte from second argument is used.
   4113                Copy this byte to the whole vector element. */
   4114             assign(shval, binop(op_shrn,
   4115                                 binop(op_shln,
   4116                                        mkexpr(arg_n),
   4117                                        mkU8((8 << size) - 8)),
   4118                                 mkU8((8 << size) - 8)));
   4119             for (i = 0; i < size; i++) {
   4120                old_shval = shval;
   4121                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   4122                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   4123                                    mkexpr(old_shval),
   4124                                    binop(op_shln,
   4125                                          mkexpr(old_shval),
   4126                                          mkU8(8 << i))));
   4127             }
   4128             /* Compute the result */
   4129             assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   4130                                 binop(op,
   4131                                       mkexpr(arg_m),
   4132                                       binop(op_add,
   4133                                             mkexpr(arg_n),
   4134                                             mkexpr(imm_val))),
   4135                                 binop(Q ? Iop_AndV128 : Iop_And64,
   4136                                       mkexpr(imm_val),
   4137                                       binop(cmp_gt,
   4138                                             Q ? mkU128(0) : mkU64(0),
   4139                                             mkexpr(arg_n)))));
   4140             assign(res, binop(op_add,
   4141                               binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   4142                               mkexpr(round)));
   4143             /* If shift is greater or equal to the element size and element is
   4144                non-zero, then QC flag should be set. */
   4145             esize = (8 << size) - 1;
   4146             esize = (esize <<  8) | esize;
   4147             esize = (esize << 16) | esize;
   4148             esize = (esize << 32) | esize;
   4149             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4150                              binop(cmp_gt, mkexpr(shval),
   4151                                            Q ? mkU128(esize) : mkU64(esize)),
   4152                              unop(cmp_neq, mkexpr(arg_m))),
   4153                        Q ? mkU128(0) : mkU64(0),
   4154                        Q, condT);
   4155             /* Otherwise QC flag should be set if shift value is positive and
   4156                the result, being right-shifted by the same value, is not equal
   4157                to the left argument. */
   4158             assign(mask, binop(cmp_gt, mkexpr(shval),
   4159                                Q ? mkU128(0) : mkU64(0)));
   4160             if (!Q && size == 3)
   4161                assign(tmp, binop(op_rev, mkexpr(res),
   4162                                          unop(Iop_64to8, mkexpr(arg_n))));
   4163             else
   4164                assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
   4165             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4166                              mkexpr(tmp), mkexpr(mask)),
   4167                        binop(Q ? Iop_AndV128 : Iop_And64,
   4168                              mkexpr(arg_m), mkexpr(mask)),
   4169                        Q, condT);
   4170             DIP("vqrshl.%c%d %c%u, %c%u, %c%u\n",
   4171                 U ? 'u' : 's', 8 << size,
   4172                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   4173                 nreg);
   4174          }
   4175          break;
   4176       case 6:
   4177          /* VMAX, VMIN  */
   4178          if (B == 0) {
   4179             /* VMAX */
   4180             IROp op;
   4181             if (U == 0) {
   4182                switch (size) {
   4183                   case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
   4184                   case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
   4185                   case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
   4186                   case 3: return False;
   4187                   default: vassert(0);
   4188                }
   4189             } else {
   4190                switch (size) {
   4191                   case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
   4192                   case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
   4193                   case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
   4194                   case 3: return False;
   4195                   default: vassert(0);
   4196                }
   4197             }
   4198             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4199             DIP("vmax.%c%d %c%u, %c%u, %c%u\n",
   4200                 U ? 'u' : 's', 8 << size,
   4201                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4202                 mreg);
   4203          } else {
   4204             /* VMIN */
   4205             IROp op;
   4206             if (U == 0) {
   4207                switch (size) {
   4208                   case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
   4209                   case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
   4210                   case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
   4211                   case 3: return False;
   4212                   default: vassert(0);
   4213                }
   4214             } else {
   4215                switch (size) {
   4216                   case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
   4217                   case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
   4218                   case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
   4219                   case 3: return False;
   4220                   default: vassert(0);
   4221                }
   4222             }
   4223             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4224             DIP("vmin.%c%d %c%u, %c%u, %c%u\n",
   4225                 U ? 'u' : 's', 8 << size,
   4226                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4227                 mreg);
   4228          }
   4229          break;
   4230       case 7:
   4231          if (B == 0) {
   4232             /* VABD */
   4233             IROp op_cmp, op_sub;
   4234             IRTemp cond;
   4235             if ((theInstr >> 23) & 1) {
   4236                vpanic("VABDL should not be in dis_neon_data_3same\n");
   4237             }
   4238             if (Q) {
   4239                switch (size) {
   4240                   case 0:
   4241                      op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
   4242                      op_sub = Iop_Sub8x16;
   4243                      break;
   4244                   case 1:
   4245                      op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
   4246                      op_sub = Iop_Sub16x8;
   4247                      break;
   4248                   case 2:
   4249                      op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
   4250                      op_sub = Iop_Sub32x4;
   4251                      break;
   4252                   case 3:
   4253                      return False;
   4254                   default:
   4255                      vassert(0);
   4256                }
   4257             } else {
   4258                switch (size) {
   4259                   case 0:
   4260                      op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4261                      op_sub = Iop_Sub8x8;
   4262                      break;
   4263                   case 1:
   4264                      op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   4265                      op_sub = Iop_Sub16x4;
   4266                      break;
   4267                   case 2:
   4268                      op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   4269                      op_sub = Iop_Sub32x2;
   4270                      break;
   4271                   case 3:
   4272                      return False;
   4273                   default:
   4274                      vassert(0);
   4275                }
   4276             }
   4277             if (Q) {
   4278                cond = newTemp(Ity_V128);
   4279             } else {
   4280                cond = newTemp(Ity_I64);
   4281             }
   4282             assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
   4283             assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
   4284                               binop(Q ? Iop_AndV128 : Iop_And64,
   4285                                     binop(op_sub, mkexpr(arg_n),
   4286                                                   mkexpr(arg_m)),
   4287                                     mkexpr(cond)),
   4288                               binop(Q ? Iop_AndV128 : Iop_And64,
   4289                                     binop(op_sub, mkexpr(arg_m),
   4290                                                   mkexpr(arg_n)),
   4291                                     unop(Q ? Iop_NotV128 : Iop_Not64,
   4292                                          mkexpr(cond)))));
   4293             DIP("vabd.%c%d %c%u, %c%u, %c%u\n",
   4294                 U ? 'u' : 's', 8 << size,
   4295                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4296                 mreg);
   4297          } else {
   4298             /* VABA */
   4299             IROp op_cmp, op_sub, op_add;
   4300             IRTemp cond, acc, tmp;
   4301             if ((theInstr >> 23) & 1) {
   4302                vpanic("VABAL should not be in dis_neon_data_3same");
   4303             }
   4304             if (Q) {
   4305                switch (size) {
   4306                   case 0:
   4307                      op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
   4308                      op_sub = Iop_Sub8x16;
   4309                      op_add = Iop_Add8x16;
   4310                      break;
   4311                   case 1:
   4312                      op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
   4313                      op_sub = Iop_Sub16x8;
   4314                      op_add = Iop_Add16x8;
   4315                      break;
   4316                   case 2:
   4317                      op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
   4318                      op_sub = Iop_Sub32x4;
   4319                      op_add = Iop_Add32x4;
   4320                      break;
   4321                   case 3:
   4322                      return False;
   4323                   default:
   4324                      vassert(0);
   4325                }
   4326             } else {
   4327                switch (size) {
   4328                   case 0:
   4329                      op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4330                      op_sub = Iop_Sub8x8;
   4331                      op_add = Iop_Add8x8;
   4332                      break;
   4333                   case 1:
   4334                      op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   4335                      op_sub = Iop_Sub16x4;
   4336                      op_add = Iop_Add16x4;
   4337                      break;
   4338                   case 2:
   4339                      op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   4340                      op_sub = Iop_Sub32x2;
   4341                      op_add = Iop_Add32x2;
   4342                      break;
   4343                   case 3:
   4344                      return False;
   4345                   default:
   4346                      vassert(0);
   4347                }
   4348             }
   4349             if (Q) {
   4350                cond = newTemp(Ity_V128);
   4351                acc = newTemp(Ity_V128);
   4352                tmp = newTemp(Ity_V128);
   4353                assign(acc, getQReg(dreg));
   4354             } else {
   4355                cond = newTemp(Ity_I64);
   4356                acc = newTemp(Ity_I64);
   4357                tmp = newTemp(Ity_I64);
   4358                assign(acc, getDRegI64(dreg));
   4359             }
   4360             assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
   4361             assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
   4362                               binop(Q ? Iop_AndV128 : Iop_And64,
   4363                                     binop(op_sub, mkexpr(arg_n),
   4364                                                   mkexpr(arg_m)),
   4365                                     mkexpr(cond)),
   4366                               binop(Q ? Iop_AndV128 : Iop_And64,
   4367                                     binop(op_sub, mkexpr(arg_m),
   4368                                                   mkexpr(arg_n)),
   4369                                     unop(Q ? Iop_NotV128 : Iop_Not64,
   4370                                          mkexpr(cond)))));
   4371             assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
   4372             DIP("vaba.%c%d %c%u, %c%u, %c%u\n",
   4373                 U ? 'u' : 's', 8 << size,
   4374                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4375                 mreg);
   4376          }
   4377          break;
   4378       case 8:
   4379          if (B == 0) {
   4380             IROp op;
   4381             if (U == 0) {
   4382                /* VADD  */
   4383                switch (size) {
   4384                   case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
   4385                   case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
   4386                   case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
   4387                   case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
   4388                   default: vassert(0);
   4389                }
   4390                DIP("vadd.i%d %c%u, %c%u, %c%u\n",
   4391                    8 << size, Q ? 'q' : 'd',
   4392                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4393             } else {
   4394                /* VSUB  */
   4395                switch (size) {
   4396                   case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
   4397                   case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
   4398                   case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
   4399                   case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
   4400                   default: vassert(0);
   4401                }
   4402                DIP("vsub.i%d %c%u, %c%u, %c%u\n",
   4403                    8 << size, Q ? 'q' : 'd',
   4404                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4405             }
   4406             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4407          } else {
   4408             IROp op;
   4409             switch (size) {
   4410                case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
   4411                case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
   4412                case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
   4413                case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
   4414                default: vassert(0);
   4415             }
   4416             if (U == 0) {
   4417                /* VTST  */
   4418                assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
   4419                                           mkexpr(arg_n),
   4420                                           mkexpr(arg_m))));
   4421                DIP("vtst.%d %c%u, %c%u, %c%u\n",
   4422                    8 << size, Q ? 'q' : 'd',
   4423                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4424             } else {
   4425                /* VCEQ  */
   4426                assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   4427                                 unop(op,
   4428                                      binop(Q ? Iop_XorV128 : Iop_Xor64,
   4429                                            mkexpr(arg_n),
   4430                                            mkexpr(arg_m)))));
   4431                DIP("vceq.i%d %c%u, %c%u, %c%u\n",
   4432                    8 << size, Q ? 'q' : 'd',
   4433                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4434             }
   4435          }
   4436          break;
   4437       case 9:
   4438          if (B == 0) {
   4439             /* VMLA, VMLS (integer) */
   4440             IROp op, op2;
   4441             UInt P = (theInstr >> 24) & 1;
   4442             if (P) {
   4443                switch (size) {
   4444                   case 0:
   4445                      op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
   4446                      op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   4447                      break;
   4448                   case 1:
   4449                      op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   4450                      op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   4451                      break;
   4452                   case 2:
   4453                      op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   4454                      op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   4455                      break;
   4456                   case 3:
   4457                      return False;
   4458                   default:
   4459                      vassert(0);
   4460                }
   4461             } else {
   4462                switch (size) {
   4463                   case 0:
   4464                      op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
   4465                      op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
   4466                      break;
   4467                   case 1:
   4468                      op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   4469                      op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
   4470                      break;
   4471                   case 2:
   4472                      op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   4473                      op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
   4474                      break;
   4475                   case 3:
   4476                      return False;
   4477                   default:
   4478                      vassert(0);
   4479                }
   4480             }
   4481             assign(res, binop(op2,
   4482                               Q ? getQReg(dreg) : getDRegI64(dreg),
   4483                               binop(op, mkexpr(arg_n), mkexpr(arg_m))));
   4484             DIP("vml%c.i%d %c%u, %c%u, %c%u\n",
   4485                 P ? 's' : 'a', 8 << size,
   4486                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4487                 mreg);
   4488          } else {
   4489             /* VMUL */
   4490             IROp op;
   4491             UInt P = (theInstr >> 24) & 1;
   4492             if (P) {
   4493                switch (size) {
   4494                   case 0:
   4495                      op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
   4496                      break;
   4497                   case 1: case 2: case 3: return False;
   4498                   default: vassert(0);
   4499                }
   4500             } else {
   4501                switch (size) {
   4502                   case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
   4503                   case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
   4504                   case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
   4505                   case 3: return False;
   4506                   default: vassert(0);
   4507                }
   4508             }
   4509             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4510             DIP("vmul.%c%d %c%u, %c%u, %c%u\n",
   4511                 P ? 'p' : 'i', 8 << size,
   4512                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4513                 mreg);
   4514          }
   4515          break;
   4516       case 10: {
   4517          /* VPMAX, VPMIN  */
   4518          UInt P = (theInstr >> 4) & 1;
   4519          IROp op;
   4520          if (Q)
   4521             return False;
   4522          if (P) {
   4523             switch (size) {
   4524                case 0: op = U ? Iop_PwMin8Ux8  : Iop_PwMin8Sx8; break;
   4525                case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
   4526                case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
   4527                case 3: return False;
   4528                default: vassert(0);
   4529             }
   4530          } else {
   4531             switch (size) {
   4532                case 0: op = U ? Iop_PwMax8Ux8  : Iop_PwMax8Sx8; break;
   4533                case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
   4534                case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
   4535                case 3: return False;
   4536                default: vassert(0);
   4537             }
   4538          }
   4539          assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4540          DIP("vp%s.%c%d %c%u, %c%u, %c%u\n",
   4541              P ? "min" : "max", U ? 'u' : 's',
   4542              8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
   4543              Q ? 'q' : 'd', mreg);
   4544          break;
   4545       }
   4546       case 11:
   4547          if (B == 0) {
   4548             if (U == 0) {
   4549                /* VQDMULH  */
   4550                IROp op ,op2;
   4551                ULong imm;
   4552                switch (size) {
   4553                   case 0: case 3:
   4554                      return False;
   4555                   case 1:
   4556                      op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
   4557                      op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   4558                      imm = 1LL << 15;
   4559                      imm = (imm << 16) | imm;
   4560                      imm = (imm << 32) | imm;
   4561                      break;
   4562                   case 2:
   4563                      op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
   4564                      op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   4565                      imm = 1LL << 31;
   4566                      imm = (imm << 32) | imm;
   4567                      break;
   4568                   default:
   4569                      vassert(0);
   4570                }
   4571                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4572                setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4573                                 binop(op2, mkexpr(arg_n),
   4574                                            Q ? mkU128(imm) : mkU64(imm)),
   4575                                 binop(op2, mkexpr(arg_m),
   4576                                            Q ? mkU128(imm) : mkU64(imm))),
   4577                           Q ? mkU128(0) : mkU64(0),
   4578                           Q, condT);
   4579                DIP("vqdmulh.s%d %c%u, %c%u, %c%u\n",
   4580                    8 << size, Q ? 'q' : 'd',
   4581                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4582             } else {
   4583                /* VQRDMULH */
   4584                IROp op ,op2;
   4585                ULong imm;
   4586                switch(size) {
   4587                   case 0: case 3:
   4588                      return False;
   4589                   case 1:
   4590                      imm = 1LL << 15;
   4591                      imm = (imm << 16) | imm;
   4592                      imm = (imm << 32) | imm;
   4593                      op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
   4594                      op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   4595                      break;
   4596                   case 2:
   4597                      imm = 1LL << 31;
   4598                      imm = (imm << 32) | imm;
   4599                      op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
   4600                      op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   4601                      break;
   4602                   default:
   4603                      vassert(0);
   4604                }
   4605                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4606                setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4607                                 binop(op2, mkexpr(arg_n),
   4608                                            Q ? mkU128(imm) : mkU64(imm)),
   4609                                 binop(op2, mkexpr(arg_m),
   4610                                            Q ? mkU128(imm) : mkU64(imm))),
   4611                           Q ? mkU128(0) : mkU64(0),
   4612                           Q, condT);
   4613                DIP("vqrdmulh.s%d %c%u, %c%u, %c%u\n",
   4614                    8 << size, Q ? 'q' : 'd',
   4615                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4616             }
   4617          } else {
   4618             if (U == 0) {
   4619                /* VPADD */
   4620                IROp op;
   4621                if (Q)
   4622                   return False;
   4623                switch (size) {
   4624                   case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8;  break;
   4625                   case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
   4626                   case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
   4627                   case 3: return False;
   4628                   default: vassert(0);
   4629                }
   4630                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4631                DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
   4632                    8 << size, Q ? 'q' : 'd',
   4633                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4634             } else {
   4635                return False;
   4636             }
   4637          }
   4638          break;
   4639       case 12: {
   4640          return False;
   4641       }
   4642       /* Starting from here these are FP SIMD cases */
   4643       case 13:
   4644          if (B == 0) {
   4645             IROp op;
   4646             if (U == 0) {
   4647                if ((C >> 1) == 0) {
   4648                   /* VADD  */
   4649                   op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2 ;
   4650                   DIP("vadd.f32 %c%u, %c%u, %c%u\n",
   4651                       Q ? 'q' : 'd', dreg,
   4652                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4653                } else {
   4654                   /* VSUB  */
   4655                   op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2 ;
   4656                   DIP("vsub.f32 %c%u, %c%u, %c%u\n",
   4657                       Q ? 'q' : 'd', dreg,
   4658                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4659                }
   4660             } else {
   4661                if ((C >> 1) == 0) {
   4662                   /* VPADD */
   4663                   if (Q)
   4664                      return False;
   4665                   op = Iop_PwAdd32Fx2;
   4666                   DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
   4667                } else {
   4668                   /* VABD  */
   4669                   if (Q) {
   4670                      assign(res, unop(Iop_Abs32Fx4,
   4671                                       triop(Iop_Sub32Fx4,
   4672                                             get_FAKE_roundingmode(),
   4673                                             mkexpr(arg_n),
   4674                                             mkexpr(arg_m))));
   4675                   } else {
   4676                      assign(res, unop(Iop_Abs32Fx2,
   4677                                       binop(Iop_Sub32Fx2,
   4678                                             mkexpr(arg_n),
   4679                                             mkexpr(arg_m))));
   4680                   }
   4681                   DIP("vabd.f32 %c%u, %c%u, %c%u\n",
   4682                       Q ? 'q' : 'd', dreg,
   4683                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4684                   break;
   4685                }
   4686             }
   4687             assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   4688          } else {
   4689             if (U == 0) {
   4690                /* VMLA, VMLS  */
   4691                IROp op, op2;
   4692                UInt P = (theInstr >> 21) & 1;
   4693                if (P) {
   4694                   switch (size & 1) {
   4695                      case 0:
   4696                         op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   4697                         op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
   4698                         break;
   4699                      case 1: return False;
   4700                      default: vassert(0);
   4701                   }
   4702                } else {
   4703                   switch (size & 1) {
   4704                      case 0:
   4705                         op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   4706                         op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
   4707                         break;
   4708                      case 1: return False;
   4709                      default: vassert(0);
   4710                   }
   4711                }
   4712                assign(res, binop_w_fake_RM(
   4713                               op2,
   4714                               Q ? getQReg(dreg) : getDRegI64(dreg),
   4715                               binop_w_fake_RM(op, mkexpr(arg_n),
   4716                                                   mkexpr(arg_m))));
   4717 
   4718                DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
   4719                    P ? 's' : 'a', Q ? 'q' : 'd',
   4720                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4721             } else {
   4722                /* VMUL  */
   4723                IROp op;
   4724                if ((C >> 1) != 0)
   4725                   return False;
   4726                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ;
   4727                assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   4728                DIP("vmul.f32 %c%u, %c%u, %c%u\n",
   4729                    Q ? 'q' : 'd', dreg,
   4730                    Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4731             }
   4732          }
   4733          break;
   4734       case 14:
   4735          if (B == 0) {
   4736             if (U == 0) {
   4737                if ((C >> 1) == 0) {
   4738                   /* VCEQ  */
   4739                   IROp op;
   4740                   if ((theInstr >> 20) & 1)
   4741                      return False;
   4742                   op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2;
   4743                   assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4744                   DIP("vceq.f32 %c%u, %c%u, %c%u\n",
   4745                       Q ? 'q' : 'd', dreg,
   4746                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4747                } else {
   4748                   return False;
   4749                }
   4750             } else {
   4751                if ((C >> 1) == 0) {
   4752                   /* VCGE  */
   4753                   IROp op;
   4754                   if ((theInstr >> 20) & 1)
   4755                      return False;
   4756                   op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
   4757                   assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4758                   DIP("vcge.f32 %c%u, %c%u, %c%u\n",
   4759                       Q ? 'q' : 'd', dreg,
   4760                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4761                } else {
   4762                   /* VCGT  */
   4763                   IROp op;
   4764                   if ((theInstr >> 20) & 1)
   4765                      return False;
   4766                   op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
   4767                   assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4768                   DIP("vcgt.f32 %c%u, %c%u, %c%u\n",
   4769                       Q ? 'q' : 'd', dreg,
   4770                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4771                }
   4772             }
   4773          } else {
   4774             if (U == 1) {
   4775                /* VACGE, VACGT */
   4776                UInt op_bit = (theInstr >> 21) & 1;
   4777                IROp op, op2;
   4778                op2 = Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2;
   4779                if (op_bit) {
   4780                   op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
   4781                   assign(res, binop(op,
   4782                                     unop(op2, mkexpr(arg_n)),
   4783                                     unop(op2, mkexpr(arg_m))));
   4784                } else {
   4785                   op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
   4786                   assign(res, binop(op,
   4787                                     unop(op2, mkexpr(arg_n)),
   4788                                     unop(op2, mkexpr(arg_m))));
   4789                }
   4790                DIP("vacg%c.f32 %c%u, %c%u, %c%u\n", op_bit ? 't' : 'e',
   4791                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
   4792                    Q ? 'q' : 'd', mreg);
   4793             } else {
   4794                return False;
   4795             }
   4796          }
   4797          break;
   4798       case 15:
   4799          if (B == 0) {
   4800             if (U == 0) {
   4801                /* VMAX, VMIN  */
   4802                IROp op;
   4803                if ((theInstr >> 20) & 1)
   4804                   return False;
   4805                if ((theInstr >> 21) & 1) {
   4806                   op = Q ? Iop_Min32Fx4 : Iop_Min32Fx2;
   4807                   DIP("vmin.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4808                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4809                } else {
   4810                   op = Q ? Iop_Max32Fx4 : Iop_Max32Fx2;
   4811                   DIP("vmax.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4812                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4813                }
   4814                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4815             } else {
   4816                /* VPMAX, VPMIN   */
   4817                IROp op;
   4818                if (Q)
   4819                   return False;
   4820                if ((theInstr >> 20) & 1)
   4821                   return False;
   4822                if ((theInstr >> 21) & 1) {
   4823                   op = Iop_PwMin32Fx2;
   4824                   DIP("vpmin.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
   4825                } else {
   4826                   op = Iop_PwMax32Fx2;
   4827                   DIP("vpmax.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
   4828                }
   4829                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4830             }
   4831          } else {
   4832             if (U == 0) {
   4833                if ((C >> 1) == 0) {
   4834                   /* VRECPS */
   4835                   if ((theInstr >> 20) & 1)
   4836                      return False;
   4837                   assign(res, binop(Q ? Iop_RecipStep32Fx4
   4838                                       : Iop_RecipStep32Fx2,
   4839                                     mkexpr(arg_n),
   4840                                     mkexpr(arg_m)));
   4841                   DIP("vrecps.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4842                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4843                } else {
   4844                   /* VRSQRTS  */
   4845                   if ((theInstr >> 20) & 1)
   4846                      return False;
   4847                   assign(res, binop(Q ? Iop_RSqrtStep32Fx4
   4848                                       : Iop_RSqrtStep32Fx2,
   4849                                     mkexpr(arg_n),
   4850                                     mkexpr(arg_m)));
   4851                   DIP("vrsqrts.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   4852                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4853                }
   4854             } else {
   4855                return False;
   4856             }
   4857          }
   4858          break;
   4859       default:
   4860          /*NOTREACHED*/
   4861          vassert(0);
   4862    }
   4863 
   4864    if (Q) {
   4865       putQReg(dreg, mkexpr(res), condT);
   4866    } else {
   4867       putDRegI64(dreg, mkexpr(res), condT);
   4868    }
   4869 
   4870    return True;
   4871 }
   4872 
   4873 /* A7.4.2 Three registers of different length */
   4874 static
   4875 Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
   4876 {
   4877    /* In paths where this returns False, indicating a non-decodable
   4878       instruction, there may still be some IR assignments to temporaries
   4879       generated.  This is inconvenient but harmless, and the post-front-end
   4880       IR optimisation pass will just remove them anyway.  So there's no
   4881       effort made here to tidy it up.
   4882    */
   4883    UInt A = (theInstr >> 8) & 0xf;
   4884    UInt B = (theInstr >> 20) & 3;
   4885    UInt U = (theInstr >> 24) & 1;
   4886    UInt P = (theInstr >> 9) & 1;
   4887    UInt mreg = get_neon_m_regno(theInstr);
   4888    UInt nreg = get_neon_n_regno(theInstr);
   4889    UInt dreg = get_neon_d_regno(theInstr);
   4890    UInt size = B;
   4891    ULong imm;
   4892    IRTemp res, arg_m, arg_n, cond, tmp;
   4893    IROp cvt, cvt2, cmp, op, op2, sh, add;
   4894    switch (A) {
   4895       case 0: case 1: case 2: case 3:
   4896          /* VADDL, VADDW, VSUBL, VSUBW */
   4897          if (dreg & 1)
   4898             return False;
   4899          dreg >>= 1;
   4900          size = B;
   4901          switch (size) {
   4902             case 0:
   4903                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   4904                op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
   4905                break;
   4906             case 1:
   4907                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   4908                op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
   4909                break;
   4910             case 2:
   4911                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   4912                op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
   4913                break;
   4914             case 3:
   4915                return False;
   4916             default:
   4917                vassert(0);
   4918          }
   4919          arg_n = newTemp(Ity_V128);
   4920          arg_m = newTemp(Ity_V128);
   4921          if (A & 1) {
   4922             if (nreg & 1)
   4923                return False;
   4924             nreg >>= 1;
   4925             assign(arg_n, getQReg(nreg));
   4926          } else {
   4927             assign(arg_n, unop(cvt, getDRegI64(nreg)));
   4928          }
   4929          assign(arg_m, unop(cvt, getDRegI64(mreg)));
   4930          putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   4931                        condT);
   4932          DIP("v%s%c.%c%d q%u, %c%u, d%u\n", (A & 2) ? "sub" : "add",
   4933              (A & 1) ? 'w' : 'l', U ? 'u' : 's', 8 << size, dreg,
   4934              (A & 1) ? 'q' : 'd', nreg, mreg);
   4935          return True;
   4936       case 4:
   4937          /* VADDHN, VRADDHN */
   4938          if (mreg & 1)
   4939             return False;
   4940          mreg >>= 1;
   4941          if (nreg & 1)
   4942             return False;
   4943          nreg >>= 1;
   4944          size = B;
   4945          switch (size) {
   4946             case 0:
   4947                op = Iop_Add16x8;
   4948                cvt = Iop_NarrowUn16to8x8;
   4949                sh = Iop_ShrN16x8;
   4950                imm = 1U << 7;
   4951                imm = (imm << 16) | imm;
   4952                imm = (imm << 32) | imm;
   4953                break;
   4954             case 1:
   4955                op = Iop_Add32x4;
   4956                cvt = Iop_NarrowUn32to16x4;
   4957                sh = Iop_ShrN32x4;
   4958                imm = 1U << 15;
   4959                imm = (imm << 32) | imm;
   4960                break;
   4961             case 2:
   4962                op = Iop_Add64x2;
   4963                cvt = Iop_NarrowUn64to32x2;
   4964                sh = Iop_ShrN64x2;
   4965                imm = 1U << 31;
   4966                break;
   4967             case 3:
   4968                return False;
   4969             default:
   4970                vassert(0);
   4971          }
   4972          tmp = newTemp(Ity_V128);
   4973          res = newTemp(Ity_V128);
   4974          assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
   4975          if (U) {
   4976             /* VRADDHN */
   4977             assign(res, binop(op, mkexpr(tmp),
   4978                      binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
   4979          } else {
   4980             assign(res, mkexpr(tmp));
   4981          }
   4982          putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
   4983                     condT);
   4984          DIP("v%saddhn.i%d d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
   4985              nreg, mreg);
   4986          return True;
   4987       case 5:
   4988          /* VABAL */
   4989          if (!((theInstr >> 23) & 1)) {
   4990             vpanic("VABA should not be in dis_neon_data_3diff\n");
   4991          }
   4992          if (dreg & 1)
   4993             return False;
   4994          dreg >>= 1;
   4995          switch (size) {
   4996             case 0:
   4997                cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4998                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   4999                cvt2 = Iop_Widen8Sto16x8;
   5000                op = Iop_Sub16x8;
   5001                op2 = Iop_Add16x8;
   5002                break;
   5003             case 1:
   5004                cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   5005                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   5006                cvt2 = Iop_Widen16Sto32x4;
   5007                op = Iop_Sub32x4;
   5008                op2 = Iop_Add32x4;
   5009                break;
   5010             case 2:
   5011                cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   5012                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   5013                cvt2 = Iop_Widen32Sto64x2;
   5014                op = Iop_Sub64x2;
   5015                op2 = Iop_Add64x2;
   5016                break;
   5017             case 3:
   5018                return False;
   5019             default:
   5020                vassert(0);
   5021          }
   5022          arg_n = newTemp(Ity_V128);
   5023          arg_m = newTemp(Ity_V128);
   5024          cond = newTemp(Ity_V128);
   5025          res = newTemp(Ity_V128);
   5026          assign(arg_n, unop(cvt, getDRegI64(nreg)));
   5027          assign(arg_m, unop(cvt, getDRegI64(mreg)));
   5028          assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
   5029                                             getDRegI64(mreg))));
   5030          assign(res, binop(op2,
   5031                            binop(Iop_OrV128,
   5032                                  binop(Iop_AndV128,
   5033                                        binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   5034                                        mkexpr(cond)),
   5035                                  binop(Iop_AndV128,
   5036                                        binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   5037                                        unop(Iop_NotV128, mkexpr(cond)))),
   5038                            getQReg(dreg)));
   5039          putQReg(dreg, mkexpr(res), condT);
   5040          DIP("vabal.%c%d q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
   5041              nreg, mreg);
   5042          return True;
   5043       case 6:
   5044          /* VSUBHN, VRSUBHN */
   5045          if (mreg & 1)
   5046             return False;
   5047          mreg >>= 1;
   5048          if (nreg & 1)
   5049             return False;
   5050          nreg >>= 1;
   5051          size = B;
   5052          switch (size) {
   5053             case 0:
   5054                op = Iop_Sub16x8;
   5055                op2 = Iop_Add16x8;
   5056                cvt = Iop_NarrowUn16to8x8;
   5057                sh = Iop_ShrN16x8;
   5058                imm = 1U << 7;
   5059                imm = (imm << 16) | imm;
   5060                imm = (imm << 32) | imm;
   5061                break;
   5062             case 1:
   5063                op = Iop_Sub32x4;
   5064                op2 = Iop_Add32x4;
   5065                cvt = Iop_NarrowUn32to16x4;
   5066                sh = Iop_ShrN32x4;
   5067                imm = 1U << 15;
   5068                imm = (imm << 32) | imm;
   5069                break;
   5070             case 2:
   5071                op = Iop_Sub64x2;
   5072                op2 = Iop_Add64x2;
   5073                cvt = Iop_NarrowUn64to32x2;
   5074                sh = Iop_ShrN64x2;
   5075                imm = 1U << 31;
   5076                break;
   5077             case 3:
   5078                return False;
   5079             default:
   5080                vassert(0);
   5081          }
   5082          tmp = newTemp(Ity_V128);
   5083          res = newTemp(Ity_V128);
   5084          assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
   5085          if (U) {
   5086             /* VRSUBHN */
   5087             assign(res, binop(op2, mkexpr(tmp),
   5088                      binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
   5089          } else {
   5090             assign(res, mkexpr(tmp));
   5091          }
   5092          putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
   5093                     condT);
   5094          DIP("v%ssubhn.i%d d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
   5095              nreg, mreg);
   5096          return True;
   5097       case 7:
   5098          /* VABDL */
   5099          if (!((theInstr >> 23) & 1)) {
   5100             vpanic("VABL should not be in dis_neon_data_3diff\n");
   5101          }
   5102          if (dreg & 1)
   5103             return False;
   5104          dreg >>= 1;
   5105          switch (size) {
   5106             case 0:
   5107                cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   5108                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   5109                cvt2 = Iop_Widen8Sto16x8;
   5110                op = Iop_Sub16x8;
   5111                break;
   5112             case 1:
   5113                cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   5114                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   5115                cvt2 = Iop_Widen16Sto32x4;
   5116                op = Iop_Sub32x4;
   5117                break;
   5118             case 2:
   5119                cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   5120                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   5121                cvt2 = Iop_Widen32Sto64x2;
   5122                op = Iop_Sub64x2;
   5123                break;
   5124             case 3:
   5125                return False;
   5126             default:
   5127                vassert(0);
   5128          }
   5129          arg_n = newTemp(Ity_V128);
   5130          arg_m = newTemp(Ity_V128);
   5131          cond = newTemp(Ity_V128);
   5132          res = newTemp(Ity_V128);
   5133          assign(arg_n, unop(cvt, getDRegI64(nreg)));
   5134          assign(arg_m, unop(cvt, getDRegI64(mreg)));
   5135          assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
   5136                                             getDRegI64(mreg))));
   5137          assign(res, binop(Iop_OrV128,
   5138                            binop(Iop_AndV128,
   5139                                  binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   5140                                  mkexpr(cond)),
   5141                            binop(Iop_AndV128,
   5142                                  binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   5143                                  unop(Iop_NotV128, mkexpr(cond)))));
   5144          putQReg(dreg, mkexpr(res), condT);
   5145          DIP("vabdl.%c%d q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
   5146              nreg, mreg);
   5147          return True;
   5148       case 8:
   5149       case 10:
   5150          /* VMLAL, VMLSL (integer) */
   5151          if (dreg & 1)
   5152             return False;
   5153          dreg >>= 1;
   5154          size = B;
   5155          switch (size) {
   5156             case 0:
   5157                op = U ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
   5158                op2 = P ? Iop_Sub16x8 : Iop_Add16x8;
   5159                break;
   5160             case 1:
   5161                op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
   5162                op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
   5163                break;
   5164             case 2:
   5165                op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
   5166                op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
   5167                break;
   5168             case 3:
   5169                return False;
   5170             default:
   5171                vassert(0);
   5172          }
   5173          res = newTemp(Ity_V128);
   5174          assign(res, binop(op, getDRegI64(nreg),getDRegI64(mreg)));
   5175          putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
   5176          DIP("vml%cl.%c%d q%u, d%u, d%u\n", P ? 's' : 'a', U ? 'u' : 's',
   5177              8 << size, dreg, nreg, mreg);
   5178          return True;
   5179       case 9:
   5180       case 11:
   5181          /* VQDMLAL, VQDMLSL */
   5182          if (U)
   5183             return False;
   5184          if (dreg & 1)
   5185             return False;
   5186          dreg >>= 1;
   5187          size = B;
   5188          switch (size) {
   5189             case 0: case 3:
   5190                return False;
   5191             case 1:
   5192                op = Iop_QDMull16Sx4;
   5193                cmp = Iop_CmpEQ16x4;
   5194                add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
   5195                op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
   5196                imm = 1LL << 15;
   5197                imm = (imm << 16) | imm;
   5198                imm = (imm << 32) | imm;
   5199                break;
   5200             case 2:
   5201                op = Iop_QDMull32Sx2;
   5202                cmp = Iop_CmpEQ32x2;
   5203                add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
   5204                op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
   5205                imm = 1LL << 31;
   5206                imm = (imm << 32) | imm;
   5207                break;
   5208             default:
   5209                vassert(0);
   5210          }
   5211          res = newTemp(Ity_V128);
   5212          tmp = newTemp(Ity_V128);
   5213          assign(res, binop(op, getDRegI64(nreg), getDRegI64(mreg)));
   5214          assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
   5215          setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
   5216                     True, condT);
   5217          setFlag_QC(binop(Iop_And64,
   5218                           binop(cmp, getDRegI64(nreg), mkU64(imm)),
   5219                           binop(cmp, getDRegI64(mreg), mkU64(imm))),
   5220                     mkU64(0),
   5221                     False, condT);
   5222          putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
   5223          DIP("vqdml%cl.s%d q%u, d%u, d%u\n", P ? 's' : 'a', 8 << size, dreg,
   5224              nreg, mreg);
   5225          return True;
   5226       case 12:
   5227       case 14:
   5228          /* VMULL (integer or polynomial) */
   5229          if (dreg & 1)
   5230             return False;
   5231          dreg >>= 1;
   5232          size = B;
   5233          switch (size) {
   5234             case 0:
   5235                op = (U) ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
   5236                if (P)
   5237                   op = Iop_PolynomialMull8x8;
   5238                break;
   5239             case 1:
   5240                if (P) return False;
   5241                op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
   5242                break;
   5243             case 2:
   5244                if (P) return False;
   5245                op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
   5246                break;
   5247             case 3:
   5248                return False;
   5249             default:
   5250                vassert(0);
   5251          }
   5252          putQReg(dreg, binop(op, getDRegI64(nreg),
   5253                                  getDRegI64(mreg)), condT);
   5254          DIP("vmull.%c%d q%u, d%u, d%u\n", P ? 'p' : (U ? 'u' : 's'),
   5255                8 << size, dreg, nreg, mreg);
   5256          return True;
   5257       case 13:
   5258          /* VQDMULL */
   5259          if (U)
   5260             return False;
   5261          if (dreg & 1)
   5262             return False;
   5263          dreg >>= 1;
   5264          size = B;
   5265          switch (size) {
   5266             case 0:
   5267             case 3:
   5268                return False;
   5269             case 1:
   5270                op = Iop_QDMull16Sx4;
   5271                op2 = Iop_CmpEQ16x4;
   5272                imm = 1LL << 15;
   5273                imm = (imm << 16) | imm;
   5274                imm = (imm << 32) | imm;
   5275                break;
   5276             case 2:
   5277                op = Iop_QDMull32Sx2;
   5278                op2 = Iop_CmpEQ32x2;
   5279                imm = 1LL << 31;
   5280                imm = (imm << 32) | imm;
   5281                break;
   5282             default:
   5283                vassert(0);
   5284          }
   5285          putQReg(dreg, binop(op, getDRegI64(nreg), getDRegI64(mreg)),
   5286                condT);
   5287          setFlag_QC(binop(Iop_And64,
   5288                           binop(op2, getDRegI64(nreg), mkU64(imm)),
   5289                           binop(op2, getDRegI64(mreg), mkU64(imm))),
   5290                     mkU64(0),
   5291                     False, condT);
   5292          DIP("vqdmull.s%d q%u, d%u, d%u\n", 8 << size, dreg, nreg, mreg);
   5293          return True;
   5294       default:
   5295          return False;
   5296    }
   5297    return False;
   5298 }
   5299 
   5300 /* A7.4.3 Two registers and a scalar */
   5301 static
   5302 Bool dis_neon_data_2reg_and_scalar ( UInt theInstr, IRTemp condT )
   5303 {
   5304 #  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
   5305    UInt U = INSN(24,24);
   5306    UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
   5307    UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
   5308    UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
   5309    UInt size = INSN(21,20);
   5310    UInt index;
   5311    UInt Q = INSN(24,24);
   5312 
   5313    if (INSN(27,25) != 1 || INSN(23,23) != 1
   5314        || INSN(6,6) != 1 || INSN(4,4) != 0)
   5315       return False;
   5316 
   5317    /* VMLA, VMLS (scalar)  */
   5318    if ((INSN(11,8) & BITS4(1,0,1,0)) == BITS4(0,0,0,0)) {
   5319       IRTemp res, arg_m, arg_n;
   5320       IROp dup, get, op, op2, add, sub;
   5321       if (Q) {
   5322          if ((dreg & 1) || (nreg & 1))
   5323             return False;
   5324          dreg >>= 1;
   5325          nreg >>= 1;
   5326          res = newTemp(Ity_V128);
   5327          arg_m = newTemp(Ity_V128);
   5328          arg_n = newTemp(Ity_V128);
   5329          assign(arg_n, getQReg(nreg));
   5330          switch(size) {
   5331             case 1:
   5332                dup = Iop_Dup16x8;
   5333                get = Iop_GetElem16x4;
   5334                index = mreg >> 3;
   5335                mreg &= 7;
   5336                break;
   5337             case 2:
   5338                dup = Iop_Dup32x4;
   5339                get = Iop_GetElem32x2;
   5340                index = mreg >> 4;
   5341                mreg &= 0xf;
   5342                break;
   5343             case 0:
   5344             case 3:
   5345                return False;
   5346             default:
   5347                vassert(0);
   5348          }
   5349          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5350       } else {
   5351          res = newTemp(Ity_I64);
   5352          arg_m = newTemp(Ity_I64);
   5353          arg_n = newTemp(Ity_I64);
   5354          assign(arg_n, getDRegI64(nreg));
   5355          switch(size) {
   5356             case 1:
   5357                dup = Iop_Dup16x4;
   5358                get = Iop_GetElem16x4;
   5359                index = mreg >> 3;
   5360                mreg &= 7;
   5361                break;
   5362             case 2:
   5363                dup = Iop_Dup32x2;
   5364                get = Iop_GetElem32x2;
   5365                index = mreg >> 4;
   5366                mreg &= 0xf;
   5367                break;
   5368             case 0:
   5369             case 3:
   5370                return False;
   5371             default:
   5372                vassert(0);
   5373          }
   5374          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5375       }
   5376       if (INSN(8,8)) {
   5377          switch (size) {
   5378             case 2:
   5379                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   5380                add = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
   5381                sub = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
   5382                break;
   5383             case 0:
   5384             case 1:
   5385             case 3:
   5386                return False;
   5387             default:
   5388                vassert(0);
   5389          }
   5390       } else {
   5391          switch (size) {
   5392             case 1:
   5393                op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   5394                add = Q ? Iop_Add16x8 : Iop_Add16x4;
   5395                sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   5396                break;
   5397             case 2:
   5398                op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   5399                add = Q ? Iop_Add32x4 : Iop_Add32x2;
   5400                sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   5401                break;
   5402             case 0:
   5403             case 3:
   5404                return False;
   5405             default:
   5406                vassert(0);
   5407          }
   5408       }
   5409       op2 = INSN(10,10) ? sub : add;
   5410       assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   5411       if (Q)
   5412          putQReg(dreg, binop_w_fake_RM(op2, getQReg(dreg), mkexpr(res)),
   5413                  condT);
   5414       else
   5415          putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)),
   5416                     condT);
   5417       DIP("vml%c.%c%d %c%u, %c%u, d%u[%u]\n", INSN(10,10) ? 's' : 'a',
   5418             INSN(8,8) ? 'f' : 'i', 8 << size,
   5419             Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, mreg, index);
   5420       return True;
   5421    }
   5422 
   5423    /* VMLAL, VMLSL (scalar)   */
   5424    if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,0)) {
   5425       IRTemp res, arg_m, arg_n;
   5426       IROp dup, get, op, op2, add, sub;
   5427       if (dreg & 1)
   5428          return False;
   5429       dreg >>= 1;
   5430       res = newTemp(Ity_V128);
   5431       arg_m = newTemp(Ity_I64);
   5432       arg_n = newTemp(Ity_I64);
   5433       assign(arg_n, getDRegI64(nreg));
   5434       switch(size) {
   5435          case 1:
   5436             dup = Iop_Dup16x4;
   5437             get = Iop_GetElem16x4;
   5438             index = mreg >> 3;
   5439             mreg &= 7;
   5440             break;
   5441          case 2:
   5442             dup = Iop_Dup32x2;
   5443             get = Iop_GetElem32x2;
   5444             index = mreg >> 4;
   5445             mreg &= 0xf;
   5446             break;
   5447          case 0:
   5448          case 3:
   5449             return False;
   5450          default:
   5451             vassert(0);
   5452       }
   5453       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5454       switch (size) {
   5455          case 1:
   5456             op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
   5457             add = Iop_Add32x4;
   5458             sub = Iop_Sub32x4;
   5459             break;
   5460          case 2:
   5461             op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
   5462             add = Iop_Add64x2;
   5463             sub = Iop_Sub64x2;
   5464             break;
   5465          case 0:
   5466          case 3:
   5467             return False;
   5468          default:
   5469             vassert(0);
   5470       }
   5471       op2 = INSN(10,10) ? sub : add;
   5472       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5473       putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
   5474       DIP("vml%cl.%c%d q%u, d%u, d%u[%u]\n",
   5475           INSN(10,10) ? 's' : 'a', U ? 'u' : 's',
   5476           8 << size, dreg, nreg, mreg, index);
   5477       return True;
   5478    }
   5479 
   5480    /* VQDMLAL, VQDMLSL (scalar)  */
   5481    if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,1) && !U) {
   5482       IRTemp res, arg_m, arg_n, tmp;
   5483       IROp dup, get, op, op2, add, cmp;
   5484       UInt P = INSN(10,10);
   5485       ULong imm;
   5486       if (dreg & 1)
   5487          return False;
   5488       dreg >>= 1;
   5489       res = newTemp(Ity_V128);
   5490       arg_m = newTemp(Ity_I64);
   5491       arg_n = newTemp(Ity_I64);
   5492       assign(arg_n, getDRegI64(nreg));
   5493       switch(size) {
   5494          case 1:
   5495             dup = Iop_Dup16x4;
   5496             get = Iop_GetElem16x4;
   5497             index = mreg >> 3;
   5498             mreg &= 7;
   5499             break;
   5500          case 2:
   5501             dup = Iop_Dup32x2;
   5502             get = Iop_GetElem32x2;
   5503             index = mreg >> 4;
   5504             mreg &= 0xf;
   5505             break;
   5506          case 0:
   5507          case 3:
   5508             return False;
   5509          default:
   5510             vassert(0);
   5511       }
   5512       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5513       switch (size) {
   5514          case 0:
   5515          case 3:
   5516             return False;
   5517          case 1:
   5518             op = Iop_QDMull16Sx4;
   5519             cmp = Iop_CmpEQ16x4;
   5520             add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
   5521             op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
   5522             imm = 1LL << 15;
   5523             imm = (imm << 16) | imm;
   5524             imm = (imm << 32) | imm;
   5525             break;
   5526          case 2:
   5527             op = Iop_QDMull32Sx2;
   5528             cmp = Iop_CmpEQ32x2;
   5529             add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
   5530             op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
   5531             imm = 1LL << 31;
   5532             imm = (imm << 32) | imm;
   5533             break;
   5534          default:
   5535             vassert(0);
   5536       }
   5537       res = newTemp(Ity_V128);
   5538       tmp = newTemp(Ity_V128);
   5539       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5540       assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
   5541       setFlag_QC(binop(Iop_And64,
   5542                        binop(cmp, mkexpr(arg_n), mkU64(imm)),
   5543                        binop(cmp, mkexpr(arg_m), mkU64(imm))),
   5544                  mkU64(0),
   5545                  False, condT);
   5546       setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
   5547                  True, condT);
   5548       putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
   5549       DIP("vqdml%cl.s%d q%u, d%u, d%u[%u]\n", P ? 's' : 'a', 8 << size,
   5550           dreg, nreg, mreg, index);
   5551       return True;
   5552    }
   5553 
   5554    /* VMUL (by scalar)  */
   5555    if ((INSN(11,8) & BITS4(1,1,1,0)) == BITS4(1,0,0,0)) {
   5556       IRTemp res, arg_m, arg_n;
   5557       IROp dup, get, op;
   5558       if (Q) {
   5559          if ((dreg & 1) || (nreg & 1))
   5560             return False;
   5561          dreg >>= 1;
   5562          nreg >>= 1;
   5563          res = newTemp(Ity_V128);
   5564          arg_m = newTemp(Ity_V128);
   5565          arg_n = newTemp(Ity_V128);
   5566          assign(arg_n, getQReg(nreg));
   5567          switch(size) {
   5568             case 1:
   5569                dup = Iop_Dup16x8;
   5570                get = Iop_GetElem16x4;
   5571                index = mreg >> 3;
   5572                mreg &= 7;
   5573                break;
   5574             case 2:
   5575                dup = Iop_Dup32x4;
   5576                get = Iop_GetElem32x2;
   5577                index = mreg >> 4;
   5578                mreg &= 0xf;
   5579                break;
   5580             case 0:
   5581             case 3:
   5582                return False;
   5583             default:
   5584                vassert(0);
   5585          }
   5586          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5587       } else {
   5588          res = newTemp(Ity_I64);
   5589          arg_m = newTemp(Ity_I64);
   5590          arg_n = newTemp(Ity_I64);
   5591          assign(arg_n, getDRegI64(nreg));
   5592          switch(size) {
   5593             case 1:
   5594                dup = Iop_Dup16x4;
   5595                get = Iop_GetElem16x4;
   5596                index = mreg >> 3;
   5597                mreg &= 7;
   5598                break;
   5599             case 2:
   5600                dup = Iop_Dup32x2;
   5601                get = Iop_GetElem32x2;
   5602                index = mreg >> 4;
   5603                mreg &= 0xf;
   5604                break;
   5605             case 0:
   5606             case 3:
   5607                return False;
   5608             default:
   5609                vassert(0);
   5610          }
   5611          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5612       }
   5613       if (INSN(8,8)) {
   5614          switch (size) {
   5615             case 2:
   5616                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
   5617                break;
   5618             case 0:
   5619             case 1:
   5620             case 3:
   5621                return False;
   5622             default:
   5623                vassert(0);
   5624          }
   5625       } else {
   5626          switch (size) {
   5627             case 1:
   5628                op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   5629                break;
   5630             case 2:
   5631                op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   5632                break;
   5633             case 0:
   5634             case 3:
   5635                return False;
   5636             default:
   5637                vassert(0);
   5638          }
   5639       }
   5640       assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
   5641       if (Q)
   5642          putQReg(dreg, mkexpr(res), condT);
   5643       else
   5644          putDRegI64(dreg, mkexpr(res), condT);
   5645       DIP("vmul.%c%d %c%u, %c%u, d%u[%u]\n", INSN(8,8) ? 'f' : 'i',
   5646           8 << size, Q ? 'q' : 'd', dreg,
   5647           Q ? 'q' : 'd', nreg, mreg, index);
   5648       return True;
   5649    }
   5650 
   5651    /* VMULL (scalar) */
   5652    if (INSN(11,8) == BITS4(1,0,1,0)) {
   5653       IRTemp res, arg_m, arg_n;
   5654       IROp dup, get, op;
   5655       if (dreg & 1)
   5656          return False;
   5657       dreg >>= 1;
   5658       res = newTemp(Ity_V128);
   5659       arg_m = newTemp(Ity_I64);
   5660       arg_n = newTemp(Ity_I64);
   5661       assign(arg_n, getDRegI64(nreg));
   5662       switch(size) {
   5663          case 1:
   5664             dup = Iop_Dup16x4;
   5665             get = Iop_GetElem16x4;
   5666             index = mreg >> 3;
   5667             mreg &= 7;
   5668             break;
   5669          case 2:
   5670             dup = Iop_Dup32x2;
   5671             get = Iop_GetElem32x2;
   5672             index = mreg >> 4;
   5673             mreg &= 0xf;
   5674             break;
   5675          case 0:
   5676          case 3:
   5677             return False;
   5678          default:
   5679             vassert(0);
   5680       }
   5681       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5682       switch (size) {
   5683          case 1: op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4; break;
   5684          case 2: op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2; break;
   5685          case 0: case 3: return False;
   5686          default: vassert(0);
   5687       }
   5688       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5689       putQReg(dreg, mkexpr(res), condT);
   5690       DIP("vmull.%c%d q%u, d%u, d%u[%u]\n", U ? 'u' : 's', 8 << size, dreg,
   5691           nreg, mreg, index);
   5692       return True;
   5693    }
   5694 
   5695    /* VQDMULL */
   5696    if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
   5697       IROp op ,op2, dup, get;
   5698       ULong imm;
   5699       IRTemp arg_m, arg_n;
   5700       if (dreg & 1)
   5701          return False;
   5702       dreg >>= 1;
   5703       arg_m = newTemp(Ity_I64);
   5704       arg_n = newTemp(Ity_I64);
   5705       assign(arg_n, getDRegI64(nreg));
   5706       switch(size) {
   5707          case 1:
   5708             dup = Iop_Dup16x4;
   5709             get = Iop_GetElem16x4;
   5710             index = mreg >> 3;
   5711             mreg &= 7;
   5712             break;
   5713          case 2:
   5714             dup = Iop_Dup32x2;
   5715             get = Iop_GetElem32x2;
   5716             index = mreg >> 4;
   5717             mreg &= 0xf;
   5718             break;
   5719          case 0:
   5720          case 3:
   5721             return False;
   5722          default:
   5723             vassert(0);
   5724       }
   5725       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5726       switch (size) {
   5727          case 0:
   5728          case 3:
   5729             return False;
   5730          case 1:
   5731             op = Iop_QDMull16Sx4;
   5732             op2 = Iop_CmpEQ16x4;
   5733             imm = 1LL << 15;
   5734             imm = (imm << 16) | imm;
   5735             imm = (imm << 32) | imm;
   5736             break;
   5737          case 2:
   5738             op = Iop_QDMull32Sx2;
   5739             op2 = Iop_CmpEQ32x2;
   5740             imm = 1LL << 31;
   5741             imm = (imm << 32) | imm;
   5742             break;
   5743          default:
   5744             vassert(0);
   5745       }
   5746       putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
   5747             condT);
   5748       setFlag_QC(binop(Iop_And64,
   5749                        binop(op2, mkexpr(arg_n), mkU64(imm)),
   5750                        binop(op2, mkexpr(arg_m), mkU64(imm))),
   5751                  mkU64(0),
   5752                  False, condT);
   5753       DIP("vqdmull.s%d q%u, d%u, d%u[%u]\n", 8 << size, dreg, nreg, mreg,
   5754           index);
   5755       return True;
   5756    }
   5757 
   5758    /* VQDMULH */
   5759    if (INSN(11,8) == BITS4(1,1,0,0)) {
   5760       IROp op ,op2, dup, get;
   5761       ULong imm;
   5762       IRTemp res, arg_m, arg_n;
   5763       if (Q) {
   5764          if ((dreg & 1) || (nreg & 1))
   5765             return False;
   5766          dreg >>= 1;
   5767          nreg >>= 1;
   5768          res = newTemp(Ity_V128);
   5769          arg_m = newTemp(Ity_V128);
   5770          arg_n = newTemp(Ity_V128);
   5771          assign(arg_n, getQReg(nreg));
   5772          switch(size) {
   5773             case 1:
   5774                dup = Iop_Dup16x8;
   5775                get = Iop_GetElem16x4;
   5776                index = mreg >> 3;
   5777                mreg &= 7;
   5778                break;
   5779             case 2:
   5780                dup = Iop_Dup32x4;
   5781                get = Iop_GetElem32x2;
   5782                index = mreg >> 4;
   5783                mreg &= 0xf;
   5784                break;
   5785             case 0:
   5786             case 3:
   5787                return False;
   5788             default:
   5789                vassert(0);
   5790          }
   5791          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5792       } else {
   5793          res = newTemp(Ity_I64);
   5794          arg_m = newTemp(Ity_I64);
   5795          arg_n = newTemp(Ity_I64);
   5796          assign(arg_n, getDRegI64(nreg));
   5797          switch(size) {
   5798             case 1:
   5799                dup = Iop_Dup16x4;
   5800                get = Iop_GetElem16x4;
   5801                index = mreg >> 3;
   5802                mreg &= 7;
   5803                break;
   5804             case 2:
   5805                dup = Iop_Dup32x2;
   5806                get = Iop_GetElem32x2;
   5807                index = mreg >> 4;
   5808                mreg &= 0xf;
   5809                break;
   5810             case 0:
   5811             case 3:
   5812                return False;
   5813             default:
   5814                vassert(0);
   5815          }
   5816          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5817       }
   5818       switch (size) {
   5819          case 0:
   5820          case 3:
   5821             return False;
   5822          case 1:
   5823             op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
   5824             op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   5825             imm = 1LL << 15;
   5826             imm = (imm << 16) | imm;
   5827             imm = (imm << 32) | imm;
   5828             break;
   5829          case 2:
   5830             op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
   5831             op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   5832             imm = 1LL << 31;
   5833             imm = (imm << 32) | imm;
   5834             break;
   5835          default:
   5836             vassert(0);
   5837       }
   5838       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5839       setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   5840                        binop(op2, mkexpr(arg_n),
   5841                                   Q ? mkU128(imm) : mkU64(imm)),
   5842                        binop(op2, mkexpr(arg_m),
   5843                              Q ? mkU128(imm) : mkU64(imm))),
   5844                  Q ? mkU128(0) : mkU64(0),
   5845                  Q, condT);
   5846       if (Q)
   5847          putQReg(dreg, mkexpr(res), condT);
   5848       else
   5849          putDRegI64(dreg, mkexpr(res), condT);
   5850       DIP("vqdmulh.s%d %c%u, %c%u, d%u[%u]\n",
   5851           8 << size, Q ? 'q' : 'd', dreg,
   5852           Q ? 'q' : 'd', nreg, mreg, index);
   5853       return True;
   5854    }
   5855 
   5856    /* VQRDMULH (scalar) */
   5857    if (INSN(11,8) == BITS4(1,1,0,1)) {
   5858       IROp op ,op2, dup, get;
   5859       ULong imm;
   5860       IRTemp res, arg_m, arg_n;
   5861       if (Q) {
   5862          if ((dreg & 1) || (nreg & 1))
   5863             return False;
   5864          dreg >>= 1;
   5865          nreg >>= 1;
   5866          res = newTemp(Ity_V128);
   5867          arg_m = newTemp(Ity_V128);
   5868          arg_n = newTemp(Ity_V128);
   5869          assign(arg_n, getQReg(nreg));
   5870          switch(size) {
   5871             case 1:
   5872                dup = Iop_Dup16x8;
   5873                get = Iop_GetElem16x4;
   5874                index = mreg >> 3;
   5875                mreg &= 7;
   5876                break;
   5877             case 2:
   5878                dup = Iop_Dup32x4;
   5879                get = Iop_GetElem32x2;
   5880                index = mreg >> 4;
   5881                mreg &= 0xf;
   5882                break;
   5883             case 0:
   5884             case 3:
   5885                return False;
   5886             default:
   5887                vassert(0);
   5888          }
   5889          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5890       } else {
   5891          res = newTemp(Ity_I64);
   5892          arg_m = newTemp(Ity_I64);
   5893          arg_n = newTemp(Ity_I64);
   5894          assign(arg_n, getDRegI64(nreg));
   5895          switch(size) {
   5896             case 1:
   5897                dup = Iop_Dup16x4;
   5898                get = Iop_GetElem16x4;
   5899                index = mreg >> 3;
   5900                mreg &= 7;
   5901                break;
   5902             case 2:
   5903                dup = Iop_Dup32x2;
   5904                get = Iop_GetElem32x2;
   5905                index = mreg >> 4;
   5906                mreg &= 0xf;
   5907                break;
   5908             case 0:
   5909             case 3:
   5910                return False;
   5911             default:
   5912                vassert(0);
   5913          }
   5914          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
   5915       }
   5916       switch (size) {
   5917          case 0:
   5918          case 3:
   5919             return False;
   5920          case 1:
   5921             op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
   5922             op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   5923             imm = 1LL << 15;
   5924             imm = (imm << 16) | imm;
   5925             imm = (imm << 32) | imm;
   5926             break;
   5927          case 2:
   5928             op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
   5929             op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   5930             imm = 1LL << 31;
   5931             imm = (imm << 32) | imm;
   5932             break;
   5933          default:
   5934             vassert(0);
   5935       }
   5936       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   5937       setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   5938                        binop(op2, mkexpr(arg_n),
   5939                                   Q ? mkU128(imm) : mkU64(imm)),
   5940                        binop(op2, mkexpr(arg_m),
   5941                                   Q ? mkU128(imm) : mkU64(imm))),
   5942                  Q ? mkU128(0) : mkU64(0),
   5943                  Q, condT);
   5944       if (Q)
   5945          putQReg(dreg, mkexpr(res), condT);
   5946       else
   5947          putDRegI64(dreg, mkexpr(res), condT);
   5948       DIP("vqrdmulh.s%d %c%u, %c%u, d%u[%u]\n",
   5949           8 << size, Q ? 'q' : 'd', dreg,
   5950           Q ? 'q' : 'd', nreg, mreg, index);
   5951       return True;
   5952    }
   5953 
   5954    return False;
   5955 #  undef INSN
   5956 }
   5957 
   5958 /* A7.4.4 Two registers and a shift amount */
   5959 static
   5960 Bool dis_neon_data_2reg_and_shift ( UInt theInstr, IRTemp condT )
   5961 {
   5962    UInt A = (theInstr >> 8) & 0xf;
   5963    UInt B = (theInstr >> 6) & 1;
   5964    UInt L = (theInstr >> 7) & 1;
   5965    UInt U = (theInstr >> 24) & 1;
   5966    UInt Q = B;
   5967    UInt imm6 = (theInstr >> 16) & 0x3f;
   5968    UInt shift_imm;
   5969    UInt size = 4;
   5970    UInt tmp;
   5971    UInt mreg = get_neon_m_regno(theInstr);
   5972    UInt dreg = get_neon_d_regno(theInstr);
   5973    ULong imm = 0;
   5974    IROp op, cvt, add = Iop_INVALID, cvt2, op_rev;
   5975    IRTemp reg_m, res, mask;
   5976 
   5977    if (L == 0 && ((theInstr >> 19) & 7) == 0)
   5978       /* It is one reg and immediate */
   5979       return False;
   5980 
   5981    tmp = (L << 6) | imm6;
   5982    if (tmp & 0x40) {
   5983       size = 3;
   5984       shift_imm = 64 - imm6;
   5985    } else if (tmp & 0x20) {
   5986       size = 2;
   5987       shift_imm = 64 - imm6;
   5988    } else if (tmp & 0x10) {
   5989       size = 1;
   5990       shift_imm = 32 - imm6;
   5991    } else if (tmp & 0x8) {
   5992       size = 0;
   5993       shift_imm = 16 - imm6;
   5994    } else {
   5995       return False;
   5996    }
   5997 
   5998    switch (A) {
   5999       case 3:
   6000       case 2:
   6001          /* VRSHR, VRSRA */
   6002          if (shift_imm > 0) {
   6003             IRExpr *imm_val;
   6004             imm = 1L;
   6005             switch (size) {
   6006                case 0:
   6007                   imm = (imm << 8) | imm;
   6008                   /* fall through */
   6009                case 1:
   6010                   imm = (imm << 16) | imm;
   6011                   /* fall through */
   6012                case 2:
   6013                   imm = (imm << 32) | imm;
   6014                   /* fall through */
   6015                case 3:
   6016                   break;
   6017                default:
   6018                   vassert(0);
   6019             }
   6020             if (Q) {
   6021                reg_m = newTemp(Ity_V128);
   6022                res = newTemp(Ity_V128);
   6023                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   6024                assign(reg_m, getQReg(mreg));
   6025                switch (size) {
   6026                   case 0:
   6027                      add = Iop_Add8x16;
   6028                      op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
   6029                      break;
   6030                   case 1:
   6031                      add = Iop_Add16x8;
   6032                      op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   6033                      break;
   6034                   case 2:
   6035                      add = Iop_Add32x4;
   6036                      op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   6037                      break;
   6038                   case 3:
   6039                      add = Iop_Add64x2;
   6040                      op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
   6041                      break;
   6042                   default:
   6043                      vassert(0);
   6044                }
   6045             } else {
   6046                reg_m = newTemp(Ity_I64);
   6047                res = newTemp(Ity_I64);
   6048                imm_val = mkU64(imm);
   6049                assign(reg_m, getDRegI64(mreg));
   6050                switch (size) {
   6051                   case 0:
   6052                      add = Iop_Add8x8;
   6053                      op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
   6054                      break;
   6055                   case 1:
   6056                      add = Iop_Add16x4;
   6057                      op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
   6058                      break;
   6059                   case 2:
   6060                      add = Iop_Add32x2;
   6061                      op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
   6062                      break;
   6063                   case 3:
   6064                      add = Iop_Add64;
   6065                      op = U ? Iop_Shr64 : Iop_Sar64;
   6066                      break;
   6067                   default:
   6068                      vassert(0);
   6069                }
   6070             }
   6071             assign(res,
   6072                    binop(add,
   6073                          binop(op,
   6074                                mkexpr(reg_m),
   6075                                mkU8(shift_imm)),
   6076                          binop(Q ? Iop_AndV128 : Iop_And64,
   6077                                binop(op,
   6078                                      mkexpr(reg_m),
   6079                                      mkU8(shift_imm - 1)),
   6080                                imm_val)));
   6081          } else {
   6082             if (Q) {
   6083                res = newTemp(Ity_V128);
   6084                assign(res, getQReg(mreg));
   6085             } else {
   6086                res = newTemp(Ity_I64);
   6087                assign(res, getDRegI64(mreg));
   6088             }
   6089          }
   6090          if (A == 3) {
   6091             if (Q) {
   6092                putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
   6093                              condT);
   6094             } else {
   6095                putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
   6096                                 condT);
   6097             }
   6098             DIP("vrsra.%c%d %c%u, %c%u, #%u\n",
   6099                 U ? 'u' : 's', 8 << size,
   6100                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6101          } else {
   6102             if (Q) {
   6103                putQReg(dreg, mkexpr(res), condT);
   6104             } else {
   6105                putDRegI64(dreg, mkexpr(res), condT);
   6106             }
   6107             DIP("vrshr.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
   6108                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6109          }
   6110          return True;
   6111       case 1:
   6112       case 0:
   6113          /* VSHR, VSRA */
   6114          if (Q) {
   6115             reg_m = newTemp(Ity_V128);
   6116             assign(reg_m, getQReg(mreg));
   6117             res = newTemp(Ity_V128);
   6118          } else {
   6119             reg_m = newTemp(Ity_I64);
   6120             assign(reg_m, getDRegI64(mreg));
   6121             res = newTemp(Ity_I64);
   6122          }
   6123          if (Q) {
   6124             switch (size) {
   6125                case 0:
   6126                   op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
   6127                   add = Iop_Add8x16;
   6128                   break;
   6129                case 1:
   6130                   op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   6131                   add = Iop_Add16x8;
   6132                   break;
   6133                case 2:
   6134                   op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   6135                   add = Iop_Add32x4;
   6136                   break;
   6137                case 3:
   6138                   op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
   6139                   add = Iop_Add64x2;
   6140                   break;
   6141                default:
   6142                   vassert(0);
   6143             }
   6144          } else {
   6145             switch (size) {
   6146                case 0:
   6147                   op =  U ? Iop_ShrN8x8 : Iop_SarN8x8;
   6148                   add = Iop_Add8x8;
   6149                   break;
   6150                case 1:
   6151                   op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
   6152                   add = Iop_Add16x4;
   6153                   break;
   6154                case 2:
   6155                   op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
   6156                   add = Iop_Add32x2;
   6157                   break;
   6158                case 3:
   6159                   op = U ? Iop_Shr64 : Iop_Sar64;
   6160                   add = Iop_Add64;
   6161                   break;
   6162                default:
   6163                   vassert(0);
   6164             }
   6165          }
   6166          assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
   6167          if (A == 1) {
   6168             if (Q) {
   6169                putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
   6170                              condT);
   6171             } else {
   6172                putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
   6173                                 condT);
   6174             }
   6175             DIP("vsra.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
   6176                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6177          } else {
   6178             if (Q) {
   6179                putQReg(dreg, mkexpr(res), condT);
   6180             } else {
   6181                putDRegI64(dreg, mkexpr(res), condT);
   6182             }
   6183             DIP("vshr.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
   6184                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6185          }
   6186          return True;
   6187       case 4:
   6188          /* VSRI */
   6189          if (!U)
   6190             return False;
   6191          if (Q) {
   6192             res = newTemp(Ity_V128);
   6193             mask = newTemp(Ity_V128);
   6194          } else {
   6195             res = newTemp(Ity_I64);
   6196             mask = newTemp(Ity_I64);
   6197          }
   6198          switch (size) {
   6199             case 0: op = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; break;
   6200             case 1: op = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; break;
   6201             case 2: op = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; break;
   6202             case 3: op = Q ? Iop_ShrN64x2 : Iop_Shr64; break;
   6203             default: vassert(0);
   6204          }
   6205          if (Q) {
   6206             assign(mask, binop(op, binop(Iop_64HLtoV128,
   6207                                          mkU64(0xFFFFFFFFFFFFFFFFLL),
   6208                                          mkU64(0xFFFFFFFFFFFFFFFFLL)),
   6209                                mkU8(shift_imm)));
   6210             assign(res, binop(Iop_OrV128,
   6211                               binop(Iop_AndV128,
   6212                                     getQReg(dreg),
   6213                                     unop(Iop_NotV128,
   6214                                          mkexpr(mask))),
   6215                               binop(op,
   6216                                     getQReg(mreg),
   6217                                     mkU8(shift_imm))));
   6218             putQReg(dreg, mkexpr(res), condT);
   6219          } else {
   6220             assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
   6221                                mkU8(shift_imm)));
   6222             assign(res, binop(Iop_Or64,
   6223                               binop(Iop_And64,
   6224                                     getDRegI64(dreg),
   6225                                     unop(Iop_Not64,
   6226                                          mkexpr(mask))),
   6227                               binop(op,
   6228                                     getDRegI64(mreg),
   6229                                     mkU8(shift_imm))));
   6230             putDRegI64(dreg, mkexpr(res), condT);
   6231          }
   6232          DIP("vsri.%d %c%u, %c%u, #%u\n",
   6233              8 << size, Q ? 'q' : 'd', dreg,
   6234              Q ? 'q' : 'd', mreg, shift_imm);
   6235          return True;
   6236       case 5:
   6237          if (U) {
   6238             /* VSLI */
   6239             shift_imm = 8 * (1 << size) - shift_imm;
   6240             if (Q) {
   6241                res = newTemp(Ity_V128);
   6242                mask = newTemp(Ity_V128);
   6243             } else {
   6244                res = newTemp(Ity_I64);
   6245                mask = newTemp(Ity_I64);
   6246             }
   6247             switch (size) {
   6248                case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
   6249                case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
   6250                case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
   6251                case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
   6252                default: vassert(0);
   6253             }
   6254             if (Q) {
   6255                assign(mask, binop(op, binop(Iop_64HLtoV128,
   6256                                             mkU64(0xFFFFFFFFFFFFFFFFLL),
   6257                                             mkU64(0xFFFFFFFFFFFFFFFFLL)),
   6258                                   mkU8(shift_imm)));
   6259                assign(res, binop(Iop_OrV128,
   6260                                  binop(Iop_AndV128,
   6261                                        getQReg(dreg),
   6262                                        unop(Iop_NotV128,
   6263                                             mkexpr(mask))),
   6264                                  binop(op,
   6265                                        getQReg(mreg),
   6266                                        mkU8(shift_imm))));
   6267                putQReg(dreg, mkexpr(res), condT);
   6268             } else {
   6269                assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
   6270                                   mkU8(shift_imm)));
   6271                assign(res, binop(Iop_Or64,
   6272                                  binop(Iop_And64,
   6273                                        getDRegI64(dreg),
   6274                                        unop(Iop_Not64,
   6275                                             mkexpr(mask))),
   6276                                  binop(op,
   6277                                        getDRegI64(mreg),
   6278                                        mkU8(shift_imm))));
   6279                putDRegI64(dreg, mkexpr(res), condT);
   6280             }
   6281             DIP("vsli.%d %c%u, %c%u, #%u\n",
   6282                 8 << size, Q ? 'q' : 'd', dreg,
   6283                 Q ? 'q' : 'd', mreg, shift_imm);
   6284             return True;
   6285          } else {
   6286             /* VSHL #imm */
   6287             shift_imm = 8 * (1 << size) - shift_imm;
   6288             if (Q) {
   6289                res = newTemp(Ity_V128);
   6290             } else {
   6291                res = newTemp(Ity_I64);
   6292             }
   6293             switch (size) {
   6294                case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
   6295                case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
   6296                case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
   6297                case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
   6298                default: vassert(0);
   6299             }
   6300             assign(res, binop(op, Q ? getQReg(mreg) : getDRegI64(mreg),
   6301                      mkU8(shift_imm)));
   6302             if (Q) {
   6303                putQReg(dreg, mkexpr(res), condT);
   6304             } else {
   6305                putDRegI64(dreg, mkexpr(res), condT);
   6306             }
   6307             DIP("vshl.i%d %c%u, %c%u, #%u\n",
   6308                 8 << size, Q ? 'q' : 'd', dreg,
   6309                 Q ? 'q' : 'd', mreg, shift_imm);
   6310             return True;
   6311          }
   6312          break;
   6313       case 6:
   6314       case 7:
   6315          /* VQSHL, VQSHLU */
   6316          shift_imm = 8 * (1 << size) - shift_imm;
   6317          if (U) {
   6318             if (A & 1) {
   6319                switch (size) {
   6320                   case 0:
   6321                      op = Q ? Iop_QShlNsatUU8x16 : Iop_QShlNsatUU8x8;
   6322                      op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   6323                      break;
   6324                   case 1:
   6325                      op = Q ? Iop_QShlNsatUU16x8 : Iop_QShlNsatUU16x4;
   6326                      op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   6327                      break;
   6328                   case 2:
   6329                      op = Q ? Iop_QShlNsatUU32x4 : Iop_QShlNsatUU32x2;
   6330                      op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   6331                      break;
   6332                   case 3:
   6333                      op = Q ? Iop_QShlNsatUU64x2 : Iop_QShlNsatUU64x1;
   6334                      op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
   6335                      break;
   6336                   default:
   6337                      vassert(0);
   6338                }
   6339                DIP("vqshl.u%d %c%u, %c%u, #%u\n",
   6340                    8 << size,
   6341                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6342             } else {
   6343                switch (size) {
   6344                   case 0:
   6345                      op = Q ? Iop_QShlNsatSU8x16 : Iop_QShlNsatSU8x8;
   6346                      op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   6347                      break;
   6348                   case 1:
   6349                      op = Q ? Iop_QShlNsatSU16x8 : Iop_QShlNsatSU16x4;
   6350                      op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   6351                      break;
   6352                   case 2:
   6353                      op = Q ? Iop_QShlNsatSU32x4 : Iop_QShlNsatSU32x2;
   6354                      op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   6355                      break;
   6356                   case 3:
   6357                      op = Q ? Iop_QShlNsatSU64x2 : Iop_QShlNsatSU64x1;
   6358                      op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
   6359                      break;
   6360                   default:
   6361                      vassert(0);
   6362                }
   6363                DIP("vqshlu.s%d %c%u, %c%u, #%u\n",
   6364                    8 << size,
   6365                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6366             }
   6367          } else {
   6368             if (!(A & 1))
   6369                return False;
   6370             switch (size) {
   6371                case 0:
   6372                   op = Q ? Iop_QShlNsatSS8x16 : Iop_QShlNsatSS8x8;
   6373                   op_rev = Q ? Iop_SarN8x16 : Iop_SarN8x8;
   6374                   break;
   6375                case 1:
   6376                   op = Q ? Iop_QShlNsatSS16x8 : Iop_QShlNsatSS16x4;
   6377                   op_rev = Q ? Iop_SarN16x8 : Iop_SarN16x4;
   6378                   break;
   6379                case 2:
   6380                   op = Q ? Iop_QShlNsatSS32x4 : Iop_QShlNsatSS32x2;
   6381                   op_rev = Q ? Iop_SarN32x4 : Iop_SarN32x2;
   6382                   break;
   6383                case 3:
   6384                   op = Q ? Iop_QShlNsatSS64x2 : Iop_QShlNsatSS64x1;
   6385                   op_rev = Q ? Iop_SarN64x2 : Iop_Sar64;
   6386                   break;
   6387                default:
   6388                   vassert(0);
   6389             }
   6390             DIP("vqshl.s%d %c%u, %c%u, #%u\n",
   6391                 8 << size,
   6392                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
   6393          }
   6394          if (Q) {
   6395             tmp = newTemp(Ity_V128);
   6396             res = newTemp(Ity_V128);
   6397             reg_m = newTemp(Ity_V128);
   6398             assign(reg_m, getQReg(mreg));
   6399          } else {
   6400             tmp = newTemp(Ity_I64);
   6401             res = newTemp(Ity_I64);
   6402             reg_m = newTemp(Ity_I64);
   6403             assign(reg_m, getDRegI64(mreg));
   6404          }
   6405          assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
   6406          assign(tmp, binop(op_rev, mkexpr(res), mkU8(shift_imm)));
   6407          setFlag_QC(mkexpr(tmp), mkexpr(reg_m), Q, condT);
   6408          if (Q)
   6409             putQReg(dreg, mkexpr(res), condT);
   6410          else
   6411             putDRegI64(dreg, mkexpr(res), condT);
   6412          return True;
   6413       case 8:
   6414          if (!U) {
   6415             if (L == 1)
   6416                return False;
   6417             size++;
   6418             dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   6419             mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   6420             if (mreg & 1)
   6421                return False;
   6422             mreg >>= 1;
   6423             if (!B) {
   6424                /* VSHRN*/
   6425                IROp narOp;
   6426                reg_m = newTemp(Ity_V128);
   6427                assign(reg_m, getQReg(mreg));
   6428                res = newTemp(Ity_I64);
   6429                switch (size) {
   6430                   case 1:
   6431                      op = Iop_ShrN16x8;
   6432                      narOp = Iop_NarrowUn16to8x8;
   6433                      break;
   6434                   case 2:
   6435                      op = Iop_ShrN32x4;
   6436                      narOp = Iop_NarrowUn32to16x4;
   6437                      break;
   6438                   case 3:
   6439                      op = Iop_ShrN64x2;
   6440                      narOp = Iop_NarrowUn64to32x2;
   6441                      break;
   6442                   default:
   6443                      vassert(0);
   6444                }
   6445                assign(res, unop(narOp,
   6446                                 binop(op,
   6447                                       mkexpr(reg_m),
   6448                                       mkU8(shift_imm))));
   6449                putDRegI64(dreg, mkexpr(res), condT);
   6450                DIP("vshrn.i%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
   6451                    shift_imm);
   6452                return True;
   6453             } else {
   6454                /* VRSHRN   */
   6455                IROp addOp, shOp, narOp;
   6456                IRExpr *imm_val;
   6457                reg_m = newTemp(Ity_V128);
   6458                assign(reg_m, getQReg(mreg));
   6459                res = newTemp(Ity_I64);
   6460                imm = 1L;
   6461                switch (size) {
   6462                   case 0: imm = (imm <<  8) | imm; /* fall through */
   6463                   case 1: imm = (imm << 16) | imm; /* fall through */
   6464                   case 2: imm = (imm << 32) | imm; /* fall through */
   6465                   case 3: break;
   6466                   default: vassert(0);
   6467                }
   6468                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   6469                switch (size) {
   6470                   case 1:
   6471                      addOp = Iop_Add16x8;
   6472                      shOp = Iop_ShrN16x8;
   6473                      narOp = Iop_NarrowUn16to8x8;
   6474                      break;
   6475                   case 2:
   6476                      addOp = Iop_Add32x4;
   6477                      shOp = Iop_ShrN32x4;
   6478                      narOp = Iop_NarrowUn32to16x4;
   6479                      break;
   6480                   case 3:
   6481                      addOp = Iop_Add64x2;
   6482                      shOp = Iop_ShrN64x2;
   6483                      narOp = Iop_NarrowUn64to32x2;
   6484                      break;
   6485                   default:
   6486                      vassert(0);
   6487                }
   6488                assign(res, unop(narOp,
   6489                                 binop(addOp,
   6490                                       binop(shOp,
   6491                                             mkexpr(reg_m),
   6492                                             mkU8(shift_imm)),
   6493                                       binop(Iop_AndV128,
   6494                                             binop(shOp,
   6495                                                   mkexpr(reg_m),
   6496                                                   mkU8(shift_imm - 1)),
   6497                                             imm_val))));
   6498                putDRegI64(dreg, mkexpr(res), condT);
   6499                if (shift_imm == 0) {
   6500                   DIP("vmov%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
   6501                       shift_imm);
   6502                } else {
   6503                   DIP("vrshrn.i%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
   6504                       shift_imm);
   6505                }
   6506                return True;
   6507             }
   6508          } else {
   6509             /* fall through */
   6510          }
   6511       case 9:
   6512          dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   6513          mreg = ((theInstr >>  1) & 0x10) | (theInstr & 0xF);
   6514          if (mreg & 1)
   6515             return False;
   6516          mreg >>= 1;
   6517          size++;
   6518          if ((theInstr >> 8) & 1) {
   6519             switch (size) {
   6520                case 1:
   6521                   op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   6522                   cvt = U ? Iop_QNarrowUn16Uto8Ux8 : Iop_QNarrowUn16Sto8Sx8;
   6523                   cvt2 = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   6524                   break;
   6525                case 2:
   6526                   op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   6527                   cvt = U ? Iop_QNarrowUn32Uto16Ux4 : Iop_QNarrowUn32Sto16Sx4;
   6528                   cvt2 = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   6529                   break;
   6530                case 3:
   6531                   op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
   6532                   cvt = U ? Iop_QNarrowUn64Uto32Ux2 : Iop_QNarrowUn64Sto32Sx2;
   6533                   cvt2 = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   6534                   break;
   6535                default:
   6536                   vassert(0);
   6537             }
   6538             DIP("vq%sshrn.%c%d d%u, q%u, #%u\n", B ? "r" : "",
   6539                 U ? 'u' : 's', 8 << size, dreg, mreg, shift_imm);
   6540          } else {
   6541             vassert(U);
   6542             switch (size) {
   6543                case 1:
   6544                   op = Iop_SarN16x8;
   6545                   cvt = Iop_QNarrowUn16Sto8Ux8;
   6546                   cvt2 = Iop_Widen8Uto16x8;
   6547                   break;
   6548                case 2:
   6549                   op = Iop_SarN32x4;
   6550                   cvt = Iop_QNarrowUn32Sto16Ux4;
   6551                   cvt2 = Iop_Widen16Uto32x4;
   6552                   break;
   6553                case 3:
   6554                   op = Iop_SarN64x2;
   6555                   cvt = Iop_QNarrowUn64Sto32Ux2;
   6556                   cvt2 = Iop_Widen32Uto64x2;
   6557                   break;
   6558                default:
   6559                   vassert(0);
   6560             }
   6561             DIP("vq%sshrun.s%d d%u, q%u, #%u\n", B ? "r" : "",
   6562                 8 << size, dreg, mreg, shift_imm);
   6563          }
   6564          if (B) {
   6565             if (shift_imm > 0) {
   6566                imm = 1;
   6567                switch (size) {
   6568                   case 1: imm = (imm << 16) | imm; /* fall through */
   6569                   case 2: imm = (imm << 32) | imm; /* fall through */
   6570                   case 3: break;
   6571                   case 0: default: vassert(0);
   6572                }
   6573                switch (size) {
   6574                   case 1: add = Iop_Add16x8; break;
   6575                   case 2: add = Iop_Add32x4; break;
   6576                   case 3: add = Iop_Add64x2; break;
   6577                   case 0: default: vassert(0);
   6578                }
   6579             }
   6580          }
   6581          reg_m = newTemp(Ity_V128);
   6582          res = newTemp(Ity_V128);
   6583          assign(reg_m, getQReg(mreg));
   6584          if (B) {
   6585             /* VQRSHRN, VQRSHRUN */
   6586             assign(res, binop(add,
   6587                               binop(op, mkexpr(reg_m), mkU8(shift_imm)),
   6588                               binop(Iop_AndV128,
   6589                                     binop(op,
   6590                                           mkexpr(reg_m),
   6591                                           mkU8(shift_imm - 1)),
   6592                                     mkU128(imm))));
   6593          } else {
   6594             /* VQSHRN, VQSHRUN */
   6595             assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
   6596          }
   6597          setFlag_QC(unop(cvt2, unop(cvt, mkexpr(res))), mkexpr(res),
   6598                     True, condT);
   6599          putDRegI64(dreg, unop(cvt, mkexpr(res)), condT);
   6600          return True;
   6601       case 10:
   6602          /* VSHLL
   6603             VMOVL ::= VSHLL #0 */
   6604          if (B)
   6605             return False;
   6606          if (dreg & 1)
   6607             return False;
   6608          dreg >>= 1;
   6609          shift_imm = (8 << size) - shift_imm;
   6610          res = newTemp(Ity_V128);
   6611          switch (size) {
   6612             case 0:
   6613                op = Iop_ShlN16x8;
   6614                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
   6615                break;
   6616             case 1:
   6617                op = Iop_ShlN32x4;
   6618                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
   6619                break;
   6620             case 2:
   6621                op = Iop_ShlN64x2;
   6622                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
   6623                break;
   6624             case 3:
   6625                return False;
   6626             default:
   6627                vassert(0);
   6628          }
   6629          assign(res, binop(op, unop(cvt, getDRegI64(mreg)), mkU8(shift_imm)));
   6630          putQReg(dreg, mkexpr(res), condT);
   6631          if (shift_imm == 0) {
   6632             DIP("vmovl.%c%d q%u, d%u\n", U ? 'u' : 's', 8 << size,
   6633                 dreg, mreg);
   6634          } else {
   6635             DIP("vshll.%c%d q%u, d%u, #%u\n", U ? 'u' : 's', 8 << size,
   6636                 dreg, mreg, shift_imm);
   6637          }
   6638          return True;
   6639       case 14:
   6640       case 15:
   6641          /* VCVT floating-point <-> fixed-point */
   6642          if ((theInstr >> 8) & 1) {
   6643             if (U) {
   6644                op = Q ? Iop_F32ToFixed32Ux4_RZ : Iop_F32ToFixed32Ux2_RZ;
   6645             } else {
   6646                op = Q ? Iop_F32ToFixed32Sx4_RZ : Iop_F32ToFixed32Sx2_RZ;
   6647             }
   6648             DIP("vcvt.%c32.f32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
   6649                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
   6650                 64 - ((theInstr >> 16) & 0x3f));
   6651          } else {
   6652             if (U) {
   6653                op = Q ? Iop_Fixed32UToF32x4_RN : Iop_Fixed32UToF32x2_RN;
   6654             } else {
   6655                op = Q ? Iop_Fixed32SToF32x4_RN : Iop_Fixed32SToF32x2_RN;
   6656             }
   6657             DIP("vcvt.f32.%c32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
   6658                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
   6659                 64 - ((theInstr >> 16) & 0x3f));
   6660          }
   6661          if (((theInstr >> 21) & 1) == 0)
   6662             return False;
   6663          if (Q) {
   6664             putQReg(dreg, binop(op, getQReg(mreg),
   6665                      mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
   6666          } else {
   6667             putDRegI64(dreg, binop(op, getDRegI64(mreg),
   6668                        mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
   6669          }
   6670          return True;
   6671       default:
   6672          return False;
   6673 
   6674    }
   6675    return False;
   6676 }
   6677 
   6678 /* A7.4.5 Two registers, miscellaneous */
   6679 static
   6680 Bool dis_neon_data_2reg_misc ( UInt theInstr, IRTemp condT )
   6681 {
   6682    UInt A = (theInstr >> 16) & 3;
   6683    UInt B = (theInstr >> 6) & 0x1f;
   6684    UInt Q = (theInstr >> 6) & 1;
   6685    UInt U = (theInstr >> 24) & 1;
   6686    UInt size = (theInstr >> 18) & 3;
   6687    UInt dreg = get_neon_d_regno(theInstr);
   6688    UInt mreg = get_neon_m_regno(theInstr);
   6689    UInt F = (theInstr >> 10) & 1;
   6690    IRTemp arg_d = IRTemp_INVALID;
   6691    IRTemp arg_m = IRTemp_INVALID;
   6692    IRTemp res = IRTemp_INVALID;
   6693    switch (A) {
   6694       case 0:
   6695          if (Q) {
   6696             arg_m = newTemp(Ity_V128);
   6697             res = newTemp(Ity_V128);
   6698             assign(arg_m, getQReg(mreg));
   6699          } else {
   6700             arg_m = newTemp(Ity_I64);
   6701             res = newTemp(Ity_I64);
   6702             assign(arg_m, getDRegI64(mreg));
   6703          }
   6704          switch (B >> 1) {
   6705             case 0: {
   6706                /* VREV64 */
   6707                IROp op;
   6708                switch (size) {
   6709                   case 0:
   6710                      op = Q ? Iop_Reverse8sIn64_x2 : Iop_Reverse8sIn64_x1;
   6711                      break;
   6712                   case 1:
   6713                      op = Q ? Iop_Reverse16sIn64_x2 : Iop_Reverse16sIn64_x1;
   6714                      break;
   6715                   case 2:
   6716                      op = Q ? Iop_Reverse32sIn64_x2 : Iop_Reverse32sIn64_x1;
   6717                      break;
   6718                   case 3:
   6719                      return False;
   6720                   default:
   6721                      vassert(0);
   6722                }
   6723                assign(res, unop(op, mkexpr(arg_m)));
   6724                DIP("vrev64.%d %c%u, %c%u\n", 8 << size,
   6725                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6726                break;
   6727             }
   6728             case 1: {
   6729                /* VREV32 */
   6730                IROp op;
   6731                switch (size) {
   6732                   case 0:
   6733                      op = Q ? Iop_Reverse8sIn32_x4 : Iop_Reverse8sIn32_x2;
   6734                      break;
   6735                   case 1:
   6736                      op = Q ? Iop_Reverse16sIn32_x4 : Iop_Reverse16sIn32_x2;
   6737                      break;
   6738                   case 2:
   6739                   case 3:
   6740                      return False;
   6741                   default:
   6742                      vassert(0);
   6743                }
   6744                assign(res, unop(op, mkexpr(arg_m)));
   6745                DIP("vrev32.%d %c%u, %c%u\n", 8 << size,
   6746                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6747                break;
   6748             }
   6749             case 2: {
   6750                /* VREV16 */
   6751                IROp op;
   6752                switch (size) {
   6753                   case 0:
   6754                      op = Q ? Iop_Reverse8sIn16_x8 : Iop_Reverse8sIn16_x4;
   6755                      break;
   6756                   case 1:
   6757                   case 2:
   6758                   case 3:
   6759                      return False;
   6760                   default:
   6761                      vassert(0);
   6762                }
   6763                assign(res, unop(op, mkexpr(arg_m)));
   6764                DIP("vrev16.%d %c%u, %c%u\n", 8 << size,
   6765                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6766                break;
   6767             }
   6768             case 3:
   6769                return False;
   6770             case 4:
   6771             case 5: {
   6772                /* VPADDL */
   6773                IROp op;
   6774                U = (theInstr >> 7) & 1;
   6775                if (Q) {
   6776                   switch (size) {
   6777                      case 0: op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16; break;
   6778                      case 1: op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8; break;
   6779                      case 2: op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4; break;
   6780                      case 3: return False;
   6781                      default: vassert(0);
   6782                   }
   6783                } else {
   6784                   switch (size) {
   6785                      case 0: op = U ? Iop_PwAddL8Ux8  : Iop_PwAddL8Sx8;  break;
   6786                      case 1: op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4; break;
   6787                      case 2: op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2; break;
   6788                      case 3: return False;
   6789                      default: vassert(0);
   6790                   }
   6791                }
   6792                assign(res, unop(op, mkexpr(arg_m)));
   6793                DIP("vpaddl.%c%d %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
   6794                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6795                break;
   6796             }
   6797             case 6:
   6798             case 7:
   6799                return False;
   6800             case 8: {
   6801                /* VCLS */
   6802                IROp op;
   6803                switch (size) {
   6804                   case 0: op = Q ? Iop_Cls8x16 : Iop_Cls8x8; break;
   6805                   case 1: op = Q ? Iop_Cls16x8 : Iop_Cls16x4; break;
   6806                   case 2: op = Q ? Iop_Cls32x4 : Iop_Cls32x2; break;
   6807                   case 3: return False;
   6808                   default: vassert(0);
   6809                }
   6810                assign(res, unop(op, mkexpr(arg_m)));
   6811                DIP("vcls.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6812                    Q ? 'q' : 'd', mreg);
   6813                break;
   6814             }
   6815             case 9: {
   6816                /* VCLZ */
   6817                IROp op;
   6818                switch (size) {
   6819                   case 0: op = Q ? Iop_Clz8x16 : Iop_Clz8x8; break;
   6820                   case 1: op = Q ? Iop_Clz16x8 : Iop_Clz16x4; break;
   6821                   case 2: op = Q ? Iop_Clz32x4 : Iop_Clz32x2; break;
   6822                   case 3: return False;
   6823                   default: vassert(0);
   6824                }
   6825                assign(res, unop(op, mkexpr(arg_m)));
   6826                DIP("vclz.i%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6827                    Q ? 'q' : 'd', mreg);
   6828                break;
   6829             }
   6830             case 10:
   6831                /* VCNT */
   6832                assign(res, unop(Q ? Iop_Cnt8x16 : Iop_Cnt8x8, mkexpr(arg_m)));
   6833                DIP("vcnt.8 %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
   6834                    mreg);
   6835                break;
   6836             case 11:
   6837                /* VMVN */
   6838                if (Q)
   6839                   assign(res, unop(Iop_NotV128, mkexpr(arg_m)));
   6840                else
   6841                   assign(res, unop(Iop_Not64, mkexpr(arg_m)));
   6842                DIP("vmvn %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
   6843                    mreg);
   6844                break;
   6845             case 12:
   6846             case 13: {
   6847                /* VPADAL */
   6848                IROp op, add_op;
   6849                U = (theInstr >> 7) & 1;
   6850                if (Q) {
   6851                   switch (size) {
   6852                      case 0:
   6853                         op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16;
   6854                         add_op = Iop_Add16x8;
   6855                         break;
   6856                      case 1:
   6857                         op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8;
   6858                         add_op = Iop_Add32x4;
   6859                         break;
   6860                      case 2:
   6861                         op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4;
   6862                         add_op = Iop_Add64x2;
   6863                         break;
   6864                      case 3:
   6865                         return False;
   6866                      default:
   6867                         vassert(0);
   6868                   }
   6869                } else {
   6870                   switch (size) {
   6871                      case 0:
   6872                         op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8;
   6873                         add_op = Iop_Add16x4;
   6874                         break;
   6875                      case 1:
   6876                         op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4;
   6877                         add_op = Iop_Add32x2;
   6878                         break;
   6879                      case 2:
   6880                         op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2;
   6881                         add_op = Iop_Add64;
   6882                         break;
   6883                      case 3:
   6884                         return False;
   6885                      default:
   6886                         vassert(0);
   6887                   }
   6888                }
   6889                if (Q) {
   6890                   arg_d = newTemp(Ity_V128);
   6891                   assign(arg_d, getQReg(dreg));
   6892                } else {
   6893                   arg_d = newTemp(Ity_I64);
   6894                   assign(arg_d, getDRegI64(dreg));
   6895                }
   6896                assign(res, binop(add_op, unop(op, mkexpr(arg_m)),
   6897                                          mkexpr(arg_d)));
   6898                DIP("vpadal.%c%d %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
   6899                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   6900                break;
   6901             }
   6902             case 14: {
   6903                /* VQABS */
   6904                IROp op_sub, op_qsub, op_cmp;
   6905                IRTemp mask, tmp;
   6906                IRExpr *zero1, *zero2;
   6907                IRExpr *neg, *neg2;
   6908                if (Q) {
   6909                   zero1 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6910                   zero2 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6911                   mask = newTemp(Ity_V128);
   6912                   tmp = newTemp(Ity_V128);
   6913                } else {
   6914                   zero1 = mkU64(0);
   6915                   zero2 = mkU64(0);
   6916                   mask = newTemp(Ity_I64);
   6917                   tmp = newTemp(Ity_I64);
   6918                }
   6919                switch (size) {
   6920                   case 0:
   6921                      op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   6922                      op_qsub = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
   6923                      op_cmp = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   6924                      break;
   6925                   case 1:
   6926                      op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   6927                      op_qsub = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
   6928                      op_cmp = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4;
   6929                      break;
   6930                   case 2:
   6931                      op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   6932                      op_qsub = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
   6933                      op_cmp = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2;
   6934                      break;
   6935                   case 3:
   6936                      return False;
   6937                   default:
   6938                      vassert(0);
   6939                }
   6940                assign(mask, binop(op_cmp, mkexpr(arg_m), zero1));
   6941                neg = binop(op_qsub, zero2, mkexpr(arg_m));
   6942                neg2 = binop(op_sub, zero2, mkexpr(arg_m));
   6943                assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
   6944                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6945                                        mkexpr(mask),
   6946                                        mkexpr(arg_m)),
   6947                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6948                                        unop(Q ? Iop_NotV128 : Iop_Not64,
   6949                                             mkexpr(mask)),
   6950                                        neg)));
   6951                assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
   6952                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6953                                        mkexpr(mask),
   6954                                        mkexpr(arg_m)),
   6955                                  binop(Q ? Iop_AndV128 : Iop_And64,
   6956                                        unop(Q ? Iop_NotV128 : Iop_Not64,
   6957                                             mkexpr(mask)),
   6958                                        neg2)));
   6959                setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
   6960                DIP("vqabs.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6961                    Q ? 'q' : 'd', mreg);
   6962                break;
   6963             }
   6964             case 15: {
   6965                /* VQNEG */
   6966                IROp op, op2;
   6967                IRExpr *zero;
   6968                if (Q) {
   6969                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   6970                } else {
   6971                   zero = mkU64(0);
   6972                }
   6973                switch (size) {
   6974                   case 0:
   6975                      op = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
   6976                      op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   6977                      break;
   6978                   case 1:
   6979                      op = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
   6980                      op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   6981                      break;
   6982                   case 2:
   6983                      op = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
   6984                      op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   6985                      break;
   6986                   case 3:
   6987                      return False;
   6988                   default:
   6989                      vassert(0);
   6990                }
   6991                assign(res, binop(op, zero, mkexpr(arg_m)));
   6992                setFlag_QC(mkexpr(res), binop(op2, zero, mkexpr(arg_m)),
   6993                           Q, condT);
   6994                DIP("vqneg.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
   6995                    Q ? 'q' : 'd', mreg);
   6996                break;
   6997             }
   6998             default:
   6999                vassert(0);
   7000          }
   7001          if (Q) {
   7002             putQReg(dreg, mkexpr(res), condT);
   7003          } else {
   7004             putDRegI64(dreg, mkexpr(res), condT);
   7005          }
   7006          return True;
   7007       case 1:
   7008          if (Q) {
   7009             arg_m = newTemp(Ity_V128);
   7010             res = newTemp(Ity_V128);
   7011             assign(arg_m, getQReg(mreg));
   7012          } else {
   7013             arg_m = newTemp(Ity_I64);
   7014             res = newTemp(Ity_I64);
   7015             assign(arg_m, getDRegI64(mreg));
   7016          }
   7017          switch ((B >> 1) & 0x7) {
   7018             case 0: {
   7019                /* VCGT #0 */
   7020                IRExpr *zero;
   7021                IROp op;
   7022                if (Q) {
   7023                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7024                } else {
   7025                   zero = mkU64(0);
   7026                }
   7027                if (F) {
   7028                   switch (size) {
   7029                      case 0: case 1: case 3: return False;
   7030                      case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
   7031                      default: vassert(0);
   7032                   }
   7033                } else {
   7034                   switch (size) {
   7035                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   7036                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   7037                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   7038                      case 3: return False;
   7039                      default: vassert(0);
   7040                   }
   7041                }
   7042                assign(res, binop(op, mkexpr(arg_m), zero));
   7043                DIP("vcgt.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   7044                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7045                break;
   7046             }
   7047             case 1: {
   7048                /* VCGE #0 */
   7049                IROp op;
   7050                IRExpr *zero;
   7051                if (Q) {
   7052                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7053                } else {
   7054                   zero = mkU64(0);
   7055                }
   7056                if (F) {
   7057                   switch (size) {
   7058                      case 0: case 1: case 3: return False;
   7059                      case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
   7060                      default: vassert(0);
   7061                   }
   7062                   assign(res, binop(op, mkexpr(arg_m), zero));
   7063                } else {
   7064                   switch (size) {
   7065                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   7066                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   7067                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   7068                      case 3: return False;
   7069                      default: vassert(0);
   7070                   }
   7071                   assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   7072                                    binop(op, zero, mkexpr(arg_m))));
   7073                }
   7074                DIP("vcge.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   7075                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7076                break;
   7077             }
   7078             case 2: {
   7079                /* VCEQ #0 */
   7080                IROp op;
   7081                IRExpr *zero;
   7082                if (F) {
   7083                   if (Q) {
   7084                      zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7085                   } else {
   7086                      zero = mkU64(0);
   7087                   }
   7088                   switch (size) {
   7089                      case 0: case 1: case 3: return False;
   7090                      case 2: op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2; break;
   7091                      default: vassert(0);
   7092                   }
   7093                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7094                } else {
   7095                   switch (size) {
   7096                      case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
   7097                      case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
   7098                      case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
   7099                      case 3: return False;
   7100                      default: vassert(0);
   7101                   }
   7102                   assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   7103                                    unop(op, mkexpr(arg_m))));
   7104                }
   7105                DIP("vceq.%c%d %c%u, %c%u, #0\n", F ? 'f' : 'i', 8 << size,
   7106                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7107                break;
   7108             }
   7109             case 3: {
   7110                /* VCLE #0 */
   7111                IRExpr *zero;
   7112                IROp op;
   7113                if (Q) {
   7114                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7115                } else {
   7116                   zero = mkU64(0);
   7117                }
   7118                if (F) {
   7119                   switch (size) {
   7120                      case 0: case 1: case 3: return False;
   7121                      case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
   7122                      default: vassert(0);
   7123                   }
   7124                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7125                } else {
   7126                   switch (size) {
   7127                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   7128                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   7129                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   7130                      case 3: return False;
   7131                      default: vassert(0);
   7132                   }
   7133                   assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
   7134                                    binop(op, mkexpr(arg_m), zero)));
   7135                }
   7136                DIP("vcle.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   7137                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7138                break;
   7139             }
   7140             case 4: {
   7141                /* VCLT #0 */
   7142                IROp op;
   7143                IRExpr *zero;
   7144                if (Q) {
   7145                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7146                } else {
   7147                   zero = mkU64(0);
   7148                }
   7149                if (F) {
   7150                   switch (size) {
   7151                      case 0: case 1: case 3: return False;
   7152                      case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
   7153                      default: vassert(0);
   7154                   }
   7155                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7156                } else {
   7157                   switch (size) {
   7158                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
   7159                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
   7160                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
   7161                      case 3: return False;
   7162                      default: vassert(0);
   7163                   }
   7164                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7165                }
   7166                DIP("vclt.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
   7167                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7168                break;
   7169             }
   7170             case 5:
   7171                return False;
   7172             case 6: {
   7173                /* VABS */
   7174                if (!F) {
   7175                   IROp op;
   7176                   switch(size) {
   7177                      case 0: op = Q ? Iop_Abs8x16 : Iop_Abs8x8; break;
   7178                      case 1: op = Q ? Iop_Abs16x8 : Iop_Abs16x4; break;
   7179                      case 2: op = Q ? Iop_Abs32x4 : Iop_Abs32x2; break;
   7180                      case 3: return False;
   7181                      default: vassert(0);
   7182                   }
   7183                   assign(res, unop(op, mkexpr(arg_m)));
   7184                } else {
   7185                   assign(res, unop(Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2,
   7186                                    mkexpr(arg_m)));
   7187                }
   7188                DIP("vabs.%c%d %c%u, %c%u\n",
   7189                    F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
   7190                    Q ? 'q' : 'd', mreg);
   7191                break;
   7192             }
   7193             case 7: {
   7194                /* VNEG */
   7195                IROp op;
   7196                IRExpr *zero;
   7197                if (F) {
   7198                   switch (size) {
   7199                      case 0: case 1: case 3: return False;
   7200                      case 2: op = Q ? Iop_Neg32Fx4 : Iop_Neg32Fx2; break;
   7201                      default: vassert(0);
   7202                   }
   7203                   assign(res, unop(op, mkexpr(arg_m)));
   7204                } else {
   7205                   if (Q) {
   7206                      zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
   7207                   } else {
   7208                      zero = mkU64(0);
   7209                   }
   7210                   switch (size) {
   7211                      case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
   7212                      case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
   7213                      case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
   7214                      case 3: return False;
   7215                      default: vassert(0);
   7216                   }
   7217                   assign(res, binop(op, zero, mkexpr(arg_m)));
   7218                }
   7219                DIP("vneg.%c%d %c%u, %c%u\n",
   7220                    F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
   7221                    Q ? 'q' : 'd', mreg);
   7222                break;
   7223             }
   7224             default:
   7225                vassert(0);
   7226          }
   7227          if (Q) {
   7228             putQReg(dreg, mkexpr(res), condT);
   7229          } else {
   7230             putDRegI64(dreg, mkexpr(res), condT);
   7231          }
   7232          return True;
   7233       case 2:
   7234          if ((B >> 1) == 0) {
   7235             /* VSWP */
   7236             if (Q) {
   7237                arg_m = newTemp(Ity_V128);
   7238                assign(arg_m, getQReg(mreg));
   7239                putQReg(mreg, getQReg(dreg), condT);
   7240                putQReg(dreg, mkexpr(arg_m), condT);
   7241             } else {
   7242                arg_m = newTemp(Ity_I64);
   7243                assign(arg_m, getDRegI64(mreg));
   7244                putDRegI64(mreg, getDRegI64(dreg), condT);
   7245                putDRegI64(dreg, mkexpr(arg_m), condT);
   7246             }
   7247             DIP("vswp %c%u, %c%u\n",
   7248                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7249             return True;
   7250          } else if ((B >> 1) == 1) {
   7251             /* VTRN */
   7252             IROp op_odd = Iop_INVALID, op_even = Iop_INVALID;
   7253             IRTemp old_m, old_d, new_d, new_m;
   7254             if (Q) {
   7255                old_m = newTemp(Ity_V128);
   7256                old_d = newTemp(Ity_V128);
   7257                new_m = newTemp(Ity_V128);
   7258                new_d = newTemp(Ity_V128);
   7259                assign(old_m, getQReg(mreg));
   7260                assign(old_d, getQReg(dreg));
   7261             } else {
   7262                old_m = newTemp(Ity_I64);
   7263                old_d = newTemp(Ity_I64);
   7264                new_m = newTemp(Ity_I64);
   7265                new_d = newTemp(Ity_I64);
   7266                assign(old_m, getDRegI64(mreg));
   7267                assign(old_d, getDRegI64(dreg));
   7268             }
   7269             if (Q) {
   7270                switch (size) {
   7271                   case 0:
   7272                      op_odd  = Iop_InterleaveOddLanes8x16;
   7273                      op_even = Iop_InterleaveEvenLanes8x16;
   7274                      break;
   7275                   case 1:
   7276                      op_odd  = Iop_InterleaveOddLanes16x8;
   7277                      op_even = Iop_InterleaveEvenLanes16x8;
   7278                      break;
   7279                   case 2:
   7280                      op_odd  = Iop_InterleaveOddLanes32x4;
   7281                      op_even = Iop_InterleaveEvenLanes32x4;
   7282                      break;
   7283                   case 3:
   7284                      return False;
   7285                   default:
   7286                      vassert(0);
   7287                }
   7288             } else {
   7289                switch (size) {
   7290                   case 0:
   7291                      op_odd  = Iop_InterleaveOddLanes8x8;
   7292                      op_even = Iop_InterleaveEvenLanes8x8;
   7293                      break;
   7294                   case 1:
   7295                      op_odd  = Iop_InterleaveOddLanes16x4;
   7296                      op_even = Iop_InterleaveEvenLanes16x4;
   7297                      break;
   7298                   case 2:
   7299                      op_odd  = Iop_InterleaveHI32x2;
   7300                      op_even = Iop_InterleaveLO32x2;
   7301                      break;
   7302                   case 3:
   7303                      return False;
   7304                   default:
   7305                      vassert(0);
   7306                }
   7307             }
   7308             assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
   7309             assign(new_m, binop(op_odd, mkexpr(old_m), mkexpr(old_d)));
   7310             if (Q) {
   7311                putQReg(dreg, mkexpr(new_d), condT);
   7312                putQReg(mreg, mkexpr(new_m), condT);
   7313             } else {
   7314                putDRegI64(dreg, mkexpr(new_d), condT);
   7315                putDRegI64(mreg, mkexpr(new_m), condT);
   7316             }
   7317             DIP("vtrn.%d %c%u, %c%u\n",
   7318                 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7319             return True;
   7320          } else if ((B >> 1) == 2) {
   7321             /* VUZP */
   7322             IROp op_even, op_odd;
   7323             IRTemp old_m, old_d, new_m, new_d;
   7324             if (!Q && size == 2)
   7325                return False;
   7326             if (Q) {
   7327                old_m = newTemp(Ity_V128);
   7328                old_d = newTemp(Ity_V128);
   7329                new_m = newTemp(Ity_V128);
   7330                new_d = newTemp(Ity_V128);
   7331                assign(old_m, getQReg(mreg));
   7332                assign(old_d, getQReg(dreg));
   7333             } else {
   7334                old_m = newTemp(Ity_I64);
   7335                old_d = newTemp(Ity_I64);
   7336                new_m = newTemp(Ity_I64);
   7337                new_d = newTemp(Ity_I64);
   7338                assign(old_m, getDRegI64(mreg));
   7339                assign(old_d, getDRegI64(dreg));
   7340             }
   7341             switch (size) {
   7342                case 0:
   7343                   op_odd  = Q ? Iop_CatOddLanes8x16 : Iop_CatOddLanes8x8;
   7344                   op_even = Q ? Iop_CatEvenLanes8x16 : Iop_CatEvenLanes8x8;
   7345                   break;
   7346                case 1:
   7347                   op_odd  = Q ? Iop_CatOddLanes16x8 : Iop_CatOddLanes16x4;
   7348                   op_even = Q ? Iop_CatEvenLanes16x8 : Iop_CatEvenLanes16x4;
   7349                   break;
   7350                case 2:
   7351                   op_odd  = Iop_CatOddLanes32x4;
   7352                   op_even = Iop_CatEvenLanes32x4;
   7353                   break;
   7354                case 3:
   7355                   return False;
   7356                default:
   7357                   vassert(0);
   7358             }
   7359             assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
   7360             assign(new_m, binop(op_odd,  mkexpr(old_m), mkexpr(old_d)));
   7361             if (Q) {
   7362                putQReg(dreg, mkexpr(new_d), condT);
   7363                putQReg(mreg, mkexpr(new_m), condT);
   7364             } else {
   7365                putDRegI64(dreg, mkexpr(new_d), condT);
   7366                putDRegI64(mreg, mkexpr(new_m), condT);
   7367             }
   7368             DIP("vuzp.%d %c%u, %c%u\n",
   7369                 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7370             return True;
   7371          } else if ((B >> 1) == 3) {
   7372             /* VZIP */
   7373             IROp op_lo, op_hi;
   7374             IRTemp old_m, old_d, new_m, new_d;
   7375             if (!Q && size == 2)
   7376                return False;
   7377             if (Q) {
   7378                old_m = newTemp(Ity_V128);
   7379                old_d = newTemp(Ity_V128);
   7380                new_m = newTemp(Ity_V128);
   7381                new_d = newTemp(Ity_V128);
   7382                assign(old_m, getQReg(mreg));
   7383                assign(old_d, getQReg(dreg));
   7384             } else {
   7385                old_m = newTemp(Ity_I64);
   7386                old_d = newTemp(Ity_I64);
   7387                new_m = newTemp(Ity_I64);
   7388                new_d = newTemp(Ity_I64);
   7389                assign(old_m, getDRegI64(mreg));
   7390                assign(old_d, getDRegI64(dreg));
   7391             }
   7392             switch (size) {
   7393                case 0:
   7394                   op_hi = Q ? Iop_InterleaveHI8x16 : Iop_InterleaveHI8x8;
   7395                   op_lo = Q ? Iop_InterleaveLO8x16 : Iop_InterleaveLO8x8;
   7396                   break;
   7397                case 1:
   7398                   op_hi = Q ? Iop_InterleaveHI16x8 : Iop_InterleaveHI16x4;
   7399                   op_lo = Q ? Iop_InterleaveLO16x8 : Iop_InterleaveLO16x4;
   7400                   break;
   7401                case 2:
   7402                   op_hi = Iop_InterleaveHI32x4;
   7403                   op_lo = Iop_InterleaveLO32x4;
   7404                   break;
   7405                case 3:
   7406                   return False;
   7407                default:
   7408                   vassert(0);
   7409             }
   7410             assign(new_d, binop(op_lo, mkexpr(old_m), mkexpr(old_d)));
   7411             assign(new_m, binop(op_hi, mkexpr(old_m), mkexpr(old_d)));
   7412             if (Q) {
   7413                putQReg(dreg, mkexpr(new_d), condT);
   7414                putQReg(mreg, mkexpr(new_m), condT);
   7415             } else {
   7416                putDRegI64(dreg, mkexpr(new_d), condT);
   7417                putDRegI64(mreg, mkexpr(new_m), condT);
   7418             }
   7419             DIP("vzip.%d %c%u, %c%u\n",
   7420                 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7421             return True;
   7422          } else if (B == 8) {
   7423             /* VMOVN */
   7424             IROp op;
   7425             mreg >>= 1;
   7426             switch (size) {
   7427                case 0: op = Iop_NarrowUn16to8x8;  break;
   7428                case 1: op = Iop_NarrowUn32to16x4; break;
   7429                case 2: op = Iop_NarrowUn64to32x2; break;
   7430                case 3: return False;
   7431                default: vassert(0);
   7432             }
   7433             putDRegI64(dreg, unop(op, getQReg(mreg)), condT);
   7434             DIP("vmovn.i%d d%u, q%u\n", 16 << size, dreg, mreg);
   7435             return True;
   7436          } else if (B == 9 || (B >> 1) == 5) {
   7437             /* VQMOVN, VQMOVUN */
   7438             IROp op, op2;
   7439             IRTemp tmp;
   7440             dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   7441             mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   7442             if (mreg & 1)
   7443                return False;
   7444             mreg >>= 1;
   7445             switch (size) {
   7446                case 0: op2 = Iop_NarrowUn16to8x8;  break;
   7447                case 1: op2 = Iop_NarrowUn32to16x4; break;
   7448                case 2: op2 = Iop_NarrowUn64to32x2; break;
   7449                case 3: return False;
   7450                default: vassert(0);
   7451             }
   7452             switch (B & 3) {
   7453                case 0:
   7454                   vassert(0);
   7455                case 1:
   7456                   switch (size) {
   7457                      case 0: op = Iop_QNarrowUn16Sto8Ux8;  break;
   7458                      case 1: op = Iop_QNarrowUn32Sto16Ux4; break;
   7459                      case 2: op = Iop_QNarrowUn64Sto32Ux2; break;
   7460                      case 3: return False;
   7461                      default: vassert(0);
   7462                   }
   7463                   DIP("vqmovun.s%d d%u, q%u\n", 16 << size, dreg, mreg);
   7464                   break;
   7465                case 2:
   7466                   switch (size) {
   7467                      case 0: op = Iop_QNarrowUn16Sto8Sx8;  break;
   7468                      case 1: op = Iop_QNarrowUn32Sto16Sx4; break;
   7469                      case 2: op = Iop_QNarrowUn64Sto32Sx2; break;
   7470                      case 3: return False;
   7471                      default: vassert(0);
   7472                   }
   7473                   DIP("vqmovn.s%d d%u, q%u\n", 16 << size, dreg, mreg);
   7474                   break;
   7475                case 3:
   7476                   switch (size) {
   7477                      case 0: op = Iop_QNarrowUn16Uto8Ux8;  break;
   7478                      case 1: op = Iop_QNarrowUn32Uto16Ux4; break;
   7479                      case 2: op = Iop_QNarrowUn64Uto32Ux2; break;
   7480                      case 3: return False;
   7481                      default: vassert(0);
   7482                   }
   7483                   DIP("vqmovn.u%d d%u, q%u\n", 16 << size, dreg, mreg);
   7484                   break;
   7485                default:
   7486                   vassert(0);
   7487             }
   7488             res = newTemp(Ity_I64);
   7489             tmp = newTemp(Ity_I64);
   7490             assign(res, unop(op, getQReg(mreg)));
   7491             assign(tmp, unop(op2, getQReg(mreg)));
   7492             setFlag_QC(mkexpr(res), mkexpr(tmp), False, condT);
   7493             putDRegI64(dreg, mkexpr(res), condT);
   7494             return True;
   7495          } else if (B == 12) {
   7496             /* VSHLL (maximum shift) */
   7497             IROp op, cvt;
   7498             UInt shift_imm;
   7499             if (Q)
   7500                return False;
   7501             if (dreg & 1)
   7502                return False;
   7503             dreg >>= 1;
   7504             shift_imm = 8 << size;
   7505             res = newTemp(Ity_V128);
   7506             switch (size) {
   7507                case 0: op = Iop_ShlN16x8; cvt = Iop_Widen8Uto16x8;  break;
   7508                case 1: op = Iop_ShlN32x4; cvt = Iop_Widen16Uto32x4; break;
   7509                case 2: op = Iop_ShlN64x2; cvt = Iop_Widen32Uto64x2; break;
   7510                case 3: return False;
   7511                default: vassert(0);
   7512             }
   7513             assign(res, binop(op, unop(cvt, getDRegI64(mreg)),
   7514                                   mkU8(shift_imm)));
   7515             putQReg(dreg, mkexpr(res), condT);
   7516             DIP("vshll.i%d q%u, d%u, #%d\n", 8 << size, dreg, mreg, 8 << size);
   7517             return True;
   7518          } else if ((B >> 3) == 3 && (B & 3) == 0) {
   7519             /* VCVT (half<->single) */
   7520             /* Half-precision extensions are needed to run this */
   7521             vassert(0); // ATC
   7522             if (((theInstr >> 18) & 3) != 1)
   7523                return False;
   7524             if ((theInstr >> 8) & 1) {
   7525                if (dreg & 1)
   7526                   return False;
   7527                dreg >>= 1;
   7528                putQReg(dreg, unop(Iop_F16toF32x4, getDRegI64(mreg)),
   7529                      condT);
   7530                DIP("vcvt.f32.f16 q%u, d%u\n", dreg, mreg);
   7531             } else {
   7532                if (mreg & 1)
   7533                   return False;
   7534                mreg >>= 1;
   7535                putDRegI64(dreg, unop(Iop_F32toF16x4, getQReg(mreg)),
   7536                                 condT);
   7537                DIP("vcvt.f16.f32 d%u, q%u\n", dreg, mreg);
   7538             }
   7539             return True;
   7540          } else {
   7541             return False;
   7542          }
   7543          vassert(0);
   7544          return True;
   7545       case 3:
   7546          if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,0)) {
   7547             /* VRECPE */
   7548             IROp op;
   7549             F = (theInstr >> 8) & 1;
   7550             if (size != 2)
   7551                return False;
   7552             if (Q) {
   7553                op = F ? Iop_RecipEst32Fx4 : Iop_RecipEst32Ux4;
   7554                putQReg(dreg, unop(op, getQReg(mreg)), condT);
   7555                DIP("vrecpe.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
   7556             } else {
   7557                op = F ? Iop_RecipEst32Fx2 : Iop_RecipEst32Ux2;
   7558                putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
   7559                DIP("vrecpe.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
   7560             }
   7561             return True;
   7562          } else if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,1)) {
   7563             /* VRSQRTE */
   7564             IROp op;
   7565             F = (B >> 2) & 1;
   7566             if (size != 2)
   7567                return False;
   7568             if (F) {
   7569                /* fp */
   7570                op = Q ? Iop_RSqrtEst32Fx4 : Iop_RSqrtEst32Fx2;
   7571             } else {
   7572                /* unsigned int */
   7573                op = Q ? Iop_RSqrtEst32Ux4 : Iop_RSqrtEst32Ux2;
   7574             }
   7575             if (Q) {
   7576                putQReg(dreg, unop(op, getQReg(mreg)), condT);
   7577                DIP("vrsqrte.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
   7578             } else {
   7579                putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
   7580                DIP("vrsqrte.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
   7581             }
   7582             return True;
   7583          } else if ((B >> 3) == 3) {
   7584             /* VCVT (fp<->integer) */
   7585             IROp op;
   7586             if (size != 2)
   7587                return False;
   7588             switch ((B >> 1) & 3) {
   7589                case 0:
   7590                   op = Q ? Iop_I32StoFx4 : Iop_I32StoFx2;
   7591                   DIP("vcvt.f32.s32 %c%u, %c%u\n",
   7592                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7593                   break;
   7594                case 1:
   7595                   op = Q ? Iop_I32UtoFx4 : Iop_I32UtoFx2;
   7596                   DIP("vcvt.f32.u32 %c%u, %c%u\n",
   7597                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7598                   break;
   7599                case 2:
   7600                   op = Q ? Iop_FtoI32Sx4_RZ : Iop_FtoI32Sx2_RZ;
   7601                   DIP("vcvt.s32.f32 %c%u, %c%u\n",
   7602                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7603                   break;
   7604                case 3:
   7605                   op = Q ? Iop_FtoI32Ux4_RZ : Iop_FtoI32Ux2_RZ;
   7606                   DIP("vcvt.u32.f32 %c%u, %c%u\n",
   7607                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
   7608                   break;
   7609                default:
   7610                   vassert(0);
   7611             }
   7612             if (Q) {
   7613                putQReg(dreg, unop(op, getQReg(mreg)), condT);
   7614             } else {
   7615                putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
   7616             }
   7617             return True;
   7618          } else {
   7619             return False;
   7620          }
   7621          vassert(0);
   7622          return True;
   7623       default:
   7624          vassert(0);
   7625    }
   7626    return False;
   7627 }
   7628 
   7629 /* A7.4.6 One register and a modified immediate value */
   7630 static
   7631 void ppNeonImm(UInt imm, UInt cmode, UInt op)
   7632 {
   7633    int i;
   7634    switch (cmode) {
   7635       case 0: case 1: case 8: case 9:
   7636          vex_printf("0x%x", imm);
   7637          break;
   7638       case 2: case 3: case 10: case 11:
   7639          vex_printf("0x%x00", imm);
   7640          break;
   7641       case 4: case 5:
   7642          vex_printf("0x%x0000", imm);
   7643          break;
   7644       case 6: case 7:
   7645          vex_printf("0x%x000000", imm);
   7646          break;
   7647       case 12:
   7648          vex_printf("0x%xff", imm);
   7649          break;
   7650       case 13:
   7651          vex_printf("0x%xffff", imm);
   7652          break;
   7653       case 14:
   7654          if (op) {
   7655             vex_printf("0x");
   7656             for (i = 7; i >= 0; i--)
   7657                vex_printf("%s", (imm & (1 << i)) ? "ff" : "00");
   7658          } else {
   7659             vex_printf("0x%x", imm);
   7660          }
   7661          break;
   7662       case 15:
   7663          vex_printf("0x%x", imm);
   7664          break;
   7665    }
   7666 }
   7667 
   7668 static
   7669 const char *ppNeonImmType(UInt cmode, UInt op)
   7670 {
   7671    switch (cmode) {
   7672       case 0 ... 7:
   7673       case 12: case 13:
   7674          return "i32";
   7675       case 8 ... 11:
   7676          return "i16";
   7677       case 14:
   7678          if (op)
   7679             return "i64";
   7680          else
   7681             return "i8";
   7682       case 15:
   7683          if (op)
   7684             vassert(0);
   7685          else
   7686             return "f32";
   7687       default:
   7688          vassert(0);
   7689    }
   7690 }
   7691 
   7692 static
   7693 void DIPimm(UInt imm, UInt cmode, UInt op,
   7694             const char *instr, UInt Q, UInt dreg)
   7695 {
   7696    if (vex_traceflags & VEX_TRACE_FE) {
   7697       vex_printf("%s.%s %c%u, #", instr,
   7698                  ppNeonImmType(cmode, op), Q ? 'q' : 'd', dreg);
   7699       ppNeonImm(imm, cmode, op);
   7700       vex_printf("\n");
   7701    }
   7702 }
   7703 
   7704 static
   7705 Bool dis_neon_data_1reg_and_imm ( UInt theInstr, IRTemp condT )
   7706 {
   7707    UInt dreg = get_neon_d_regno(theInstr);
   7708    ULong imm_raw = ((theInstr >> 17) & 0x80) | ((theInstr >> 12) & 0x70) |
   7709                   (theInstr & 0xf);
   7710    ULong imm_raw_pp = imm_raw;
   7711    UInt cmode = (theInstr >> 8) & 0xf;
   7712    UInt op_bit = (theInstr >> 5) & 1;
   7713    ULong imm = 0;
   7714    UInt Q = (theInstr >> 6) & 1;
   7715    int i, j;
   7716    UInt tmp;
   7717    IRExpr *imm_val;
   7718    IRExpr *expr;
   7719    IRTemp tmp_var;
   7720    switch(cmode) {
   7721       case 7: case 6:
   7722          imm_raw = imm_raw << 8;
   7723          /* fallthrough */
   7724       case 5: case 4:
   7725          imm_raw = imm_raw << 8;
   7726          /* fallthrough */
   7727       case 3: case 2:
   7728          imm_raw = imm_raw << 8;
   7729          /* fallthrough */
   7730       case 0: case 1:
   7731          imm = (imm_raw << 32) | imm_raw;
   7732          break;
   7733       case 11: case 10:
   7734          imm_raw = imm_raw << 8;
   7735          /* fallthrough */
   7736       case 9: case 8:
   7737          imm_raw = (imm_raw << 16) | imm_raw;
   7738          imm = (imm_raw << 32) | imm_raw;
   7739          break;
   7740       case 13:
   7741          imm_raw = (imm_raw << 8) | 0xff;
   7742          /* fallthrough */
   7743       case 12:
   7744          imm_raw = (imm_raw << 8) | 0xff;
   7745          imm = (imm_raw << 32) | imm_raw;
   7746          break;
   7747       case 14:
   7748          if (! op_bit) {
   7749             for(i = 0; i < 8; i++) {
   7750                imm = (imm << 8) | imm_raw;
   7751             }
   7752          } else {
   7753             for(i = 7; i >= 0; i--) {
   7754                tmp = 0;
   7755                for(j = 0; j < 8; j++) {
   7756                   tmp = (tmp << 1) | ((imm_raw >> i) & 1);
   7757                }
   7758                imm = (imm << 8) | tmp;
   7759             }
   7760          }
   7761          break;
   7762       case 15:
   7763          imm = (imm_raw & 0x80) << 5;
   7764          imm |= ((~imm_raw & 0x40) << 5);
   7765          for(i = 1; i <= 4; i++)
   7766             imm |= (imm_raw & 0x40) << i;
   7767          imm |= (imm_raw & 0x7f);
   7768          imm = imm << 19;
   7769          imm = (imm << 32) | imm;
   7770          break;
   7771       default:
   7772          return False;
   7773    }
   7774    if (Q) {
   7775       imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   7776    } else {
   7777       imm_val = mkU64(imm);
   7778    }
   7779    if (((op_bit == 0) &&
   7780       (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 12) == 12))) ||
   7781       ((op_bit == 1) && (cmode == 14))) {
   7782       /* VMOV (immediate) */
   7783       if (Q) {
   7784          putQReg(dreg, imm_val, condT);
   7785       } else {
   7786          putDRegI64(dreg, imm_val, condT);
   7787       }
   7788       DIPimm(imm_raw_pp, cmode, op_bit, "vmov", Q, dreg);
   7789       return True;
   7790    }
   7791    if ((op_bit == 1) &&
   7792       (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 14) == 12))) {
   7793       /* VMVN (immediate) */
   7794       if (Q) {
   7795          putQReg(dreg, unop(Iop_NotV128, imm_val), condT);
   7796       } else {
   7797          putDRegI64(dreg, unop(Iop_Not64, imm_val), condT);
   7798       }
   7799       DIPimm(imm_raw_pp, cmode, op_bit, "vmvn", Q, dreg);
   7800       return True;
   7801    }
   7802    if (Q) {
   7803       tmp_var = newTemp(Ity_V128);
   7804       assign(tmp_var, getQReg(dreg));
   7805    } else {
   7806       tmp_var = newTemp(Ity_I64);
   7807       assign(tmp_var, getDRegI64(dreg));
   7808    }
   7809    if ((op_bit == 0) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
   7810       /* VORR (immediate) */
   7811       if (Q)
   7812          expr = binop(Iop_OrV128, mkexpr(tmp_var), imm_val);
   7813       else
   7814          expr = binop(Iop_Or64, mkexpr(tmp_var), imm_val);
   7815       DIPimm(imm_raw_pp, cmode, op_bit, "vorr", Q, dreg);
   7816    } else if ((op_bit == 1) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
   7817       /* VBIC (immediate) */
   7818       if (Q)
   7819          expr = binop(Iop_AndV128, mkexpr(tmp_var),
   7820                                    unop(Iop_NotV128, imm_val));
   7821       else
   7822          expr = binop(Iop_And64, mkexpr(tmp_var), unop(Iop_Not64, imm_val));
   7823       DIPimm(imm_raw_pp, cmode, op_bit, "vbic", Q, dreg);
   7824    } else {
   7825       return False;
   7826    }
   7827    if (Q)
   7828       putQReg(dreg, expr, condT);
   7829    else
   7830       putDRegI64(dreg, expr, condT);
   7831    return True;
   7832 }
   7833 
   7834 /* A7.4 Advanced SIMD data-processing instructions */
   7835 static
   7836 Bool dis_neon_data_processing ( UInt theInstr, IRTemp condT )
   7837 {
   7838    UInt A = (theInstr >> 19) & 0x1F;
   7839    UInt B = (theInstr >>  8) & 0xF;
   7840    UInt C = (theInstr >>  4) & 0xF;
   7841    UInt U = (theInstr >> 24) & 0x1;
   7842 
   7843    if (! (A & 0x10)) {
   7844       return dis_neon_data_3same(theInstr, condT);
   7845    }
   7846    if (((A & 0x17) == 0x10) && ((C & 0x9) == 0x1)) {
   7847       return dis_neon_data_1reg_and_imm(theInstr, condT);
   7848    }
   7849    if ((C & 1) == 1) {
   7850       return dis_neon_data_2reg_and_shift(theInstr, condT);
   7851    }
   7852    if (((C & 5) == 0) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
   7853       return dis_neon_data_3diff(theInstr, condT);
   7854    }
   7855    if (((C & 5) == 4) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
   7856       return dis_neon_data_2reg_and_scalar(theInstr, condT);
   7857    }
   7858    if ((A & 0x16) == 0x16) {
   7859       if ((U == 0) && ((C & 1) == 0)) {
   7860          return dis_neon_vext(theInstr, condT);
   7861       }
   7862       if ((U != 1) || ((C & 1) == 1))
   7863          return False;
   7864       if ((B & 8) == 0) {
   7865          return dis_neon_data_2reg_misc(theInstr, condT);
   7866       }
   7867       if ((B & 12) == 8) {
   7868          return dis_neon_vtb(theInstr, condT);
   7869       }
   7870       if ((B == 12) && ((C & 9) == 0)) {
   7871          return dis_neon_vdup(theInstr, condT);
   7872       }
   7873    }
   7874    return False;
   7875 }
   7876 
   7877 
   7878 /*------------------------------------------------------------*/
   7879 /*--- NEON loads and stores                                ---*/
   7880 /*------------------------------------------------------------*/
   7881 
   7882 /* For NEON memory operations, we use the standard scheme to handle
   7883    conditionalisation: generate a jump around the instruction if the
   7884    condition is false.  That's only necessary in Thumb mode, however,
   7885    since in ARM mode NEON instructions are unconditional. */
   7886 
   7887 /* A helper function for what follows.  It assumes we already went
   7888    uncond as per comments at the top of this section. */
   7889 static
   7890 void mk_neon_elem_load_to_one_lane( UInt rD, UInt inc, UInt index,
   7891                                     UInt N, UInt size, IRTemp addr )
   7892 {
   7893    UInt i;
   7894    switch (size) {
   7895       case 0:
   7896          putDRegI64(rD, triop(Iop_SetElem8x8, getDRegI64(rD), mkU8(index),
   7897                     loadLE(Ity_I8, mkexpr(addr))), IRTemp_INVALID);
   7898          break;
   7899       case 1:
   7900          putDRegI64(rD, triop(Iop_SetElem16x4, getDRegI64(rD), mkU8(index),
   7901                     loadLE(Ity_I16, mkexpr(addr))), IRTemp_INVALID);
   7902          break;
   7903       case 2:
   7904          putDRegI64(rD, triop(Iop_SetElem32x2, getDRegI64(rD), mkU8(index),
   7905                     loadLE(Ity_I32, mkexpr(addr))), IRTemp_INVALID);
   7906          break;
   7907       default:
   7908          vassert(0);
   7909    }
   7910    for (i = 1; i <= N; i++) {
   7911       switch (size) {
   7912          case 0:
   7913             putDRegI64(rD + i * inc,
   7914                        triop(Iop_SetElem8x8,
   7915                              getDRegI64(rD + i * inc),
   7916                              mkU8(index),
   7917                              loadLE(Ity_I8, binop(Iop_Add32,
   7918                                                   mkexpr(addr),
   7919                                                   mkU32(i * 1)))),
   7920                        IRTemp_INVALID);
   7921             break;
   7922          case 1:
   7923             putDRegI64(rD + i * inc,
   7924                        triop(Iop_SetElem16x4,
   7925                              getDRegI64(rD + i * inc),
   7926                              mkU8(index),
   7927                              loadLE(Ity_I16, binop(Iop_Add32,
   7928                                                    mkexpr(addr),
   7929                                                    mkU32(i * 2)))),
   7930                        IRTemp_INVALID);
   7931             break;
   7932          case 2:
   7933             putDRegI64(rD + i * inc,
   7934                        triop(Iop_SetElem32x2,
   7935                              getDRegI64(rD + i * inc),
   7936                              mkU8(index),
   7937                              loadLE(Ity_I32, binop(Iop_Add32,
   7938                                                    mkexpr(addr),
   7939                                                    mkU32(i * 4)))),
   7940                        IRTemp_INVALID);
   7941             break;
   7942          default:
   7943             vassert(0);
   7944       }
   7945    }
   7946 }
   7947 
   7948 /* A(nother) helper function for what follows.  It assumes we already
   7949    went uncond as per comments at the top of this section. */
   7950 static
   7951 void mk_neon_elem_store_from_one_lane( UInt rD, UInt inc, UInt index,
   7952                                        UInt N, UInt size, IRTemp addr )
   7953 {
   7954    UInt i;
   7955    switch (size) {
   7956       case 0:
   7957          storeLE(mkexpr(addr),
   7958                  binop(Iop_GetElem8x8, getDRegI64(rD), mkU8(index)));
   7959          break;
   7960       case 1:
   7961          storeLE(mkexpr(addr),
   7962                  binop(Iop_GetElem16x4, getDRegI64(rD), mkU8(index)));
   7963          break;
   7964       case 2:
   7965          storeLE(mkexpr(addr),
   7966                  binop(Iop_GetElem32x2, getDRegI64(rD), mkU8(index)));
   7967          break;
   7968       default:
   7969          vassert(0);
   7970    }
   7971    for (i = 1; i <= N; i++) {
   7972       switch (size) {
   7973          case 0:
   7974             storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 1)),
   7975                     binop(Iop_GetElem8x8, getDRegI64(rD + i * inc),
   7976                                           mkU8(index)));
   7977             break;
   7978          case 1:
   7979             storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 2)),
   7980                     binop(Iop_GetElem16x4, getDRegI64(rD + i * inc),
   7981                                            mkU8(index)));
   7982             break;
   7983          case 2:
   7984             storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 4)),
   7985                     binop(Iop_GetElem32x2, getDRegI64(rD + i * inc),
   7986                                            mkU8(index)));
   7987             break;
   7988          default:
   7989             vassert(0);
   7990       }
   7991    }
   7992 }
   7993 
   7994 /* Generate 2x64 -> 2x64 deinterleave code, for VLD2.  Caller must
   7995    make *u0 and *u1 be valid IRTemps before the call. */
   7996 static void math_DEINTERLEAVE_2 (/*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
   7997                                  IRTemp i0, IRTemp i1, Int laneszB)
   7998 {
   7999    /* The following assumes that the guest is little endian, and hence
   8000       that the memory-side (interleaved) data is stored
   8001       little-endianly. */
   8002    vassert(u0 && u1);
   8003    /* This is pretty easy, since we have primitives directly to
   8004       hand. */
   8005    if (laneszB == 4) {
   8006       // memLE(128 bits) == A0 B0 A1 B1
   8007       // i0 == B0 A0, i1 == B1 A1
   8008       // u0 == A1 A0, u1 == B1 B0
   8009       assign(*u0, binop(Iop_InterleaveLO32x2, mkexpr(i1), mkexpr(i0)));
   8010       assign(*u1, binop(Iop_InterleaveHI32x2, mkexpr(i1), mkexpr(i0)));
   8011    } else if (laneszB == 2) {
   8012       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
   8013       // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
   8014       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
   8015       assign(*u0, binop(Iop_CatEvenLanes16x4, mkexpr(i1), mkexpr(i0)));
   8016       assign(*u1, binop(Iop_CatOddLanes16x4,  mkexpr(i1), mkexpr(i0)));
   8017    } else if (laneszB == 1) {
   8018       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
   8019       // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
   8020       // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
   8021       assign(*u0, binop(Iop_CatEvenLanes8x8, mkexpr(i1), mkexpr(i0)));
   8022       assign(*u1, binop(Iop_CatOddLanes8x8,  mkexpr(i1), mkexpr(i0)));
   8023    } else {
   8024       // Can never happen, since VLD2 only has valid lane widths of 32,
   8025       // 16 or 8 bits.
   8026       vpanic("math_DEINTERLEAVE_2");
   8027    }
   8028 }
   8029 
   8030 /* Generate 2x64 -> 2x64 interleave code, for VST2.  Caller must make
   8031    *u0 and *u1 be valid IRTemps before the call. */
   8032 static void math_INTERLEAVE_2 (/*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
   8033                                IRTemp u0, IRTemp u1, Int laneszB)
   8034 {
   8035    /* The following assumes that the guest is little endian, and hence
   8036       that the memory-side (interleaved) data is stored
   8037       little-endianly. */
   8038    vassert(i0 && i1);
   8039    /* This is pretty easy, since we have primitives directly to
   8040       hand. */
   8041    if (laneszB == 4) {
   8042       // memLE(128 bits) == A0 B0 A1 B1
   8043       // i0 == B0 A0, i1 == B1 A1
   8044       // u0 == A1 A0, u1 == B1 B0
   8045       assign(*i0, binop(Iop_InterleaveLO32x2, mkexpr(u1), mkexpr(u0)));
   8046       assign(*i1, binop(Iop_InterleaveHI32x2, mkexpr(u1), mkexpr(u0)));
   8047    } else if (laneszB == 2) {
   8048       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
   8049       // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
   8050       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
   8051       assign(*i0, binop(Iop_InterleaveLO16x4, mkexpr(u1), mkexpr(u0)));
   8052       assign(*i1, binop(Iop_InterleaveHI16x4, mkexpr(u1), mkexpr(u0)));
   8053    } else if (laneszB == 1) {
   8054       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
   8055       // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
   8056       // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
   8057       assign(*i0, binop(Iop_InterleaveLO8x8, mkexpr(u1), mkexpr(u0)));
   8058       assign(*i1, binop(Iop_InterleaveHI8x8, mkexpr(u1), mkexpr(u0)));
   8059    } else {
   8060       // Can never happen, since VST2 only has valid lane widths of 32,
   8061       // 16 or 8 bits.
   8062       vpanic("math_INTERLEAVE_2");
   8063    }
   8064 }
   8065 
   8066 // Helper function for generating arbitrary slicing 'n' dicing of
   8067 // 3 8x8 vectors, as needed for VLD3.8 and VST3.8.
   8068 static IRExpr* math_PERM_8x8x3(const UChar* desc,
   8069                                IRTemp s0, IRTemp s1, IRTemp s2)
   8070 {
   8071    // desc is an array of 8 pairs, encoded as 16 bytes,
   8072    // that describe how to assemble the result lanes, starting with
   8073    // lane 7.  Each pair is: first component (0..2) says which of
   8074    // s0/s1/s2 to use.  Second component (0..7) is the lane number
   8075    // in the source to use.
   8076    UInt si;
   8077    for (si = 0; si < 7; si++) {
   8078       vassert(desc[2 * si + 0] <= 2);
   8079       vassert(desc[2 * si + 1] <= 7);
   8080    }
   8081    IRTemp h3 = newTemp(Ity_I64);
   8082    IRTemp h2 = newTemp(Ity_I64);
   8083    IRTemp h1 = newTemp(Ity_I64);
   8084    IRTemp h0 = newTemp(Ity_I64);
   8085    IRTemp srcs[3] = {s0, s1, s2};
   8086 #  define SRC_VEC(_lane)   mkexpr(srcs[desc[2 * (7-(_lane)) + 0]])
   8087 #  define SRC_SHIFT(_lane) mkU8(56-8*(desc[2 * (7-(_lane)) + 1]))
   8088    assign(h3, binop(Iop_InterleaveHI8x8,
   8089                     binop(Iop_Shl64, SRC_VEC(7), SRC_SHIFT(7)),
   8090                     binop(Iop_Shl64, SRC_VEC(6), SRC_SHIFT(6))));
   8091    assign(h2, binop(Iop_InterleaveHI8x8,
   8092                     binop(Iop_Shl64, SRC_VEC(5), SRC_SHIFT(5)),
   8093                     binop(Iop_Shl64, SRC_VEC(4), SRC_SHIFT(4))));
   8094    assign(h1, binop(Iop_InterleaveHI8x8,
   8095                     binop(Iop_Shl64, SRC_VEC(3), SRC_SHIFT(3)),
   8096                     binop(Iop_Shl64, SRC_VEC(2), SRC_SHIFT(2))));
   8097    assign(h0, binop(Iop_InterleaveHI8x8,
   8098                     binop(Iop_Shl64, SRC_VEC(1), SRC_SHIFT(1)),
   8099                     binop(Iop_Shl64, SRC_VEC(0), SRC_SHIFT(0))));
   8100 #  undef SRC_VEC
   8101 #  undef SRC_SHIFT
   8102    // Now h3..h0 are 64 bit vectors with useful information only
   8103    // in the top 16 bits.  We now concatentate those four 16-bit
   8104    // groups so as to produce the final result.
   8105    IRTemp w1 = newTemp(Ity_I64);
   8106    IRTemp w0 = newTemp(Ity_I64);
   8107    assign(w1, binop(Iop_InterleaveHI16x4, mkexpr(h3), mkexpr(h2)));
   8108    assign(w0, binop(Iop_InterleaveHI16x4, mkexpr(h1), mkexpr(h0)));
   8109    return binop(Iop_InterleaveHI32x2, mkexpr(w1), mkexpr(w0));
   8110 }
   8111 
   8112 /* Generate 3x64 -> 3x64 deinterleave code, for VLD3.  Caller must
   8113    make *u0, *u1 and *u2 be valid IRTemps before the call. */
   8114 static void math_DEINTERLEAVE_3 (
   8115                /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1, /*OUT*/IRTemp* u2,
   8116                IRTemp i0, IRTemp i1, IRTemp i2, Int laneszB
   8117             )
   8118 {
   8119 #  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
   8120 #  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
   8121 #  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8122    /* The following assumes that the guest is little endian, and hence
   8123       that the memory-side (interleaved) data is stored
   8124       little-endianly. */
   8125    vassert(u0 && u1 && u2);
   8126    if (laneszB == 4) {
   8127       // memLE(192 bits) == A0 B0 C0 A1 B1 C1
   8128       // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
   8129       // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
   8130       assign(*u0, IHI32x2(SHL64(i1,  0), SHL64(i0, 32)));
   8131       assign(*u1, IHI32x2(SHL64(i2, 32), SHL64(i0,  0)));
   8132       assign(*u2, IHI32x2(SHL64(i2,  0), SHL64(i1, 32)));
   8133    } else if (laneszB == 2) {
   8134       // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
   8135       // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
   8136       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
   8137 #     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
   8138                 IHI32x2(                                      \
   8139                    IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
   8140                            SHL64((_tmp2),48-16*(_la2))),      \
   8141                    IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
   8142                            SHL64((_tmp0),48-16*(_la0))))
   8143       assign(*u0, XXX(i2,1, i1,2, i0,3, i0,0));
   8144       assign(*u1, XXX(i2,2, i1,3, i1,0, i0,1));
   8145       assign(*u2, XXX(i2,3, i2,0, i1,1, i0,2));
   8146 #     undef XXX
   8147    } else if (laneszB == 1) {
   8148       // These describe how the result vectors [7..0] are
   8149       // assembled from the source vectors.  Each pair is
   8150       // (source vector number, lane number).
   8151       static const UChar de0[16] = {2,5, 2,2, 1,7, 1,4, 1,1, 0,6, 0,3, 0,0};
   8152       static const UChar de1[16] = {2,6, 2,3, 2,0, 1,5, 1,2, 0,7, 0,4, 0,1};
   8153       static const UChar de2[16] = {2,7, 2,4, 2,1, 1,6, 1,3, 1,0, 0,5, 0,2};
   8154       assign(*u0, math_PERM_8x8x3(de0, i0, i1, i2));
   8155       assign(*u1, math_PERM_8x8x3(de1, i0, i1, i2));
   8156       assign(*u2, math_PERM_8x8x3(de2, i0, i1, i2));
   8157    } else {
   8158       // Can never happen, since VLD3 only has valid lane widths of 32,
   8159       // 16 or 8 bits.
   8160       vpanic("math_DEINTERLEAVE_3");
   8161    }
   8162 #  undef SHL64
   8163 #  undef IHI16x4
   8164 #  undef IHI32x2
   8165 }
   8166 
   8167 /* Generate 3x64 -> 3x64 interleave code, for VST3.  Caller must
   8168    make *i0, *i1 and *i2 be valid IRTemps before the call. */
   8169 static void math_INTERLEAVE_3 (
   8170                /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1, /*OUT*/IRTemp* i2,
   8171                IRTemp u0, IRTemp u1, IRTemp u2, Int laneszB
   8172             )
   8173 {
   8174 #  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
   8175 #  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
   8176 #  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8177    /* The following assumes that the guest is little endian, and hence
   8178       that the memory-side (interleaved) data is stored
   8179       little-endianly. */
   8180    vassert(i0 && i1 && i2);
   8181    if (laneszB == 4) {
   8182       // memLE(192 bits) == A0 B0 C0 A1 B1 C1
   8183       // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
   8184       // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
   8185       assign(*i0, IHI32x2(SHL64(u1, 32), SHL64(u0, 32)));
   8186       assign(*i1, IHI32x2(SHL64(u0,  0), SHL64(u2, 32)));
   8187       assign(*i2, IHI32x2(SHL64(u2,  0), SHL64(u1,  0)));
   8188    } else if (laneszB == 2) {
   8189       // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
   8190       // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
   8191       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
   8192 #     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
   8193                 IHI32x2(                                      \
   8194                    IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
   8195                            SHL64((_tmp2),48-16*(_la2))),      \
   8196                    IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
   8197                            SHL64((_tmp0),48-16*(_la0))))
   8198       assign(*i0, XXX(u0,1, u2,0, u1,0, u0,0));
   8199       assign(*i1, XXX(u1,2, u0,2, u2,1, u1,1));
   8200       assign(*i2, XXX(u2,3, u1,3, u0,3, u2,2));
   8201 #     undef XXX
   8202    } else if (laneszB == 1) {
   8203       // These describe how the result vectors [7..0] are
   8204       // assembled from the source vectors.  Each pair is
   8205       // (source vector number, lane number).
   8206       static const UChar in0[16] = {1,2, 0,2, 2,1, 1,1, 0,1, 2,0, 1,0, 0,0};
   8207       static const UChar in1[16] = {0,5, 2,4, 1,4, 0,4, 2,3, 1,3, 0,3, 2,2};
   8208       static const UChar in2[16] = {2,7, 1,7, 0,7, 2,6, 1,6, 0,6, 2,5, 1,5};
   8209       assign(*i0, math_PERM_8x8x3(in0, u0, u1, u2));
   8210       assign(*i1, math_PERM_8x8x3(in1, u0, u1, u2));
   8211       assign(*i2, math_PERM_8x8x3(in2, u0, u1, u2));
   8212    } else {
   8213       // Can never happen, since VST3 only has valid lane widths of 32,
   8214       // 16 or 8 bits.
   8215       vpanic("math_INTERLEAVE_3");
   8216    }
   8217 #  undef SHL64
   8218 #  undef IHI16x4
   8219 #  undef IHI32x2
   8220 }
   8221 
   8222 /* Generate 4x64 -> 4x64 deinterleave code, for VLD4.  Caller must
   8223    make *u0, *u1, *u2 and *u3 be valid IRTemps before the call. */
   8224 static void math_DEINTERLEAVE_4 (
   8225                /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
   8226                /*OUT*/IRTemp* u2, /*OUT*/IRTemp* u3,
   8227                IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3, Int laneszB
   8228             )
   8229 {
   8230 #  define IHI32x2(_t1, _t2) \
   8231              binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
   8232 #  define ILO32x2(_t1, _t2) \
   8233              binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
   8234 #  define IHI16x4(_t1, _t2) \
   8235              binop(Iop_InterleaveHI16x4, mkexpr(_t1), mkexpr(_t2))
   8236 #  define ILO16x4(_t1, _t2) \
   8237              binop(Iop_InterleaveLO16x4, mkexpr(_t1), mkexpr(_t2))
   8238 #  define IHI8x8(_t1, _e2) \
   8239              binop(Iop_InterleaveHI8x8, mkexpr(_t1), _e2)
   8240 #  define SHL64(_tmp, _amt) \
   8241              binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8242    /* The following assumes that the guest is little endian, and hence
   8243       that the memory-side (interleaved) data is stored
   8244       little-endianly. */
   8245    vassert(u0 && u1 && u2 && u3);
   8246    if (laneszB == 4) {
   8247       assign(*u0, ILO32x2(i2, i0));
   8248       assign(*u1, IHI32x2(i2, i0));
   8249       assign(*u2, ILO32x2(i3, i1));
   8250       assign(*u3, IHI32x2(i3, i1));
   8251    } else if (laneszB == 2) {
   8252       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8253       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8254       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8255       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8256       assign(b1b0a1a0, ILO16x4(i1, i0));
   8257       assign(b3b2a3a2, ILO16x4(i3, i2));
   8258       assign(d1d0c1c0, IHI16x4(i1, i0));
   8259       assign(d3d2c3c2, IHI16x4(i3, i2));
   8260       // And now do what we did for the 32-bit case.
   8261       assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
   8262       assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
   8263       assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
   8264       assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
   8265    } else if (laneszB == 1) {
   8266       // Deinterleave into 16-bit chunks, then do as the 16-bit case.
   8267       IRTemp i0x = newTemp(Ity_I64);
   8268       IRTemp i1x = newTemp(Ity_I64);
   8269       IRTemp i2x = newTemp(Ity_I64);
   8270       IRTemp i3x = newTemp(Ity_I64);
   8271       assign(i0x, IHI8x8(i0, SHL64(i0, 32)));
   8272       assign(i1x, IHI8x8(i1, SHL64(i1, 32)));
   8273       assign(i2x, IHI8x8(i2, SHL64(i2, 32)));
   8274       assign(i3x, IHI8x8(i3, SHL64(i3, 32)));
   8275       // From here on is like the 16 bit case.
   8276       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8277       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8278       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8279       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8280       assign(b1b0a1a0, ILO16x4(i1x, i0x));
   8281       assign(b3b2a3a2, ILO16x4(i3x, i2x));
   8282       assign(d1d0c1c0, IHI16x4(i1x, i0x));
   8283       assign(d3d2c3c2, IHI16x4(i3x, i2x));
   8284       // And now do what we did for the 32-bit case.
   8285       assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
   8286       assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
   8287       assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
   8288       assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
   8289    } else {
   8290       // Can never happen, since VLD4 only has valid lane widths of 32,
   8291       // 16 or 8 bits.
   8292       vpanic("math_DEINTERLEAVE_4");
   8293    }
   8294 #  undef SHL64
   8295 #  undef IHI8x8
   8296 #  undef ILO16x4
   8297 #  undef IHI16x4
   8298 #  undef ILO32x2
   8299 #  undef IHI32x2
   8300 }
   8301 
   8302 /* Generate 4x64 -> 4x64 interleave code, for VST4.  Caller must
   8303    make *i0, *i1, *i2 and *i3 be valid IRTemps before the call. */
   8304 static void math_INTERLEAVE_4 (
   8305                /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
   8306                /*OUT*/IRTemp* i2, /*OUT*/IRTemp* i3,
   8307                IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3, Int laneszB
   8308             )
   8309 {
   8310 #  define IHI32x2(_t1, _t2) \
   8311              binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
   8312 #  define ILO32x2(_t1, _t2) \
   8313              binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
   8314 #  define CEV16x4(_t1, _t2) \
   8315              binop(Iop_CatEvenLanes16x4, mkexpr(_t1), mkexpr(_t2))
   8316 #  define COD16x4(_t1, _t2) \
   8317              binop(Iop_CatOddLanes16x4, mkexpr(_t1), mkexpr(_t2))
   8318 #  define COD8x8(_t1, _e2) \
   8319              binop(Iop_CatOddLanes8x8, mkexpr(_t1), _e2)
   8320 #  define SHL64(_tmp, _amt) \
   8321              binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   8322    /* The following assumes that the guest is little endian, and hence
   8323       that the memory-side (interleaved) data is stored
   8324       little-endianly. */
   8325    vassert(u0 && u1 && u2 && u3);
   8326    if (laneszB == 4) {
   8327       assign(*i0, ILO32x2(u1, u0));
   8328       assign(*i1, ILO32x2(u3, u2));
   8329       assign(*i2, IHI32x2(u1, u0));
   8330       assign(*i3, IHI32x2(u3, u2));
   8331    } else if (laneszB == 2) {
   8332       // First, interleave at the 32-bit lane size.
   8333       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8334       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8335       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8336       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8337       assign(b1b0a1a0, ILO32x2(u1, u0));
   8338       assign(b3b2a3a2, IHI32x2(u1, u0));
   8339       assign(d1d0c1c0, ILO32x2(u3, u2));
   8340       assign(d3d2c3c2, IHI32x2(u3, u2));
   8341       // And interleave (cat) at the 16 bit size.
   8342       assign(*i0, CEV16x4(d1d0c1c0, b1b0a1a0));
   8343       assign(*i1, COD16x4(d1d0c1c0, b1b0a1a0));
   8344       assign(*i2, CEV16x4(d3d2c3c2, b3b2a3a2));
   8345       assign(*i3, COD16x4(d3d2c3c2, b3b2a3a2));
   8346    } else if (laneszB == 1) {
   8347       // First, interleave at the 32-bit lane size.
   8348       IRTemp b1b0a1a0 = newTemp(Ity_I64);
   8349       IRTemp b3b2a3a2 = newTemp(Ity_I64);
   8350       IRTemp d1d0c1c0 = newTemp(Ity_I64);
   8351       IRTemp d3d2c3c2 = newTemp(Ity_I64);
   8352       assign(b1b0a1a0, ILO32x2(u1, u0));
   8353       assign(b3b2a3a2, IHI32x2(u1, u0));
   8354       assign(d1d0c1c0, ILO32x2(u3, u2));
   8355       assign(d3d2c3c2, IHI32x2(u3, u2));
   8356       // And interleave (cat) at the 16 bit size.
   8357       IRTemp i0x = newTemp(Ity_I64);
   8358       IRTemp i1x = newTemp(Ity_I64);
   8359       IRTemp i2x = newTemp(Ity_I64);
   8360       IRTemp i3x = newTemp(Ity_I64);
   8361       assign(i0x, CEV16x4(d1d0c1c0, b1b0a1a0));
   8362       assign(i1x, COD16x4(d1d0c1c0, b1b0a1a0));
   8363       assign(i2x, CEV16x4(d3d2c3c2, b3b2a3a2));
   8364       assign(i3x, COD16x4(d3d2c3c2, b3b2a3a2));
   8365       // And rearrange within each word, to get the right 8 bit lanes.
   8366       assign(*i0, COD8x8(i0x, SHL64(i0x, 8)));
   8367       assign(*i1, COD8x8(i1x, SHL64(i1x, 8)));
   8368       assign(*i2, COD8x8(i2x, SHL64(i2x, 8)));
   8369       assign(*i3, COD8x8(i3x, SHL64(i3x, 8)));
   8370    } else {
   8371       // Can never happen, since VLD4 only has valid lane widths of 32,
   8372       // 16 or 8 bits.
   8373       vpanic("math_DEINTERLEAVE_4");
   8374    }
   8375 #  undef SHL64
   8376 #  undef COD8x8
   8377 #  undef COD16x4
   8378 #  undef CEV16x4
   8379 #  undef ILO32x2
   8380 #  undef IHI32x2
   8381 }
   8382 
   8383 /* A7.7 Advanced SIMD element or structure load/store instructions */
   8384 static
   8385 Bool dis_neon_load_or_store ( UInt theInstr,
   8386                               Bool isT, IRTemp condT )
   8387 {
   8388 #  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
   8389    UInt bA = INSN(23,23);
   8390    UInt fB = INSN(11,8);
   8391    UInt bL = INSN(21,21);
   8392    UInt rD = (INSN(22,22) << 4) | INSN(15,12);
   8393    UInt rN = INSN(19,16);
   8394    UInt rM = INSN(3,0);
   8395    UInt N, size, i, j;
   8396    UInt inc;
   8397    UInt regs = 1;
   8398 
   8399    if (isT) {
   8400       vassert(condT != IRTemp_INVALID);
   8401    } else {
   8402       vassert(condT == IRTemp_INVALID);
   8403    }
   8404    /* So now, if condT is not IRTemp_INVALID, we know we're
   8405       dealing with Thumb code. */
   8406 
   8407    if (INSN(20,20) != 0)
   8408       return False;
   8409 
   8410    IRTemp initialRn = newTemp(Ity_I32);
   8411    assign(initialRn, isT ? getIRegT(rN) : getIRegA(rN));
   8412 
   8413    IRTemp initialRm = newTemp(Ity_I32);
   8414    assign(initialRm, isT ? getIRegT(rM) : getIRegA(rM));
   8415 
   8416    /* There are 3 cases:
   8417       (1) VSTn / VLDn (n-element structure from/to one lane)
   8418       (2) VLDn (single element to all lanes)
   8419       (3) VSTn / VLDn (multiple n-element structures)
   8420    */
   8421    if (bA) {
   8422       N = fB & 3;
   8423       if ((fB >> 2) < 3) {
   8424          /* ------------ Case (1) ------------
   8425             VSTn / VLDn (n-element structure from/to one lane) */
   8426 
   8427          size = fB >> 2;
   8428 
   8429          switch (size) {
   8430             case 0: i = INSN(7,5); inc = 1; break;
   8431             case 1: i = INSN(7,6); inc = INSN(5,5) ? 2 : 1; break;
   8432             case 2: i = INSN(7,7); inc = INSN(6,6) ? 2 : 1; break;
   8433             case 3: return False;
   8434             default: vassert(0);
   8435          }
   8436 
   8437          IRTemp addr = newTemp(Ity_I32);
   8438          assign(addr, mkexpr(initialRn));
   8439 
   8440          // go uncond
   8441          if (condT != IRTemp_INVALID)
   8442             mk_skip_over_T32_if_cond_is_false(condT);
   8443          // now uncond
   8444 
   8445          if (bL)
   8446             mk_neon_elem_load_to_one_lane(rD, inc, i, N, size, addr);
   8447          else
   8448             mk_neon_elem_store_from_one_lane(rD, inc, i, N, size, addr);
   8449          DIP("v%s%u.%d {", bL ? "ld" : "st", N + 1, 8 << size);
   8450          for (j = 0; j <= N; j++) {
   8451             if (j)
   8452                DIP(", ");
   8453             DIP("d%u[%u]", rD + j * inc, i);
   8454          }
   8455          DIP("}, [r%u]", rN);
   8456          if (rM != 13 && rM != 15) {
   8457             DIP(", r%u\n", rM);
   8458          } else {
   8459             DIP("%s\n", (rM != 15) ? "!" : "");
   8460          }
   8461       } else {
   8462          /* ------------ Case (2) ------------
   8463             VLDn (single element to all lanes) */
   8464          UInt r;
   8465          if (bL == 0)
   8466             return False;
   8467 
   8468          inc = INSN(5,5) + 1;
   8469          size = INSN(7,6);
   8470 
   8471          /* size == 3 and size == 2 cases differ in alignment constraints */
   8472          if (size == 3 && N == 3 && INSN(4,4) == 1)
   8473             size = 2;
   8474 
   8475          if (size == 0 && N == 0 && INSN(4,4) == 1)
   8476             return False;
   8477          if (N == 2 && INSN(4,4) == 1)
   8478             return False;
   8479          if (size == 3)
   8480             return False;
   8481 
   8482          // go uncond
   8483          if (condT != IRTemp_INVALID)
   8484             mk_skip_over_T32_if_cond_is_false(condT);
   8485          // now uncond
   8486 
   8487          IRTemp addr = newTemp(Ity_I32);
   8488          assign(addr, mkexpr(initialRn));
   8489 
   8490          if (N == 0 && INSN(5,5))
   8491             regs = 2;
   8492 
   8493          for (r = 0; r < regs; r++) {
   8494             switch (size) {
   8495                case 0:
   8496                   putDRegI64(rD + r, unop(Iop_Dup8x8,
   8497                                           loadLE(Ity_I8, mkexpr(addr))),
   8498                              IRTemp_INVALID);
   8499                   break;
   8500                case 1:
   8501                   putDRegI64(rD + r, unop(Iop_Dup16x4,
   8502                                           loadLE(Ity_I16, mkexpr(addr))),
   8503                              IRTemp_INVALID);
   8504                   break;
   8505                case 2:
   8506                   putDRegI64(rD + r, unop(Iop_Dup32x2,
   8507                                           loadLE(Ity_I32, mkexpr(addr))),
   8508                              IRTemp_INVALID);
   8509                   break;
   8510                default:
   8511                   vassert(0);
   8512             }
   8513             for (i = 1; i <= N; i++) {
   8514                switch (size) {
   8515                   case 0:
   8516                      putDRegI64(rD + r + i * inc,
   8517                                 unop(Iop_Dup8x8,
   8518                                      loadLE(Ity_I8, binop(Iop_Add32,
   8519                                                           mkexpr(addr),
   8520                                                           mkU32(i * 1)))),
   8521                                 IRTemp_INVALID);
   8522                      break;
   8523                   case 1:
   8524                      putDRegI64(rD + r + i * inc,
   8525                                 unop(Iop_Dup16x4,
   8526                                      loadLE(Ity_I16, binop(Iop_Add32,
   8527                                                            mkexpr(addr),
   8528                                                            mkU32(i * 2)))),
   8529                                 IRTemp_INVALID);
   8530                      break;
   8531                   case 2:
   8532                      putDRegI64(rD + r + i * inc,
   8533                                 unop(Iop_Dup32x2,
   8534                                      loadLE(Ity_I32, binop(Iop_Add32,
   8535                                                            mkexpr(addr),
   8536                                                            mkU32(i * 4)))),
   8537                                 IRTemp_INVALID);
   8538                      break;
   8539                   default:
   8540                      vassert(0);
   8541                }
   8542             }
   8543          }
   8544          DIP("vld%u.%d {", N + 1, 8 << size);
   8545          for (r = 0; r < regs; r++) {
   8546             for (i = 0; i <= N; i++) {
   8547                if (i || r)
   8548                   DIP(", ");
   8549                DIP("d%u[]", rD + r + i * inc);
   8550             }
   8551          }
   8552          DIP("}, [r%u]", rN);
   8553          if (rM != 13 && rM != 15) {
   8554             DIP(", r%u\n", rM);
   8555          } else {
   8556             DIP("%s\n", (rM != 15) ? "!" : "");
   8557          }
   8558       }
   8559       /* Writeback.  We're uncond here, so no condT-ing. */
   8560       if (rM != 15) {
   8561          if (rM == 13) {
   8562             IRExpr* e = binop(Iop_Add32,
   8563                               mkexpr(initialRn),
   8564                               mkU32((1 << size) * (N + 1)));
   8565             if (isT)
   8566                putIRegT(rN, e, IRTemp_INVALID);
   8567             else
   8568                putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
   8569          } else {
   8570             IRExpr* e = binop(Iop_Add32,
   8571                               mkexpr(initialRn),
   8572                               mkexpr(initialRm));
   8573             if (isT)
   8574                putIRegT(rN, e, IRTemp_INVALID);
   8575             else
   8576                putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
   8577          }
   8578       }
   8579       return True;
   8580    } else {
   8581       /* ------------ Case (3) ------------
   8582          VSTn / VLDn (multiple n-element structures) */
   8583       inc = (fB & 1) + 1;
   8584 
   8585       if (fB == BITS4(0,0,1,0)       // Dd, Dd+1, Dd+2, Dd+3  inc = 1  regs = 4
   8586           || fB == BITS4(0,1,1,0)    // Dd, Dd+1, Dd+2        inc = 1  regs = 3
   8587           || fB == BITS4(0,1,1,1)    // Dd                    inc = 2  regs = 1
   8588           || fB == BITS4(1,0,1,0)) { // Dd, Dd+1              inc = 1  regs = 2
   8589          N = 0; // VLD1/VST1.  'inc' does not appear to have any
   8590                 // meaning for the VLD1/VST1 cases.  'regs' is the number of
   8591                 // registers involved.
   8592          if (rD + regs > 32) return False;
   8593       }
   8594       else
   8595       if (fB == BITS4(0,0,1,1)       // Dd, Dd+1, Dd+2, Dd+3  inc=2  regs = 2
   8596           || fB == BITS4(1,0,0,0)    // Dd, Dd+1              inc=1  regs = 1
   8597           || fB == BITS4(1,0,0,1)) { // Dd, Dd+2              inc=2  regs = 1
   8598          N = 1; // VLD2/VST2.  'regs' is the number of register-pairs involved
   8599          if (regs == 1 && inc == 1 && rD + 1 >= 32) return False;
   8600          if (regs == 1 && inc == 2 && rD + 2 >= 32) return False;
   8601          if (regs == 2 && inc == 2 && rD + 3 >= 32) return False;
   8602       } else if (fB == BITS4(0,1,0,0) || fB == BITS4(0,1,0,1)) {
   8603          N = 2; // VLD3/VST3
   8604          if (inc == 1 && rD + 2 >= 32) return False;
   8605          if (inc == 2 && rD + 4 >= 32) return False;
   8606       } else if (fB == BITS4(0,0,0,0) || fB == BITS4(0,0,0,1)) {
   8607          N = 3; // VLD4/VST4
   8608          if (inc == 1 && rD + 3 >= 32) return False;
   8609          if (inc == 2 && rD + 6 >= 32) return False;
   8610       } else {
   8611          return False;
   8612       }
   8613 
   8614       if (N == 1 && fB == BITS4(0,0,1,1)) {
   8615          regs = 2;
   8616       } else if (N == 0) {
   8617          if (fB == BITS4(1,0,1,0)) {
   8618             regs = 2;
   8619          } else if (fB == BITS4(0,1,1,0)) {
   8620             regs = 3;
   8621          } else if (fB == BITS4(0,0,1,0)) {
   8622             regs = 4;
   8623          }
   8624       }
   8625 
   8626       size = INSN(7,6);
   8627       if (N == 0 && size == 3)
   8628          size = 2;
   8629       if (size == 3)
   8630          return False;
   8631 
   8632       // go uncond
   8633       if (condT != IRTemp_INVALID)
   8634          mk_skip_over_T32_if_cond_is_false(condT);
   8635       // now uncond
   8636 
   8637       IRTemp addr = newTemp(Ity_I32);
   8638       assign(addr, mkexpr(initialRn));
   8639 
   8640       if (N == 0 /* No interleaving -- VLD1/VST1 */) {
   8641          UInt r;
   8642          vassert(regs == 1 || regs == 2 || regs == 3 || regs == 4);
   8643          /* inc has no relevance here */
   8644          for (r = 0; r < regs; r++) {
   8645             if (bL)
   8646                putDRegI64(rD+r, loadLE(Ity_I64, mkexpr(addr)), IRTemp_INVALID);
   8647             else
   8648                storeLE(mkexpr(addr), getDRegI64(rD+r));
   8649             IRTemp tmp = newTemp(Ity_I32);
   8650             assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(8)));
   8651             addr = tmp;
   8652          }
   8653       }
   8654       else
   8655       if (N == 1 /* 2-interleaving -- VLD2/VST2 */) {
   8656          vassert( (regs == 1 && (inc == 1 || inc == 2))
   8657                    || (regs == 2 && inc == 2) );
   8658          // Make 'nregs' be the number of registers and 'regstep'
   8659          // equal the actual register-step.  The ARM encoding, using 'regs'
   8660          // and 'inc', is bizarre.  After this, we have:
   8661          // Dd, Dd+1              regs = 1, inc = 1,   nregs = 2, regstep = 1
   8662          // Dd, Dd+2              regs = 1, inc = 2,   nregs = 2, regstep = 2
   8663          // Dd, Dd+1, Dd+2, Dd+3  regs = 2, inc = 2,   nregs = 4, regstep = 1
   8664          UInt nregs   = 2;
   8665          UInt regstep = 1;
   8666          if (regs == 1 && inc == 1) {
   8667             /* nothing */
   8668          } else if (regs == 1 && inc == 2) {
   8669             regstep = 2;
   8670          } else if (regs == 2 && inc == 2) {
   8671             nregs = 4;
   8672          } else {
   8673             vassert(0);
   8674          }
   8675          // 'a' is address,
   8676          // 'di' is interleaved data, 'du' is uninterleaved data
   8677          if (nregs == 2) {
   8678             IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8679             IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8680             IRTemp  di0 = newTemp(Ity_I64);
   8681             IRTemp  di1 = newTemp(Ity_I64);
   8682             IRTemp  du0 = newTemp(Ity_I64);
   8683             IRTemp  du1 = newTemp(Ity_I64);
   8684             if (bL) {
   8685                assign(di0, loadLE(Ity_I64, a0));
   8686                assign(di1, loadLE(Ity_I64, a1));
   8687                math_DEINTERLEAVE_2(&du0, &du1, di0, di1, 1 << size);
   8688                putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
   8689                putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
   8690             } else {
   8691                assign(du0, getDRegI64(rD + 0 * regstep));
   8692                assign(du1, getDRegI64(rD + 1 * regstep));
   8693                math_INTERLEAVE_2(&di0, &di1, du0, du1, 1 << size);
   8694                storeLE(a0, mkexpr(di0));
   8695                storeLE(a1, mkexpr(di1));
   8696             }
   8697             IRTemp tmp = newTemp(Ity_I32);
   8698             assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(16)));
   8699             addr = tmp;
   8700          } else {
   8701             vassert(nregs == 4);
   8702             vassert(regstep == 1);
   8703             IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8704             IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8705             IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
   8706             IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
   8707             IRTemp  di0 = newTemp(Ity_I64);
   8708             IRTemp  di1 = newTemp(Ity_I64);
   8709             IRTemp  di2 = newTemp(Ity_I64);
   8710             IRTemp  di3 = newTemp(Ity_I64);
   8711             IRTemp  du0 = newTemp(Ity_I64);
   8712             IRTemp  du1 = newTemp(Ity_I64);
   8713             IRTemp  du2 = newTemp(Ity_I64);
   8714             IRTemp  du3 = newTemp(Ity_I64);
   8715             if (bL) {
   8716                assign(di0, loadLE(Ity_I64, a0));
   8717                assign(di1, loadLE(Ity_I64, a1));
   8718                assign(di2, loadLE(Ity_I64, a2));
   8719                assign(di3, loadLE(Ity_I64, a3));
   8720                // Note spooky interleaving: du0, du2, di0, di1 etc
   8721                math_DEINTERLEAVE_2(&du0, &du2, di0, di1, 1 << size);
   8722                math_DEINTERLEAVE_2(&du1, &du3, di2, di3, 1 << size);
   8723                putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
   8724                putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
   8725                putDRegI64(rD + 2 * regstep, mkexpr(du2), IRTemp_INVALID);
   8726                putDRegI64(rD + 3 * regstep, mkexpr(du3), IRTemp_INVALID);
   8727             } else {
   8728                assign(du0, getDRegI64(rD + 0 * regstep));
   8729                assign(du1, getDRegI64(rD + 1 * regstep));
   8730                assign(du2, getDRegI64(rD + 2 * regstep));
   8731                assign(du3, getDRegI64(rD + 3 * regstep));
   8732                // Note spooky interleaving: du0, du2, di0, di1 etc
   8733                math_INTERLEAVE_2(&di0, &di1, du0, du2, 1 << size);
   8734                math_INTERLEAVE_2(&di2, &di3, du1, du3, 1 << size);
   8735                storeLE(a0, mkexpr(di0));
   8736                storeLE(a1, mkexpr(di1));
   8737                storeLE(a2, mkexpr(di2));
   8738                storeLE(a3, mkexpr(di3));
   8739             }
   8740 
   8741             IRTemp tmp = newTemp(Ity_I32);
   8742             assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
   8743             addr = tmp;
   8744          }
   8745       }
   8746       else
   8747       if (N == 2 /* 3-interleaving -- VLD3/VST3 */) {
   8748          // Dd, Dd+1, Dd+2   regs = 1, inc = 1
   8749          // Dd, Dd+2, Dd+4   regs = 1, inc = 2
   8750          vassert(regs == 1 && (inc == 1 || inc == 2));
   8751          IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8752          IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8753          IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
   8754          IRTemp  di0 = newTemp(Ity_I64);
   8755          IRTemp  di1 = newTemp(Ity_I64);
   8756          IRTemp  di2 = newTemp(Ity_I64);
   8757          IRTemp  du0 = newTemp(Ity_I64);
   8758          IRTemp  du1 = newTemp(Ity_I64);
   8759          IRTemp  du2 = newTemp(Ity_I64);
   8760          if (bL) {
   8761             assign(di0, loadLE(Ity_I64, a0));
   8762             assign(di1, loadLE(Ity_I64, a1));
   8763             assign(di2, loadLE(Ity_I64, a2));
   8764             math_DEINTERLEAVE_3(&du0, &du1, &du2, di0, di1, di2, 1 << size);
   8765             putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
   8766             putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
   8767             putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
   8768          } else {
   8769             assign(du0, getDRegI64(rD + 0 * inc));
   8770             assign(du1, getDRegI64(rD + 1 * inc));
   8771             assign(du2, getDRegI64(rD + 2 * inc));
   8772             math_INTERLEAVE_3(&di0, &di1, &di2, du0, du1, du2, 1 << size);
   8773             storeLE(a0, mkexpr(di0));
   8774             storeLE(a1, mkexpr(di1));
   8775             storeLE(a2, mkexpr(di2));
   8776          }
   8777          IRTemp tmp = newTemp(Ity_I32);
   8778          assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(24)));
   8779          addr = tmp;
   8780       }
   8781       else
   8782       if (N == 3 /* 4-interleaving -- VLD4/VST4 */) {
   8783          // Dd, Dd+1, Dd+2, Dd+3   regs = 1, inc = 1
   8784          // Dd, Dd+2, Dd+4, Dd+6   regs = 1, inc = 2
   8785          vassert(regs == 1 && (inc == 1 || inc == 2));
   8786          IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
   8787          IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
   8788          IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
   8789          IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
   8790          IRTemp  di0 = newTemp(Ity_I64);
   8791          IRTemp  di1 = newTemp(Ity_I64);
   8792          IRTemp  di2 = newTemp(Ity_I64);
   8793          IRTemp  di3 = newTemp(Ity_I64);
   8794          IRTemp  du0 = newTemp(Ity_I64);
   8795          IRTemp  du1 = newTemp(Ity_I64);
   8796          IRTemp  du2 = newTemp(Ity_I64);
   8797          IRTemp  du3 = newTemp(Ity_I64);
   8798          if (bL) {
   8799             assign(di0, loadLE(Ity_I64, a0));
   8800             assign(di1, loadLE(Ity_I64, a1));
   8801             assign(di2, loadLE(Ity_I64, a2));
   8802             assign(di3, loadLE(Ity_I64, a3));
   8803             math_DEINTERLEAVE_4(&du0, &du1, &du2, &du3,
   8804                                 di0, di1, di2, di3, 1 << size);
   8805             putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
   8806             putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
   8807             putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
   8808             putDRegI64(rD + 3 * inc, mkexpr(du3), IRTemp_INVALID);
   8809          } else {
   8810             assign(du0, getDRegI64(rD + 0 * inc));
   8811             assign(du1, getDRegI64(rD + 1 * inc));
   8812             assign(du2, getDRegI64(rD + 2 * inc));
   8813             assign(du3, getDRegI64(rD + 3 * inc));
   8814             math_INTERLEAVE_4(&di0, &di1, &di2, &di3,
   8815                               du0, du1, du2, du3, 1 << size);
   8816             storeLE(a0, mkexpr(di0));
   8817             storeLE(a1, mkexpr(di1));
   8818             storeLE(a2, mkexpr(di2));
   8819             storeLE(a3, mkexpr(di3));
   8820          }
   8821          IRTemp tmp = newTemp(Ity_I32);
   8822          assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
   8823          addr = tmp;
   8824       }
   8825       else {
   8826          vassert(0);
   8827       }
   8828 
   8829       /* Writeback */
   8830       if (rM != 15) {
   8831          IRExpr* e;
   8832          if (rM == 13) {
   8833             e = binop(Iop_Add32, mkexpr(initialRn),
   8834                                  mkU32(8 * (N + 1) * regs));
   8835          } else {
   8836             e = binop(Iop_Add32, mkexpr(initialRn),
   8837                                  mkexpr(initialRm));
   8838          }
   8839          if (isT)
   8840             putIRegT(rN, e, IRTemp_INVALID);
   8841          else
   8842             putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
   8843       }
   8844 
   8845       DIP("v%s%u.%d {", bL ? "ld" : "st", N + 1, 8 << INSN(7,6));
   8846       if ((inc == 1 && regs * (N + 1) > 1)
   8847           || (inc == 2 && regs > 1 && N > 0)) {
   8848          DIP("d%u-d%u", rD, rD + regs * (N + 1) - 1);
   8849       } else {
   8850          UInt r;
   8851          for (r = 0; r < regs; r++) {
   8852             for (i = 0; i <= N; i++) {
   8853                if (i || r)
   8854                   DIP(", ");
   8855                DIP("d%u", rD + r + i * inc);
   8856             }
   8857          }
   8858       }
   8859       DIP("}, [r%u]", rN);
   8860       if (rM != 13 && rM != 15) {
   8861          DIP(", r%u\n", rM);
   8862       } else {
   8863          DIP("%s\n", (rM != 15) ? "!" : "");
   8864       }
   8865       return True;
   8866    }
   8867 #  undef INSN
   8868 }
   8869 
   8870 
   8871 /*------------------------------------------------------------*/
   8872 /*--- NEON, top level control                              ---*/
   8873 /*------------------------------------------------------------*/
   8874 
   8875 /* Both ARM and Thumb */
   8876 
   8877 /* Translate a NEON instruction.  If successful, returns True and
   8878    *dres may or may not be updated.  If failure, returns False and
   8879    doesn't change *dres nor create any IR.
   8880 
   8881    The Thumb and ARM encodings are similar for the 24 bottom bits, but
   8882    the top 8 bits are slightly different.  In both cases, the caller
   8883    must pass the entire 32 bits in 'insn32'.  Callers may pass any
   8884    instruction; this ignores non-NEON ones.
   8885 
   8886    Caller must supply an IRTemp 'condT' holding the gating condition,
   8887    or IRTemp_INVALID indicating the insn is always executed.  In ARM
   8888    code, this must always be IRTemp_INVALID because NEON insns are
   8889    unconditional for ARM; this is enforced with a vassert.
   8890 
   8891    Finally, the caller must indicate, via 'isT', whether this occurs
   8892    in ARM code (False) or in Thumb code (True).
   8893 
   8894    This only handles NEON for ARMv7 and below.  The NEON extensions
   8895    for v8 are handled by decode_V8_instruction.
   8896 */
   8897 static Bool decode_NEON_instruction_ARMv7_and_below (
   8898                /*MOD*/DisResult* dres,   /* not referenced in this body */
   8899                UInt              insn32, /* whole insn, ARM or Thumb form */
   8900                IRTemp            condT,  /* guard, or IRTemp_INVALID */
   8901                Bool              isT     /* True for Thumb, False for ARM */
   8902             )
   8903 {
   8904 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn32, (_bMax), (_bMin))
   8905    /* INSN(hi,lo): bits hi..lo of insn32, obtained via SLICE_UInt. */
   8906    /* There are two kinds of instruction to deal with: data processing
   8907       and load/store.  In each case, in ARM mode we merely identify
   8908       the kind, and pass the insn unchanged to the relevant sub-handler.
   8909       In Thumb mode we identify the kind, keep the bottom 24 bits,
   8910       rebuild the top 8 bits so the insn has the same encoding as in
   8911       ARM, and hand that on to the same sub-handler.
   8912    */
   8913 
   8914    /* In ARM mode, NEON instructions can't be conditional. */
   8915    if (!isT)
   8916       vassert(condT == IRTemp_INVALID);
   8917 
   8918    /* Data processing (U is bit 24 in ARM but bit 28 in Thumb):
   8919       Thumb: 111U 1111 AAAA Axxx xxxx BBBB CCCC xxxx
   8920       ARM:   1111 001U AAAA Axxx xxxx BBBB CCCC xxxx
   8921    */
   8922    if (!isT && INSN(31,25) == BITS7(1,1,1,1,0,0,1)) {
   8923       // ARM, DP: already in ARM form, pass through as-is
   8924       return dis_neon_data_processing(INSN(31,0), condT);
   8925    }
   8926    if (isT && INSN(31,29) == BITS3(1,1,1)
   8927        && INSN(27,24) == BITS4(1,1,1,1)) {
   8928       // Thumb, DP: rebuild the equivalent ARM encoding
   8929       UInt reformatted = INSN(23,0);              // shared low 24 bits
   8930       reformatted |= (((UInt)INSN(28,28)) << 24); // U bit: 28 -> 24
   8931       reformatted |= (((UInt)BITS7(1,1,1,1,0,0,1)) << 25); // ARM top 7
   8932       return dis_neon_data_processing(reformatted, condT);
   8933    }
   8934 
   8935    /* Load/store (only the top byte differs between the two forms):
   8936       Thumb: 1111 1001 AxL0 xxxx xxxx BBBB xxxx xxxx
   8937       ARM:   1111 0100 AxL0 xxxx xxxx BBBB xxxx xxxx
   8938    */
   8939    if (!isT && INSN(31,24) == BITS8(1,1,1,1,0,1,0,0)) {
   8940       // ARM, memory: passed as-is; this handler is also given isT
   8941       return dis_neon_load_or_store(INSN(31,0), isT, condT);
   8942    }
   8943    if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
   8944       UInt reformatted = INSN(23,0);   // Thumb, memory: keep low 24 bits
   8945       reformatted |= (((UInt)BITS8(1,1,1,1,0,1,0,0)) << 24); // ARM top 8
   8946       return dis_neon_load_or_store(reformatted, isT, condT);
   8947    }
   8948 
   8949    /* Doesn't match either kind; nothing has been generated. */
   8950    return False;
   8951 
   8952 #  undef INSN
   8953 }
   8954 
   8955 
   8956 /*------------------------------------------------------------*/
   8957 /*--- V6 MEDIA instructions                                ---*/
   8958 /*------------------------------------------------------------*/
   8959 
   8960 /* Both ARM and Thumb */
   8961 
   8962 /* Translate a V6 media instruction.    If successful, returns
   8963    True and *dres may or may not be updated.  If failure, returns
   8964    False and doesn't change *dres nor create any IR.
   8965 
   8966    The Thumb and ARM encodings are completely different.  In Thumb
   8967    mode, the caller must pass the entire 32 bits.  In ARM mode it must
   8968    pass the lower 28 bits.  Apart from that, callers may pass any
   8969    instruction; this function ignores anything it doesn't recognise.
   8970 
   8971    Caller must supply an IRTemp 'condT' holding the gating condition,
   8972    or IRTemp_INVALID indicating the insn is always executed.
   8973 
   8974    Caller must also supply an ARMCondcode 'conq'.  This is only used
   8975    for debug printing, no other purpose.  For ARM, this is simply the
   8976    top 4 bits of the original instruction.  For Thumb, the condition
   8977    is not (really) known until run time, and so ARMCondAL should be
   8978    passed, only so that printing of these instructions does not show
   8979    any condition.
   8980 
   8981    Finally, the caller must indicate whether this occurs in ARM or in
   8982    Thumb code.
   8983 */
   8984 static Bool decode_V6MEDIA_instruction (
   8985                /*MOD*/DisResult* dres,
   8986                UInt              insnv6m,
   8987                IRTemp            condT,
   8988                ARMCondcode       conq,
   8989                Bool              isT
   8990             )
   8991 {
   8992 #  define INSNA(_bMax,_bMin)   SLICE_UInt(insnv6m, (_bMax), (_bMin))
   8993 #  define INSNT0(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 16) & 0xFFFF), \
   8994                                            (_bMax), (_bMin) )
   8995 #  define INSNT1(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 0)  & 0xFFFF), \
   8996                                            (_bMax), (_bMin) )
   8997    HChar dis_buf[128];
   8998    dis_buf[0] = 0;
   8999 
   9000    if (isT) {
   9001       vassert(conq == ARMCondAL);
   9002    } else {
   9003       vassert(INSNA(31,28) == BITS4(0,0,0,0)); // caller's obligation
   9004       vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
   9005    }
   9006 
   9007    /* ----------- smulbb, smulbt, smultb, smultt ----------- */
   9008    {
   9009      UInt regD = 99, regM = 99, regN = 99, bitM = 0, bitN = 0;
   9010      Bool gate = False;
   9011 
   9012      if (isT) {
   9013         if (INSNT0(15,4) == 0xFB1 && INSNT1(15,12) == BITS4(1,1,1,1)
   9014             && INSNT1(7,6) == BITS2(0,0)) {
   9015            regD = INSNT1(11,8);
   9016            regM = INSNT1(3,0);
   9017            regN = INSNT0(3,0);
   9018            bitM = INSNT1(4,4);
   9019            bitN = INSNT1(5,5);
   9020            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9021               gate = True;
   9022         }
   9023      } else {
   9024         if (BITS8(0,0,0,1,0,1,1,0) == INSNA(27,20) &&
   9025             BITS4(0,0,0,0)         == INSNA(15,12) &&
   9026             BITS4(1,0,0,0)         == (INSNA(7,4) & BITS4(1,0,0,1)) ) {
   9027            regD = INSNA(19,16);
   9028            regM = INSNA(11,8);
   9029            regN = INSNA(3,0);
   9030            bitM = INSNA(6,6);
   9031            bitN = INSNA(5,5);
   9032            if (regD != 15 && regN != 15 && regM != 15)
   9033               gate = True;
   9034         }
   9035      }
   9036 
   9037      if (gate) {
   9038         IRTemp srcN = newTemp(Ity_I32);
   9039         IRTemp srcM = newTemp(Ity_I32);
   9040         IRTemp res  = newTemp(Ity_I32);
   9041 
   9042         assign( srcN, binop(Iop_Sar32,
   9043                             binop(Iop_Shl32,
   9044                                   isT ? getIRegT(regN) : getIRegA(regN),
   9045                                   mkU8(bitN ? 0 : 16)), mkU8(16)) );
   9046         assign( srcM, binop(Iop_Sar32,
   9047                             binop(Iop_Shl32,
   9048                                   isT ? getIRegT(regM) : getIRegA(regM),
   9049                                   mkU8(bitM ? 0 : 16)), mkU8(16)) );
   9050         assign( res, binop(Iop_Mul32, mkexpr(srcN), mkexpr(srcM)) );
   9051 
   9052         if (isT)
   9053            putIRegT( regD, mkexpr(res), condT );
   9054         else
   9055            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9056 
   9057         DIP( "smul%c%c%s r%u, r%u, r%u\n", bitN ? 't' : 'b', bitM ? 't' : 'b',
   9058              nCC(conq), regD, regN, regM );
   9059         return True;
   9060      }
   9061      /* fall through */
   9062    }
   9063 
   9064    /* ------------ smulwb<y><c> <Rd>,<Rn>,<Rm> ------------- */
   9065    /* ------------ smulwt<y><c> <Rd>,<Rn>,<Rm> ------------- */
   9066    {
   9067      UInt regD = 99, regN = 99, regM = 99, bitM = 0;
   9068      Bool gate = False;
   9069 
   9070      if (isT) {
   9071         if (INSNT0(15,4) == 0xFB3 && INSNT1(15,12) == BITS4(1,1,1,1)
   9072             && INSNT1(7,5) == BITS3(0,0,0)) {
   9073           regN = INSNT0(3,0);
   9074           regD = INSNT1(11,8);
   9075           regM = INSNT1(3,0);
   9076           bitM = INSNT1(4,4);
   9077           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9078              gate = True;
   9079         }
   9080      } else {
   9081         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
   9082             INSNA(15,12) == BITS4(0,0,0,0)         &&
   9083             (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,1,0)) {
   9084            regD = INSNA(19,16);
   9085            regN = INSNA(3,0);
   9086            regM = INSNA(11,8);
   9087            bitM = INSNA(6,6);
   9088            if (regD != 15 && regN != 15 && regM != 15)
   9089               gate = True;
   9090         }
   9091      }
   9092 
   9093      if (gate) {
   9094         IRTemp irt_prod = newTemp(Ity_I64);
   9095 
   9096         assign( irt_prod,
   9097                 binop(Iop_MullS32,
   9098                       isT ? getIRegT(regN) : getIRegA(regN),
   9099                       binop(Iop_Sar32,
   9100                             binop(Iop_Shl32,
   9101                                   isT ? getIRegT(regM) : getIRegA(regM),
   9102                                   mkU8(bitM ? 0 : 16)),
   9103                             mkU8(16))) );
   9104 
   9105         IRExpr* ire_result = binop(Iop_Or32,
   9106                                    binop( Iop_Shl32,
   9107                                           unop(Iop_64HIto32, mkexpr(irt_prod)),
   9108                                           mkU8(16) ),
   9109                                    binop( Iop_Shr32,
   9110                                           unop(Iop_64to32, mkexpr(irt_prod)),
   9111                                           mkU8(16) ) );
   9112 
   9113         if (isT)
   9114            putIRegT( regD, ire_result, condT );
   9115         else
   9116            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9117 
   9118         DIP("smulw%c%s r%u, r%u, r%u\n",
   9119             bitM ? 't' : 'b', nCC(conq),regD,regN,regM);
   9120         return True;
   9121      }
   9122      /* fall through */
   9123    }
   9124 
   9125    /* ------------ pkhbt<c> Rd, Rn, Rm {,LSL #imm} ------------- */
   9126    /* ------------ pkhtb<c> Rd, Rn, Rm {,ASR #imm} ------------- */
   9127    {
   9128      UInt regD = 99, regN = 99, regM = 99, imm5 = 99, shift_type = 99;
   9129      Bool tbform = False;
   9130      Bool gate = False;
   9131 
   9132      if (isT) {
   9133         if (INSNT0(15,4) == 0xEAC
   9134             && INSNT1(15,15) == 0 && INSNT1(4,4) == 0) {
   9135            regN = INSNT0(3,0);
   9136            regD = INSNT1(11,8);
   9137            regM = INSNT1(3,0);
   9138            imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
   9139            shift_type = (INSNT1(5,5) << 1) | 0;
   9140            tbform = (INSNT1(5,5) == 0) ? False : True;
   9141            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9142               gate = True;
   9143         }
   9144      } else {
   9145         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
   9146             INSNA(5,4)   == BITS2(0,1)             &&
   9147             (INSNA(6,6)  == 0 || INSNA(6,6) == 1) ) {
   9148            regD = INSNA(15,12);
   9149            regN = INSNA(19,16);
   9150            regM = INSNA(3,0);
   9151            imm5 = INSNA(11,7);
   9152            shift_type = (INSNA(6,6) << 1) | 0;
   9153            tbform = (INSNA(6,6) == 0) ? False : True;
   9154            if (regD != 15 && regN != 15 && regM != 15)
   9155               gate = True;
   9156         }
   9157      }
   9158 
   9159      if (gate) {
   9160         IRTemp irt_regM       = newTemp(Ity_I32);
   9161         IRTemp irt_regM_shift = newTemp(Ity_I32);
   9162         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   9163         compute_result_and_C_after_shift_by_imm5(
   9164            dis_buf, &irt_regM_shift, NULL, irt_regM, shift_type, imm5, regM );
   9165 
   9166         UInt mask = (tbform == True) ? 0x0000FFFF : 0xFFFF0000;
   9167         IRExpr* ire_result
   9168           = binop( Iop_Or32,
   9169                    binop(Iop_And32, mkexpr(irt_regM_shift), mkU32(mask)),
   9170                    binop(Iop_And32, isT ? getIRegT(regN) : getIRegA(regN),
   9171                                     unop(Iop_Not32, mkU32(mask))) );
   9172 
   9173         if (isT)
   9174            putIRegT( regD, ire_result, condT );
   9175         else
   9176            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9177 
   9178         DIP( "pkh%s%s r%u, r%u, r%u %s\n", tbform ? "tb" : "bt",
   9179              nCC(conq), regD, regN, regM, dis_buf );
   9180 
   9181         return True;
   9182      }
   9183      /* fall through */
   9184    }
   9185 
   9186    /* ---------- usat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
   9187    {
   9188      UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
   9189      Bool gate = False;
   9190 
   9191      if (isT) {
   9192         if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,1,0)
   9193             && INSNT0(4,4) == 0
   9194             && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
   9195            regD       = INSNT1(11,8);
   9196            regN       = INSNT0(3,0);
   9197            shift_type = (INSNT0(5,5) << 1) | 0;
   9198            imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
   9199            sat_imm    = INSNT1(4,0);
   9200            if (!isBadRegT(regD) && !isBadRegT(regN))
   9201               gate = True;
   9202            if (shift_type == BITS2(1,0) && imm5 == 0)
   9203               gate = False;
   9204         }
   9205      } else {
   9206         if (INSNA(27,21) == BITS7(0,1,1,0,1,1,1) &&
   9207             INSNA(5,4)   == BITS2(0,1)) {
   9208            regD       = INSNA(15,12);
   9209            regN       = INSNA(3,0);
   9210            shift_type = (INSNA(6,6) << 1) | 0;
   9211            imm5       = INSNA(11,7);
   9212            sat_imm    = INSNA(20,16);
   9213            if (regD != 15 && regN != 15)
   9214               gate = True;
   9215         }
   9216      }
   9217 
   9218      if (gate) {
   9219         IRTemp irt_regN       = newTemp(Ity_I32);
   9220         IRTemp irt_regN_shift = newTemp(Ity_I32);
   9221         IRTemp irt_sat_Q      = newTemp(Ity_I32);
   9222         IRTemp irt_result     = newTemp(Ity_I32);
   9223 
   9224         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9225         compute_result_and_C_after_shift_by_imm5(
   9226                 dis_buf, &irt_regN_shift, NULL,
   9227                 irt_regN, shift_type, imm5, regN );
   9228 
   9229         armUnsignedSatQ( &irt_result, &irt_sat_Q, irt_regN_shift, sat_imm );
   9230         or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
   9231 
   9232         if (isT)
   9233            putIRegT( regD, mkexpr(irt_result), condT );
   9234         else
   9235            putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
   9236 
   9237         DIP("usat%s r%u, #0x%04x, %s\n",
   9238             nCC(conq), regD, imm5, dis_buf);
   9239         return True;
   9240      }
   9241      /* fall through */
   9242    }
   9243 
   9244   /* ----------- ssat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
   9245    {
   9246      UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
   9247      Bool gate = False;
   9248 
   9249      if (isT) {
   9250         if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
   9251             && INSNT0(4,4) == 0
   9252             && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
   9253            regD       = INSNT1(11,8);
   9254            regN       = INSNT0(3,0);
   9255            shift_type = (INSNT0(5,5) << 1) | 0;
   9256            imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
   9257            sat_imm    = INSNT1(4,0) + 1;
   9258            if (!isBadRegT(regD) && !isBadRegT(regN))
   9259               gate = True;
   9260            if (shift_type == BITS2(1,0) && imm5 == 0)
   9261               gate = False;
   9262         }
   9263      } else {
   9264         if (INSNA(27,21) == BITS7(0,1,1,0,1,0,1) &&
   9265             INSNA(5,4)   == BITS2(0,1)) {
   9266            regD       = INSNA(15,12);
   9267            regN       = INSNA(3,0);
   9268            shift_type = (INSNA(6,6) << 1) | 0;
   9269            imm5       = INSNA(11,7);
   9270            sat_imm    = INSNA(20,16) + 1;
   9271            if (regD != 15 && regN != 15)
   9272               gate = True;
   9273         }
   9274      }
   9275 
   9276      if (gate) {
   9277         IRTemp irt_regN       = newTemp(Ity_I32);
   9278         IRTemp irt_regN_shift = newTemp(Ity_I32);
   9279         IRTemp irt_sat_Q      = newTemp(Ity_I32);
   9280         IRTemp irt_result     = newTemp(Ity_I32);
   9281 
   9282         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9283         compute_result_and_C_after_shift_by_imm5(
   9284                 dis_buf, &irt_regN_shift, NULL,
   9285                 irt_regN, shift_type, imm5, regN );
   9286 
   9287         armSignedSatQ( irt_regN_shift, sat_imm, &irt_result, &irt_sat_Q );
   9288         or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
   9289 
   9290         if (isT)
   9291            putIRegT( regD, mkexpr(irt_result), condT );
   9292         else
   9293            putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
   9294 
   9295         DIP( "ssat%s r%u, #0x%04x, %s\n",
   9296              nCC(conq), regD, imm5, dis_buf);
   9297         return True;
   9298     }
   9299     /* fall through */
   9300   }
   9301 
   9302    /* ----------- ssat16<c> <Rd>,#<imm>,<Rn> ----------- */
   9303    {
   9304      UInt regD = 99, regN = 99, sat_imm = 99;
   9305      Bool gate = False;
   9306 
   9307      if (isT) {
   9308         if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
   9309             && INSNT0(5,4) == BITS2(1,0)
   9310             && INSNT1(15,12) == BITS4(0,0,0,0)
   9311             && INSNT1(7,4) == BITS4(0,0,0,0)) {
   9312            regD       = INSNT1(11,8);
   9313            regN       = INSNT0(3,0);
   9314            sat_imm    = INSNT1(3,0) + 1;
   9315            if (!isBadRegT(regD) && !isBadRegT(regN))
   9316               gate = True;
   9317         }
   9318      } else {
   9319         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,1,0) &&
   9320             INSNA(11,4)   == BITS8(1,1,1,1,0,0,1,1)) {
   9321            regD       = INSNA(15,12);
   9322            regN       = INSNA(3,0);
   9323            sat_imm    = INSNA(19,16) + 1;
   9324            if (regD != 15 && regN != 15)
   9325               gate = True;
   9326         }
   9327      }
   9328 
   9329      if (gate) {
   9330         IRTemp irt_regN    = newTemp(Ity_I32);
   9331         IRTemp irt_regN_lo = newTemp(Ity_I32);
   9332         IRTemp irt_regN_hi = newTemp(Ity_I32);
   9333         IRTemp irt_Q_lo    = newTemp(Ity_I32);
   9334         IRTemp irt_Q_hi    = newTemp(Ity_I32);
   9335         IRTemp irt_res_lo  = newTemp(Ity_I32);
   9336         IRTemp irt_res_hi  = newTemp(Ity_I32);
   9337 
   9338         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9339         assign( irt_regN_lo,
   9340                 binop( Iop_Sar32,
   9341                        binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
   9342                        mkU8(16)) );
   9343         assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
   9344 
   9345         armSignedSatQ( irt_regN_lo, sat_imm, &irt_res_lo, &irt_Q_lo );
   9346         or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
   9347 
   9348         armSignedSatQ( irt_regN_hi, sat_imm, &irt_res_hi, &irt_Q_hi );
   9349         or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
   9350 
   9351         IRExpr* ire_result
   9352            = binop(Iop_Or32,
   9353                    binop(Iop_And32, mkexpr(irt_res_lo), mkU32(0xFFFF)),
   9354                    binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)));
   9355         if (isT)
   9356            putIRegT( regD, ire_result, condT );
   9357         else
   9358            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9359 
   9360         DIP( "ssat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
   9361         return True;
   9362      }
   9363      /* fall through */
   9364    }
   9365 
   9366    /* -------------- usat16<c> <Rd>,#<imm4>,<Rn> --------------- */
   9367    {
   9368      UInt regD = 99, regN = 99, sat_imm = 99;
   9369      Bool gate = False;
   9370 
   9371      if (isT) {
   9372         if (INSNT0(15,4) == 0xF3A && (INSNT1(15,0) & 0xF0F0) == 0x0000) {
   9373            regN = INSNT0(3,0);
   9374            regD = INSNT1(11,8);
   9375            sat_imm = INSNT1(3,0);
   9376            if (!isBadRegT(regD) && !isBadRegT(regN))
   9377               gate = True;
   9378        }
   9379      } else {
   9380         if (INSNA(27,20) == BITS8(0,1,1,0,1,1,1,0) &&
   9381             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9382             INSNA(7,4)   == BITS4(0,0,1,1)) {
   9383            regD    = INSNA(15,12);
   9384            regN    = INSNA(3,0);
   9385            sat_imm = INSNA(19,16);
   9386            if (regD != 15 && regN != 15)
   9387               gate = True;
   9388         }
   9389      }
   9390 
   9391      if (gate) {
   9392         IRTemp irt_regN    = newTemp(Ity_I32);
   9393         IRTemp irt_regN_lo = newTemp(Ity_I32);
   9394         IRTemp irt_regN_hi = newTemp(Ity_I32);
   9395         IRTemp irt_Q_lo    = newTemp(Ity_I32);
   9396         IRTemp irt_Q_hi    = newTemp(Ity_I32);
   9397         IRTemp irt_res_lo  = newTemp(Ity_I32);
   9398         IRTemp irt_res_hi  = newTemp(Ity_I32);
   9399 
   9400         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   9401         assign( irt_regN_lo, binop( Iop_Sar32,
   9402                                     binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
   9403                                     mkU8(16)) );
   9404         assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
   9405 
   9406         armUnsignedSatQ( &irt_res_lo, &irt_Q_lo, irt_regN_lo, sat_imm );
   9407         or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
   9408 
   9409         armUnsignedSatQ( &irt_res_hi, &irt_Q_hi, irt_regN_hi, sat_imm );
   9410         or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
   9411 
   9412         IRExpr* ire_result = binop( Iop_Or32,
   9413                                     binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)),
   9414                                     mkexpr(irt_res_lo) );
   9415 
   9416         if (isT)
   9417            putIRegT( regD, ire_result, condT );
   9418         else
   9419            putIRegA( regD, ire_result, condT, Ijk_Boring );
   9420 
   9421         DIP( "usat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
   9422         return True;
   9423      }
   9424      /* fall through */
   9425    }
   9426 
   9427    /* -------------- uadd16<c> <Rd>,<Rn>,<Rm> -------------- */
   9428    {
   9429      UInt regD = 99, regN = 99, regM = 99;
   9430      Bool gate = False;
   9431 
   9432      if (isT) {
   9433         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9434            regN = INSNT0(3,0);
   9435            regD = INSNT1(11,8);
   9436            regM = INSNT1(3,0);
   9437            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9438               gate = True;
   9439         }
   9440      } else {
   9441         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9442             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9443             INSNA(7,4)   == BITS4(0,0,0,1)) {
   9444            regD = INSNA(15,12);
   9445            regN = INSNA(19,16);
   9446            regM = INSNA(3,0);
   9447            if (regD != 15 && regN != 15 && regM != 15)
   9448               gate = True;
   9449         }
   9450      }
   9451 
   9452      if (gate) {
   9453         IRTemp rNt  = newTemp(Ity_I32);
   9454         IRTemp rMt  = newTemp(Ity_I32);
   9455         IRTemp res  = newTemp(Ity_I32);
   9456         IRTemp reso = newTemp(Ity_I32);
   9457 
   9458         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9459         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9460 
   9461         assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
   9462         if (isT)
   9463            putIRegT( regD, mkexpr(res), condT );
   9464         else
   9465            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9466 
   9467         assign(reso, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
   9468         set_GE_32_10_from_bits_31_15(reso, condT);
   9469 
   9470         DIP("uadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9471         return True;
   9472      }
   9473      /* fall through */
   9474    }
   9475 
   9476    /* -------------- sadd16<c> <Rd>,<Rn>,<Rm> -------------- */
   9477    {
   9478      UInt regD = 99, regN = 99, regM = 99;
   9479      Bool gate = False;
   9480 
   9481      if (isT) {
   9482         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9483            regN = INSNT0(3,0);
   9484            regD = INSNT1(11,8);
   9485            regM = INSNT1(3,0);
   9486            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9487               gate = True;
   9488         }
   9489      } else {
   9490         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9491             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9492             INSNA(7,4)   == BITS4(0,0,0,1)) {
   9493            regD = INSNA(15,12);
   9494            regN = INSNA(19,16);
   9495            regM = INSNA(3,0);
   9496            if (regD != 15 && regN != 15 && regM != 15)
   9497               gate = True;
   9498         }
   9499      }
   9500 
   9501      if (gate) {
   9502         IRTemp rNt  = newTemp(Ity_I32);
   9503         IRTemp rMt  = newTemp(Ity_I32);
   9504         IRTemp res  = newTemp(Ity_I32);
   9505         IRTemp reso = newTemp(Ity_I32);
   9506 
   9507         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9508         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9509 
   9510         assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
   9511         if (isT)
   9512            putIRegT( regD, mkexpr(res), condT );
   9513         else
   9514            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9515 
   9516         assign(reso, unop(Iop_Not32,
   9517                           binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt))));
   9518         set_GE_32_10_from_bits_31_15(reso, condT);
   9519 
   9520         DIP("sadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9521         return True;
   9522      }
   9523      /* fall through */
   9524    }
   9525 
   9526    /* ---------------- usub16<c> <Rd>,<Rn>,<Rm> ---------------- */
   9527    {
   9528      UInt regD = 99, regN = 99, regM = 99;
   9529      Bool gate = False;
   9530 
   9531      if (isT) {
   9532         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9533            regN = INSNT0(3,0);
   9534            regD = INSNT1(11,8);
   9535            regM = INSNT1(3,0);
   9536            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9537               gate = True;
   9538         }
   9539      } else {
   9540         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9541             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9542             INSNA(7,4)   == BITS4(0,1,1,1)) {
   9543            regD = INSNA(15,12);
   9544            regN = INSNA(19,16);
   9545            regM = INSNA(3,0);
   9546            if (regD != 15 && regN != 15 && regM != 15)
   9547              gate = True;
   9548         }
   9549      }
   9550 
   9551      if (gate) {
   9552         IRTemp rNt  = newTemp(Ity_I32);
   9553         IRTemp rMt  = newTemp(Ity_I32);
   9554         IRTemp res  = newTemp(Ity_I32);
   9555         IRTemp reso = newTemp(Ity_I32);
   9556 
   9557         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9558         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9559 
   9560         assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
   9561         if (isT)
   9562            putIRegT( regD, mkexpr(res), condT );
   9563         else
   9564            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9565 
   9566         assign(reso, unop(Iop_Not32,
   9567                           binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt))));
   9568         set_GE_32_10_from_bits_31_15(reso, condT);
   9569 
   9570         DIP("usub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9571         return True;
   9572      }
   9573      /* fall through */
   9574    }
   9575 
   9576    /* -------------- ssub16<c> <Rd>,<Rn>,<Rm> -------------- */
   9577    {
   9578      UInt regD = 99, regN = 99, regM = 99;
   9579      Bool gate = False;
   9580 
   9581      if (isT) {
   9582         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9583            regN = INSNT0(3,0);
   9584            regD = INSNT1(11,8);
   9585            regM = INSNT1(3,0);
   9586            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9587               gate = True;
   9588         }
   9589      } else {
   9590         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9591             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9592             INSNA(7,4)   == BITS4(0,1,1,1)) {
   9593            regD = INSNA(15,12);
   9594            regN = INSNA(19,16);
   9595            regM = INSNA(3,0);
   9596            if (regD != 15 && regN != 15 && regM != 15)
   9597               gate = True;
   9598         }
   9599      }
   9600 
   9601      if (gate) {
   9602         IRTemp rNt  = newTemp(Ity_I32);
   9603         IRTemp rMt  = newTemp(Ity_I32);
   9604         IRTemp res  = newTemp(Ity_I32);
   9605         IRTemp reso = newTemp(Ity_I32);
   9606 
   9607         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9608         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9609 
   9610         assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
   9611         if (isT)
   9612            putIRegT( regD, mkexpr(res), condT );
   9613         else
   9614            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9615 
   9616         assign(reso, unop(Iop_Not32,
   9617                           binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt))));
   9618         set_GE_32_10_from_bits_31_15(reso, condT);
   9619 
   9620         DIP("ssub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9621         return True;
   9622      }
   9623      /* fall through */
   9624    }
   9625 
   9626    /* ----------------- uadd8<c> <Rd>,<Rn>,<Rm> ---------------- */
   9627    {
   9628      UInt regD = 99, regN = 99, regM = 99;
   9629      Bool gate = False;
   9630 
   9631      if (isT) {
   9632         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9633            regN = INSNT0(3,0);
   9634            regD = INSNT1(11,8);
   9635            regM = INSNT1(3,0);
   9636            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9637               gate = True;
   9638         }
   9639      } else {
   9640         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9641             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9642             (INSNA(7,4)  == BITS4(1,0,0,1))) {
   9643            regD = INSNA(15,12);
   9644            regN = INSNA(19,16);
   9645            regM = INSNA(3,0);
   9646            if (regD != 15 && regN != 15 && regM != 15)
   9647               gate = True;
   9648         }
   9649      }
   9650 
   9651      if (gate) {
   9652         IRTemp rNt  = newTemp(Ity_I32);
   9653         IRTemp rMt  = newTemp(Ity_I32);
   9654         IRTemp res  = newTemp(Ity_I32);
   9655         IRTemp reso = newTemp(Ity_I32);
   9656 
   9657         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9658         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9659 
   9660         assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
   9661         if (isT)
   9662            putIRegT( regD, mkexpr(res), condT );
   9663         else
   9664            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9665 
   9666         assign(reso, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
   9667         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9668 
   9669         DIP("uadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9670         return True;
   9671      }
   9672      /* fall through */
   9673    }
   9674 
   9675    /* ------------------- sadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9676    {
   9677      UInt regD = 99, regN = 99, regM = 99;
   9678      Bool gate = False;
   9679 
   9680      if (isT) {
   9681         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9682            regN = INSNT0(3,0);
   9683            regD = INSNT1(11,8);
   9684            regM = INSNT1(3,0);
   9685            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9686               gate = True;
   9687         }
   9688      } else {
   9689         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9690             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9691             (INSNA(7,4)  == BITS4(1,0,0,1))) {
   9692            regD = INSNA(15,12);
   9693            regN = INSNA(19,16);
   9694            regM = INSNA(3,0);
   9695            if (regD != 15 && regN != 15 && regM != 15)
   9696               gate = True;
   9697         }
   9698      }
   9699 
   9700      if (gate) {
   9701         IRTemp rNt  = newTemp(Ity_I32);
   9702         IRTemp rMt  = newTemp(Ity_I32);
   9703         IRTemp res  = newTemp(Ity_I32);
   9704         IRTemp reso = newTemp(Ity_I32);
   9705 
   9706         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9707         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9708 
   9709         assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
   9710         if (isT)
   9711            putIRegT( regD, mkexpr(res), condT );
   9712         else
   9713            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9714 
   9715         assign(reso, unop(Iop_Not32,
   9716                           binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt))));
   9717         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9718 
   9719         DIP("sadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9720         return True;
   9721      }
   9722      /* fall through */
   9723    }
   9724 
   9725    /* ------------------- usub8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9726    {
   9727      UInt regD = 99, regN = 99, regM = 99;
   9728      Bool gate = False;
   9729 
   9730      if (isT) {
   9731         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   9732            regN = INSNT0(3,0);
   9733            regD = INSNT1(11,8);
   9734            regM = INSNT1(3,0);
   9735            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9736               gate = True;
   9737         }
   9738      } else {
   9739         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   9740             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9741             (INSNA(7,4)  == BITS4(1,1,1,1))) {
   9742            regD = INSNA(15,12);
   9743            regN = INSNA(19,16);
   9744            regM = INSNA(3,0);
   9745            if (regD != 15 && regN != 15 && regM != 15)
   9746              gate = True;
   9747         }
   9748      }
   9749 
   9750      if (gate) {
   9751         IRTemp rNt  = newTemp(Ity_I32);
   9752         IRTemp rMt  = newTemp(Ity_I32);
   9753         IRTemp res  = newTemp(Ity_I32);
   9754         IRTemp reso = newTemp(Ity_I32);
   9755 
   9756         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9757         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9758 
   9759         assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
   9760         if (isT)
   9761            putIRegT( regD, mkexpr(res), condT );
   9762         else
   9763            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9764 
   9765         assign(reso, unop(Iop_Not32,
   9766                           binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt))));
   9767         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9768 
   9769         DIP("usub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9770         return True;
   9771      }
   9772      /* fall through */
   9773    }
   9774 
   9775    /* ------------------- ssub8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9776    {
   9777      UInt regD = 99, regN = 99, regM = 99;
   9778      Bool gate = False;
   9779 
   9780      if (isT) {
   9781         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   9782            regN = INSNT0(3,0);
   9783            regD = INSNT1(11,8);
   9784            regM = INSNT1(3,0);
   9785            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9786               gate = True;
   9787         }
   9788      } else {
   9789         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   9790             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9791             INSNA(7,4)   == BITS4(1,1,1,1)) {
   9792            regD = INSNA(15,12);
   9793            regN = INSNA(19,16);
   9794            regM = INSNA(3,0);
   9795            if (regD != 15 && regN != 15 && regM != 15)
   9796               gate = True;
   9797         }
   9798      }
   9799 
   9800      if (gate) {
   9801         IRTemp rNt  = newTemp(Ity_I32);
   9802         IRTemp rMt  = newTemp(Ity_I32);
   9803         IRTemp res  = newTemp(Ity_I32);
   9804         IRTemp reso = newTemp(Ity_I32);
   9805 
   9806         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9807         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9808 
   9809         assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
   9810         if (isT)
   9811            putIRegT( regD, mkexpr(res), condT );
   9812         else
   9813            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
   9814 
   9815         assign(reso, unop(Iop_Not32,
   9816                           binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt))));
   9817         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
   9818 
   9819         DIP("ssub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9820         return True;
   9821      }
   9822      /* fall through */
   9823    }
   9824 
   9825    /* ------------------ qadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
   9826    {
   9827      UInt regD = 99, regN = 99, regM = 99;
   9828      Bool gate = False;
   9829 
   9830      if (isT) {
   9831         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   9832            regN = INSNT0(3,0);
   9833            regD = INSNT1(11,8);
   9834            regM = INSNT1(3,0);
   9835            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9836               gate = True;
   9837         }
   9838      } else {
   9839         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   9840             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9841             INSNA(7,4)   == BITS4(1,0,0,1)) {
   9842            regD = INSNA(15,12);
   9843            regN = INSNA(19,16);
   9844            regM = INSNA(3,0);
   9845            if (regD != 15 && regN != 15 && regM != 15)
   9846               gate = True;
   9847         }
   9848      }
   9849 
   9850      if (gate) {
   9851         IRTemp rNt   = newTemp(Ity_I32);
   9852         IRTemp rMt   = newTemp(Ity_I32);
   9853         IRTemp res_q = newTemp(Ity_I32);
   9854 
   9855         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9856         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9857 
   9858         assign(res_q, binop(Iop_QAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
   9859         if (isT)
   9860            putIRegT( regD, mkexpr(res_q), condT );
   9861         else
   9862            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9863 
   9864         DIP("qadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9865         return True;
   9866      }
   9867      /* fall through */
   9868    }
   9869 
   9870    /* ------------------ qsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
   9871    {
   9872      UInt regD = 99, regN = 99, regM = 99;
   9873      Bool gate = False;
   9874 
   9875      if (isT) {
   9876         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   9877            regN = INSNT0(3,0);
   9878            regD = INSNT1(11,8);
   9879            regM = INSNT1(3,0);
   9880            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9881               gate = True;
   9882         }
   9883      } else {
   9884         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   9885             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9886             INSNA(7,4)   == BITS4(1,1,1,1)) {
   9887            regD = INSNA(15,12);
   9888            regN = INSNA(19,16);
   9889            regM = INSNA(3,0);
   9890            if (regD != 15 && regN != 15 && regM != 15)
   9891               gate = True;
   9892         }
   9893      }
   9894 
   9895      if (gate) {
   9896         IRTemp rNt   = newTemp(Ity_I32);
   9897         IRTemp rMt   = newTemp(Ity_I32);
   9898         IRTemp res_q = newTemp(Ity_I32);
   9899 
   9900         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9901         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9902 
   9903         assign(res_q, binop(Iop_QSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
   9904         if (isT)
   9905            putIRegT( regD, mkexpr(res_q), condT );
   9906         else
   9907            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9908 
   9909         DIP("qsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9910         return True;
   9911      }
   9912      /* fall through */
   9913    }
   9914 
   9915    /* ------------------ uqadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9916    {
   9917      UInt regD = 99, regN = 99, regM = 99;
   9918      Bool gate = False;
   9919 
   9920      if (isT) {
   9921         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   9922            regN = INSNT0(3,0);
   9923            regD = INSNT1(11,8);
   9924            regM = INSNT1(3,0);
   9925            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9926               gate = True;
   9927         }
   9928      } else {
   9929         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   9930             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9931             (INSNA(7,4)  == BITS4(1,0,0,1))) {
   9932            regD = INSNA(15,12);
   9933            regN = INSNA(19,16);
   9934            regM = INSNA(3,0);
   9935            if (regD != 15 && regN != 15 && regM != 15)
   9936               gate = True;
   9937         }
   9938      }
   9939 
   9940      if (gate) {
   9941         IRTemp rNt   = newTemp(Ity_I32);
   9942         IRTemp rMt   = newTemp(Ity_I32);
   9943         IRTemp res_q = newTemp(Ity_I32);
   9944 
   9945         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9946         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9947 
   9948         assign(res_q, binop(Iop_QAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
   9949         if (isT)
   9950            putIRegT( regD, mkexpr(res_q), condT );
   9951         else
   9952            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9953 
   9954         DIP("uqadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   9955         return True;
   9956      }
   9957      /* fall through */
   9958    }
   9959 
   9960    /* ------------------ uqsub8<c> <Rd>,<Rn>,<Rm> ------------------ */
   9961    {
   9962      UInt regD = 99, regN = 99, regM = 99;
   9963      Bool gate = False;
   9964 
   9965      if (isT) {
   9966         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   9967            regN = INSNT0(3,0);
   9968            regD = INSNT1(11,8);
   9969            regM = INSNT1(3,0);
   9970            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   9971               gate = True;
   9972         }
   9973      } else {
   9974         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   9975             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   9976             (INSNA(7,4)  == BITS4(1,1,1,1))) {
   9977            regD = INSNA(15,12);
   9978            regN = INSNA(19,16);
   9979            regM = INSNA(3,0);
   9980            if (regD != 15 && regN != 15 && regM != 15)
   9981              gate = True;
   9982         }
   9983      }
   9984 
   9985      if (gate) {
   9986         IRTemp rNt   = newTemp(Ity_I32);
   9987         IRTemp rMt   = newTemp(Ity_I32);
   9988         IRTemp res_q = newTemp(Ity_I32);
   9989 
   9990         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   9991         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   9992 
   9993         assign(res_q, binop(Iop_QSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
   9994         if (isT)
   9995            putIRegT( regD, mkexpr(res_q), condT );
   9996         else
   9997            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   9998 
   9999         DIP("uqsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10000         return True;
   10001      }
   10002      /* fall through */
   10003    }
   10004 
   10005    /* ----------------- uhadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
   10006    {
   10007      UInt regD = 99, regN = 99, regM = 99;
   10008      Bool gate = False;
   10009 
   10010      if (isT) {
   10011         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   10012            regN = INSNT0(3,0);
   10013            regD = INSNT1(11,8);
   10014            regM = INSNT1(3,0);
   10015            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10016               gate = True;
   10017         }
   10018      } else {
   10019         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   10020             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10021             INSNA(7,4)   == BITS4(1,0,0,1)) {
   10022            regD = INSNA(15,12);
   10023            regN = INSNA(19,16);
   10024            regM = INSNA(3,0);
   10025            if (regD != 15 && regN != 15 && regM != 15)
   10026               gate = True;
   10027         }
   10028      }
   10029 
   10030      if (gate) {
   10031         IRTemp rNt   = newTemp(Ity_I32);
   10032         IRTemp rMt   = newTemp(Ity_I32);
   10033         IRTemp res_q = newTemp(Ity_I32);
   10034 
   10035         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10036         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10037 
   10038         assign(res_q, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
   10039         if (isT)
   10040            putIRegT( regD, mkexpr(res_q), condT );
   10041         else
   10042            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10043 
   10044         DIP("uhadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10045         return True;
   10046      }
   10047      /* fall through */
   10048    }
   10049 
   10050    /* ----------------- uhadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
   10051    {
   10052      UInt regD = 99, regN = 99, regM = 99;
   10053      Bool gate = False;
   10054 
   10055      if (isT) {
   10056         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   10057            regN = INSNT0(3,0);
   10058            regD = INSNT1(11,8);
   10059            regM = INSNT1(3,0);
   10060            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10061               gate = True;
   10062         }
   10063      } else {
   10064         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   10065             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10066             INSNA(7,4)   == BITS4(0,0,0,1)) {
   10067            regD = INSNA(15,12);
   10068            regN = INSNA(19,16);
   10069            regM = INSNA(3,0);
   10070            if (regD != 15 && regN != 15 && regM != 15)
   10071               gate = True;
   10072         }
   10073      }
   10074 
   10075      if (gate) {
   10076         IRTemp rNt   = newTemp(Ity_I32);
   10077         IRTemp rMt   = newTemp(Ity_I32);
   10078         IRTemp res_q = newTemp(Ity_I32);
   10079 
   10080         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10081         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10082 
   10083         assign(res_q, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
   10084         if (isT)
   10085            putIRegT( regD, mkexpr(res_q), condT );
   10086         else
   10087            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10088 
   10089         DIP("uhadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10090         return True;
   10091      }
   10092      /* fall through */
   10093    }
   10094 
   10095    /* ----------------- shadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
   10096    {
   10097      UInt regD = 99, regN = 99, regM = 99;
   10098      Bool gate = False;
   10099 
   10100      if (isT) {
   10101         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   10102            regN = INSNT0(3,0);
   10103            regD = INSNT1(11,8);
   10104            regM = INSNT1(3,0);
   10105            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10106               gate = True;
   10107         }
   10108      } else {
   10109         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   10110             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10111             INSNA(7,4)   == BITS4(1,0,0,1)) {
   10112            regD = INSNA(15,12);
   10113            regN = INSNA(19,16);
   10114            regM = INSNA(3,0);
   10115            if (regD != 15 && regN != 15 && regM != 15)
   10116               gate = True;
   10117         }
   10118      }
   10119 
   10120      if (gate) {
   10121         IRTemp rNt   = newTemp(Ity_I32);
   10122         IRTemp rMt   = newTemp(Ity_I32);
   10123         IRTemp res_q = newTemp(Ity_I32);
   10124 
   10125         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10126         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10127 
   10128         assign(res_q, binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
   10129         if (isT)
   10130            putIRegT( regD, mkexpr(res_q), condT );
   10131         else
   10132            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10133 
   10134         DIP("shadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10135         return True;
   10136      }
   10137      /* fall through */
   10138    }
   10139 
   10140    /* ------------------ qadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
   10141    {
   10142      UInt regD = 99, regN = 99, regM = 99;
   10143      Bool gate = False;
   10144 
   10145      if (isT) {
   10146         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10147            regN = INSNT0(3,0);
   10148            regD = INSNT1(11,8);
   10149            regM = INSNT1(3,0);
   10150            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10151               gate = True;
   10152         }
   10153      } else {
   10154         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10155             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10156             INSNA(7,4)   == BITS4(0,0,0,1)) {
   10157            regD = INSNA(15,12);
   10158            regN = INSNA(19,16);
   10159            regM = INSNA(3,0);
   10160            if (regD != 15 && regN != 15 && regM != 15)
   10161               gate = True;
   10162         }
   10163      }
   10164 
   10165      if (gate) {
   10166         IRTemp rNt   = newTemp(Ity_I32);
   10167         IRTemp rMt   = newTemp(Ity_I32);
   10168         IRTemp res_q = newTemp(Ity_I32);
   10169 
   10170         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10171         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10172 
   10173         assign(res_q, binop(Iop_QAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
   10174         if (isT)
   10175            putIRegT( regD, mkexpr(res_q), condT );
   10176         else
   10177            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10178 
   10179         DIP("qadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10180         return True;
   10181      }
   10182      /* fall through */
   10183    }
   10184 
   10185    /* ------------------ qsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
   10186    {
              /* QSUB16: match the Thumb (isT) or the ARM encoding and, on a
                 match, emit Rd = Iop_QSub16Sx2(Rn, Rm) guarded by condT, then
                 return True.  If 'gate' stays False nothing is emitted and
                 control leaves this scope for the decoders that follow. */
   10187      UInt regD = 99, regN = 99, regM = 99;
   10188      Bool gate = False;
   10189 
   10190       if (isT) {
                 /* Thumb: INSNT0(15,4) == 0xFAD, and in INSNT1 bits 15:12
                    must be 0xF and bits 7:4 must be 0x1 (mask 0xF0F0). */
   10191         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10192            regN = INSNT0(3,0);
   10193            regD = INSNT1(11,8);
   10194            regM = INSNT1(3,0);
                    /* Only accept if isBadRegT rejects none of the operands. */
   10195            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10196               gate = True;
   10197         }
   10198      } else {
                 /* ARM: bits 27:20 == 01100010, 11:8 == 1111, 7:4 == 0111. */
   10199         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10200             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10201             INSNA(7,4)   == BITS4(0,1,1,1)) {
   10202            regD = INSNA(15,12);
   10203            regN = INSNA(19,16);
   10204            regM = INSNA(3,0);
                    /* r15 is not accepted for any operand. */
   10205            if (regD != 15 && regN != 15 && regM != 15)
   10206              gate = True;
   10207         }
   10208      }
   10209 
   10210      if (gate) {
   10211         IRTemp rNt   = newTemp(Ity_I32);
   10212         IRTemp rMt   = newTemp(Ity_I32);
   10213         IRTemp res_q = newTemp(Ity_I32);
   10214 
                 /* Capture both sources in temps before Rd is written. */
   10215         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   10216         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   10217 
   10218         assign(res_q, binop(Iop_QSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
   10219         if (isT)
   10220            putIRegT( regD, mkexpr(res_q), condT );
   10221         else
   10222            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   10223 
   10224         DIP("qsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   10225         return True;
   10226      }
   10227      /* fall through */
   10228    }
   10229 
   10230    /* ------------------- qsax<c> <Rd>,<Rn>,<Rm> ------------------- */
   10231    /* note: the hardware seems to construct the result differently
   10232       from what the manual says. */
   10233    {
              /* QSAX: match the Thumb (isT) or ARM encoding.  On a match the
                 emitted IR computes, on sign-extended 16-bit halves,
                    diff = Rn[31:16] - Rm[15:0]
                    sum  = Rn[15:0]  + Rm[31:16]
                 passes each through armSignedSatQ with width argument 0x10
                 (presumably signed saturation to 16 bits -- confirm against
                 that helper), and writes (diff_res << 16) | (sum_res & 0xFFFF)
                 to Rd under condT.  Returns True when handled. */
   10234      UInt regD = 99, regN = 99, regM = 99;
   10235      Bool gate = False;
   10236 
   10237      if (isT) {
                 /* Thumb: INSNT0(15,4) == 0xFAE; INSNT1 bits 15:12 == 0xF and
                    bits 7:4 == 0x1. */
   10238         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10239            regN = INSNT0(3,0);
   10240            regD = INSNT1(11,8);
   10241            regM = INSNT1(3,0);
   10242            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10243               gate = True;
   10244         }
   10245      } else {
                 /* ARM: bits 27:20 == 01100010, 11:8 == 1111, 7:4 == 0101. */
   10246         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10247             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10248             INSNA(7,4)   == BITS4(0,1,0,1)) {
   10249            regD = INSNA(15,12);
   10250            regN = INSNA(19,16);
   10251            regM = INSNA(3,0);
   10252            if (regD != 15 && regN != 15 && regM != 15)
   10253               gate = True;
   10254         }
   10255      }
   10256 
   10257      if (gate) {
   10258         IRTemp irt_regN     = newTemp(Ity_I32);
   10259         IRTemp irt_regM     = newTemp(Ity_I32);
   10260         IRTemp irt_sum      = newTemp(Ity_I32);
   10261         IRTemp irt_diff     = newTemp(Ity_I32);
   10262         IRTemp irt_sum_res  = newTemp(Ity_I32);
   10263         IRTemp irt_diff_res = newTemp(Ity_I32);
   10264 
   10265         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10266         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10267 
                 /* Sar32 by 16 yields the sign-extended top half; Shl32 by 16
                    followed by Sar32 by 16 yields the sign-extended bottom
                    half. */
   10268         assign( irt_diff,
   10269                 binop( Iop_Sub32,
   10270                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10271                        binop( Iop_Sar32,
   10272                               binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
   10273                               mkU8(16) ) ) );
                 /* Last argument NULL: the second output of armSignedSatQ is
                    not requested here. */
   10274         armSignedSatQ( irt_diff, 0x10, &irt_diff_res, NULL);
   10275 
   10276         assign( irt_sum,
   10277                 binop( Iop_Add32,
   10278                        binop( Iop_Sar32,
   10279                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10280                               mkU8(16) ),
   10281                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) )) );
   10282         armSignedSatQ( irt_sum, 0x10, &irt_sum_res, NULL );
   10283 
                 /* Difference goes in the top half, sum in the bottom half. */
   10284         IRExpr* ire_result = binop( Iop_Or32,
   10285                                     binop( Iop_Shl32, mkexpr(irt_diff_res),
   10286                                            mkU8(16) ),
   10287                                     binop( Iop_And32, mkexpr(irt_sum_res),
   10288                                            mkU32(0xFFFF)) );
   10289 
   10290         if (isT)
   10291            putIRegT( regD, ire_result, condT );
   10292         else
   10293            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10294 
   10295         DIP( "qsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10296         return True;
   10297      }
   10298      /* fall through */
   10299    }
   10300 
   10301    /* ------------------- qasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   10302    {
              /* QASX: match the Thumb (isT) or ARM encoding.  On a match the
                 emitted IR computes, on sign-extended 16-bit halves,
                    diff = Rn[15:0]  - Rm[31:16]
                    sum  = Rn[31:16] + Rm[15:0]
                 passes each through armSignedSatQ with width argument 0x10
                 (presumably signed saturation to 16 bits -- confirm against
                 that helper), and writes (sum_res << 16) | (diff_res & 0xFFFF)
                 to Rd under condT.  Returns True when handled. */
   10303      UInt regD = 99, regN = 99, regM = 99;
   10304      Bool gate = False;
   10305 
   10306      if (isT) {
                 /* Thumb: INSNT0(15,4) == 0xFAA; INSNT1 bits 15:12 == 0xF and
                    bits 7:4 == 0x1. */
   10307         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
   10308            regN = INSNT0(3,0);
   10309            regD = INSNT1(11,8);
   10310            regM = INSNT1(3,0);
   10311            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10312               gate = True;
   10313         }
   10314      } else {
                 /* ARM: bits 27:20 == 01100010, 11:8 == 1111, 7:4 == 0011. */
   10315         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
   10316             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10317             INSNA(7,4)   == BITS4(0,0,1,1)) {
   10318            regD = INSNA(15,12);
   10319            regN = INSNA(19,16);
   10320            regM = INSNA(3,0);
   10321            if (regD != 15 && regN != 15 && regM != 15)
   10322               gate = True;
   10323         }
   10324      }
   10325 
   10326      if (gate) {
   10327         IRTemp irt_regN     = newTemp(Ity_I32);
   10328         IRTemp irt_regM     = newTemp(Ity_I32);
   10329         IRTemp irt_sum      = newTemp(Ity_I32);
   10330         IRTemp irt_diff     = newTemp(Ity_I32);
   10331         IRTemp irt_res_sum  = newTemp(Ity_I32);
   10332         IRTemp irt_res_diff = newTemp(Ity_I32);
   10333 
   10334         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10335         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10336 
                 /* Shl32 by 16 then Sar32 by 16 gives the sign-extended bottom
                    half; Sar32 by 16 alone gives the sign-extended top half. */
   10337         assign( irt_diff,
   10338                 binop( Iop_Sub32,
   10339                        binop( Iop_Sar32,
   10340                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10341                               mkU8(16) ),
   10342                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
   10343         armSignedSatQ( irt_diff, 0x10, &irt_res_diff, NULL );
   10344 
   10345         assign( irt_sum,
   10346                 binop( Iop_Add32,
   10347                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10348                        binop( Iop_Sar32,
   10349                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   10350                               mkU8(16) ) ) );
   10351         armSignedSatQ( irt_sum, 0x10, &irt_res_sum, NULL );
   10352 
                 /* Sum goes in the top half, difference in the bottom half. */
   10353         IRExpr* ire_result
   10354           = binop( Iop_Or32,
   10355                    binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
   10356                    binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
   10357 
   10358         if (isT)
   10359            putIRegT( regD, ire_result, condT );
   10360         else
   10361            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10362 
   10363         DIP( "qasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10364         return True;
   10365      }
   10366      /* fall through */
   10367    }
   10368 
   10369    /* ------------------- sasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   10370    {
              /* SASX: match the Thumb (isT) or ARM encoding.  On a match the
                 emitted IR computes, on sign-extended 16-bit halves and
                 without saturation,
                    diff = Rn[15:0]  - Rm[31:16]
                    sum  = Rn[31:16] + Rm[15:0]
                 writes (sum << 16) | (diff & 0xFFFF) to Rd under condT, and
                 updates the four GE flags from ~diff (flags 0,1) and ~sum
                 (flags 2,3).  Returns True when handled. */
   10371      UInt regD = 99, regN = 99, regM = 99;
   10372      Bool gate = False;
   10373 
   10374      if (isT) {
                 /* Thumb: INSNT0(15,4) == 0xFAA; INSNT1 bits 15:12 == 0xF and
                    bits 7:4 == 0x0. */
   10375         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   10376            regN = INSNT0(3,0);
   10377            regD = INSNT1(11,8);
   10378            regM = INSNT1(3,0);
   10379            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10380               gate = True;
   10381         }
   10382      } else {
                 /* ARM: bits 27:20 == 01100001, 11:8 == 1111, 7:4 == 0011. */
   10383         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   10384             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10385             INSNA(7,4)   == BITS4(0,0,1,1)) {
   10386            regD = INSNA(15,12);
   10387            regN = INSNA(19,16);
   10388            regM = INSNA(3,0);
   10389            if (regD != 15 && regN != 15 && regM != 15)
   10390               gate = True;
   10391         }
   10392      }
   10393 
   10394      if (gate) {
   10395         IRTemp irt_regN = newTemp(Ity_I32);
   10396         IRTemp irt_regM = newTemp(Ity_I32);
   10397         IRTemp irt_sum  = newTemp(Ity_I32);
   10398         IRTemp irt_diff = newTemp(Ity_I32);
   10399 
   10400         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10401         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10402 
   10403         assign( irt_diff,
   10404                 binop( Iop_Sub32,
   10405                        binop( Iop_Sar32,
   10406                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10407                               mkU8(16) ),
   10408                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
   10409 
   10410         assign( irt_sum,
   10411                 binop( Iop_Add32,
   10412                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10413                        binop( Iop_Sar32,
   10414                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   10415                               mkU8(16) ) ) );
   10416 
   10417         IRExpr* ire_result
   10418           = binop( Iop_Or32,
   10419                    binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
   10420                    binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
   10421 
                 /* ~diff has bit 31 set exactly when diff is non-negative.
                    NOTE(review): the argument 31 presumably makes
                    put_GEFLAG32 look only at bit 31 of the value -- confirm
                    against that helper's definition. */
   10422         IRTemp ge10 = newTemp(Ity_I32);
   10423         assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
   10424         put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
   10425         put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
   10426 
                 /* Same construction for the sum, feeding GE flags 2 and 3. */
   10427         IRTemp ge32 = newTemp(Ity_I32);
   10428         assign(ge32, unop(Iop_Not32, mkexpr(irt_sum)));
   10429         put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
   10430         put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
   10431 
   10432         if (isT)
   10433            putIRegT( regD, ire_result, condT );
   10434         else
   10435            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10436 
   10437         DIP( "sasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10438         return True;
   10439      }
   10440      /* fall through */
   10441    }
   10442 
   10443    /* --------------- smuad, smuadx<c><Rd>,<Rn>,<Rm> --------------- */
   10444    /* --------------- smusd, smusdx<c><Rd>,<Rn>,<Rm> --------------- */
   10445    {
              /* SMUAD{X} / SMUSD{X}: match the Thumb (isT) or ARM encoding.
                 isAD selects the add form (trace name "smuad") versus the
                 subtract form ("smusd"); bitM requests a 16-bit rotation of
                 Rm before use.  The emitted IR forms
                    prod_lo = sext(Rn[15:0])  * sext(Rm'[15:0])
                    prod_hi = sext(Rn[31:16]) * sext(Rm'[31:16])
                 and writes prod_lo +/- prod_hi to Rd under condT.  For the
                 add form only, signed overflow of that addition is ORed into
                 the Q flag.  Returns True when handled. */
   10446      UInt regD = 99, regN = 99, regM = 99, bitM = 99;
   10447      Bool gate = False, isAD = False;
   10448 
   10449      if (isT) {
                 /* Thumb: INSNT0(15,4) is 0xFB2 (add) or 0xFB4 (subtract);
                    INSNT1 bits 15:12 == 0xF and bits 7:5 == 0, leaving bit 4
                    free to carry bitM. */
   10450         if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
   10451             && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
   10452            regN = INSNT0(3,0);
   10453            regD = INSNT1(11,8);
   10454            regM = INSNT1(3,0);
   10455            bitM = INSNT1(4,4);
   10456            isAD = INSNT0(15,4) == 0xFB2;
   10457            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10458               gate = True;
   10459         }
   10460      } else {
                 /* ARM: bits 27:20 == 01110000, 15:12 == 1111, bit 7 == 0 and
                    bit 4 == 1.  Bit 6 clear selects the add form; bit 5 is
                    bitM. */
   10461         if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
   10462             INSNA(15,12) == BITS4(1,1,1,1)         &&
   10463             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1) ) {
   10464            regD = INSNA(19,16);
   10465            regN = INSNA(3,0);
   10466            regM = INSNA(11,8);
   10467            bitM = INSNA(5,5);
   10468            isAD = INSNA(6,6) == 0;
   10469            if (regD != 15 && regN != 15 && regM != 15)
   10470               gate = True;
   10471         }
   10472      }
   10473 
   10474      if (gate) {
   10475         IRTemp irt_regN    = newTemp(Ity_I32);
   10476         IRTemp irt_regM    = newTemp(Ity_I32);
   10477         IRTemp irt_prod_lo = newTemp(Ity_I32);
   10478         IRTemp irt_prod_hi = newTemp(Ity_I32);
   10479         IRTemp tmpM        = newTemp(Ity_I32);
   10480 
   10481         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10482 
                 /* Rotating by 16 swaps the two halves of Rm; rotating by 0
                    leaves it unchanged. */
   10483         assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
   10484         assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
   10485 
   10486         assign( irt_prod_lo,
   10487                 binop( Iop_Mul32,
   10488                        binop( Iop_Sar32,
   10489                               binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
   10490                               mkU8(16) ),
   10491                        binop( Iop_Sar32,
   10492                               binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
   10493                               mkU8(16) ) ) );
   10494         assign( irt_prod_hi, binop(Iop_Mul32,
   10495                                    binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)),
   10496                                    binop(Iop_Sar32, mkexpr(irt_regM), mkU8(16))) );
   10497         IRExpr* ire_result
   10498            = binop( isAD ? Iop_Add32 : Iop_Sub32,
   10499                     mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
   10500 
   10501         if (isT)
   10502            putIRegT( regD, ire_result, condT );
   10503         else
   10504            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10505 
                 /* The Q flag is only touched for the add form. */
   10506         if (isAD) {
   10507            or_into_QFLAG32(
   10508               signed_overflow_after_Add32( ire_result,
   10509                                            irt_prod_lo, irt_prod_hi ),
   10510               condT
   10511            );
   10512         }
   10513 
   10514         DIP("smu%cd%s%s r%u, r%u, r%u\n",
   10515             isAD ? 'a' : 's',
   10516             bitM ? "x" : "", nCC(conq), regD, regN, regM);
   10517         return True;
   10518      }
   10519      /* fall through */
   10520    }
   10521 
   10522    /* --------------- smlad{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
   10523    /* --------------- smlsd{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
   10524    {
              /* SMLAD{X} / SMLSD{X}: match the Thumb (isT) or ARM encoding.
                 isAD selects the add form ("smlad") versus the subtract form
                 ("smlsd"); bitM requests a 16-bit rotation of Rm before use.
                 The emitted IR forms
                    prod_lo = sext(Rn[15:0])  * sext(Rm'[15:0])
                    prod_hi = sext(Rn[31:16]) * sext(Rm'[31:16])
                    sum     = prod_lo +/- prod_hi
                 and writes sum + Ra to Rd under condT.  Signed overflow of
                 the accumulate is always ORed into the Q flag; overflow of
                 prod_lo + prod_hi is ORed in as well for the add form only.
                 Returns True when handled. */
   10525      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
   10526      Bool gate = False, isAD = False;
   10527 
   10528      if (isT) {
                /* Thumb: INSNT0(15,4) is 0xFB2 (add) or 0xFB4 (subtract) and
                   INSNT1 bits 7:5 == 000.  Ra comes from INSNT1 bits 15:12
                   and is validated with isBadRegT like the other operands. */
   10529        if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
   10530            && INSNT1(7,5) == BITS3(0,0,0)) {
   10531            regN = INSNT0(3,0);
   10532            regD = INSNT1(11,8);
   10533            regM = INSNT1(3,0);
   10534            regA = INSNT1(15,12);
   10535            bitM = INSNT1(4,4);
   10536            isAD = INSNT0(15,4) == 0xFB2;
   10537            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
   10538                && !isBadRegT(regA))
   10539               gate = True;
   10540         }
   10541      } else {
                 /* ARM: bits 27:20 == 01110000, bit 7 == 0 and bit 4 == 1.
                    Ra comes from bits 15:12; r15 is rejected for every
                    operand, including Ra. */
   10542         if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
   10543             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
   10544            regD = INSNA(19,16);
   10545            regA = INSNA(15,12);
   10546            regN = INSNA(3,0);
   10547            regM = INSNA(11,8);
   10548            bitM = INSNA(5,5);
   10549            isAD = INSNA(6,6) == 0;
   10550            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
   10551               gate = True;
   10552         }
   10553      }
   10554 
   10555      if (gate) {
   10556         IRTemp irt_regN    = newTemp(Ity_I32);
   10557         IRTemp irt_regM    = newTemp(Ity_I32);
   10558         IRTemp irt_regA    = newTemp(Ity_I32);
   10559         IRTemp irt_prod_lo = newTemp(Ity_I32);
   10560         IRTemp irt_prod_hi = newTemp(Ity_I32);
   10561         IRTemp irt_sum     = newTemp(Ity_I32);
   10562         IRTemp tmpM        = newTemp(Ity_I32);
   10563 
   10564         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10565         assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
   10566 
                 /* Rotating by 16 swaps the two halves of Rm. */
   10567         assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
   10568         assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
   10569 
   10570         assign( irt_prod_lo,
   10571                 binop(Iop_Mul32,
   10572                       binop(Iop_Sar32,
   10573                             binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   10574                             mkU8(16)),
   10575                       binop(Iop_Sar32,
   10576                             binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   10577                             mkU8(16))) );
   10578         assign( irt_prod_hi,
   10579                 binop( Iop_Mul32,
   10580                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   10581                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
   10582         assign( irt_sum, binop( isAD ? Iop_Add32 : Iop_Sub32,
   10583                                 mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) ) );
   10584 
   10585         IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_sum), mkexpr(irt_regA));
   10586 
   10587         if (isT)
   10588            putIRegT( regD, ire_result, condT );
   10589         else
   10590            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10591 
                 /* First overflow source: prod_lo + prod_hi (add form only). */
   10592         if (isAD) {
   10593            or_into_QFLAG32(
   10594               signed_overflow_after_Add32( mkexpr(irt_sum),
   10595                                            irt_prod_lo, irt_prod_hi ),
   10596               condT
   10597            );
   10598         }
   10599 
                 /* Second overflow source: the accumulate sum + Ra. */
   10600         or_into_QFLAG32(
   10601            signed_overflow_after_Add32( ire_result, irt_sum, irt_regA ),
   10602            condT
   10603         );
   10604 
   10605         DIP("sml%cd%s%s r%u, r%u, r%u, r%u\n",
   10606             isAD ? 'a' : 's',
   10607             bitM ? "x" : "", nCC(conq), regD, regN, regM, regA);
   10608         return True;
   10609      }
   10610      /* fall through */
   10611    }
   10612 
   10613    /* ----- smlabb, smlabt, smlatb, smlatt <Rd>,<Rn>,<Rm>,<Ra> ----- */
   10614    {
              /* SMLABB / SMLABT / SMLATB / SMLATT: match the Thumb (isT) or
                 ARM encoding.  bitN and bitM choose which half of Rn and Rm is
                 multiplied (set = top, clear = bottom, as reflected by the
                 't'/'b' letters in the trace).  The emitted IR writes
                    sext(half(Rn)) * sext(half(Rm)) + Ra
                 to Rd under condT and ORs signed overflow of the addition
                 into the Q flag.  Returns True when handled. */
   10615      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99, bitN = 99;
   10616      Bool gate = False;
   10617 
   10618      if (isT) {
                 /* Thumb: INSNT0(15,4) == 0xFB1 and INSNT1 bits 7:6 == 00;
                    bit 5 is bitN, bit 4 is bitM. */
   10619         if (INSNT0(15,4) == 0xFB1 && INSNT1(7,6) == BITS2(0,0)) {
   10620            regN = INSNT0(3,0);
   10621            regD = INSNT1(11,8);
   10622            regM = INSNT1(3,0);
   10623            regA = INSNT1(15,12);
   10624            bitM = INSNT1(4,4);
   10625            bitN = INSNT1(5,5);
   10626            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
   10627                && !isBadRegT(regA))
   10628               gate = True;
   10629         }
   10630      } else {
                 /* ARM: bits 27:20 == 00010000, bit 7 == 1 and bit 4 == 0;
                    bit 6 is bitM, bit 5 is bitN. */
   10631         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
   10632             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
   10633            regD = INSNA(19,16);
   10634            regN = INSNA(3,0);
   10635            regM = INSNA(11,8);
   10636            regA = INSNA(15,12);
   10637            bitM = INSNA(6,6);
   10638            bitN = INSNA(5,5);
   10639            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
   10640               gate = True;
   10641         }
   10642      }
   10643 
   10644      if (gate) {
   10645         IRTemp irt_regA = newTemp(Ity_I32);
   10646         IRTemp irt_prod = newTemp(Ity_I32);
   10647 
                 /* Shl32 by 0 followed by Sar32 by 16 selects the top half;
                    Shl32 by 16 followed by Sar32 by 16 selects the bottom
                    half.  Both come out sign-extended. */
   10648         assign( irt_prod,
   10649                 binop(Iop_Mul32,
   10650                       binop(Iop_Sar32,
   10651                             binop(Iop_Shl32,
   10652                                   isT ? getIRegT(regN) : getIRegA(regN),
   10653                                   mkU8(bitN ? 0 : 16)),
   10654                             mkU8(16)),
   10655                       binop(Iop_Sar32,
   10656                             binop(Iop_Shl32,
   10657                                   isT ? getIRegT(regM) : getIRegA(regM),
   10658                                   mkU8(bitM ? 0 : 16)),
   10659                             mkU8(16))) );
   10660 
   10661         assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
   10662 
   10663         IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_prod), mkexpr(irt_regA));
   10664 
   10665         if (isT)
   10666            putIRegT( regD, ire_result, condT );
   10667         else
   10668            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10669 
   10670         or_into_QFLAG32(
   10671            signed_overflow_after_Add32( ire_result, irt_prod, irt_regA ),
   10672            condT
   10673         );
   10674 
   10675         DIP( "smla%c%c%s r%u, r%u, r%u, r%u\n",
   10676              bitN ? 't' : 'b', bitM ? 't' : 'b',
   10677              nCC(conq), regD, regN, regM, regA );
   10678         return True;
   10679      }
   10680      /* fall through */
   10681    }
   10682 
   10683    /* -- smlalbb, smlalbt, smlaltb, smlaltt <RdLo>,<RdHi>,<Rn>,<Rm> -- */
   10684    {
              /* SMLALBB / SMLALBT / SMLALTB / SMLALTT: match the Thumb (isT)
                 or ARM encoding.  bitN and bitM choose which half of Rn and Rm
                 is multiplied (set = top, clear = bottom).  The emitted IR
                 forms the signed 64-bit product of the two sign-extended
                 halves, adds it to the 64-bit value RdHi:RdLo, and writes the
                 high and low words of the sum back to RdHi and RdLo under
                 condT.  No flags are updated.  Returns True when handled. */
   10685      UInt regDHi = 99, regN = 99, regM = 99, regDLo = 99, bitM = 99, bitN = 99;
   10686      Bool gate = False;
   10687 
   10688      if (isT) {
                 /* Thumb: INSNT0(15,4) == 0xFBC and INSNT1 bits 7:6 == 10;
                    RdLo is INSNT1 bits 15:12, RdHi is bits 11:8. */
   10689         if (INSNT0(15,4) == 0xFBC && INSNT1(7,6) == BITS2(1,0)) {
   10690            regN   = INSNT0(3,0);
   10691            regDHi = INSNT1(11,8);
   10692            regM   = INSNT1(3,0);
   10693            regDLo = INSNT1(15,12);
   10694            bitM   = INSNT1(4,4);
   10695            bitN   = INSNT1(5,5);
                    /* The two destination registers must be distinct. */
   10696            if (!isBadRegT(regDHi) && !isBadRegT(regN) && !isBadRegT(regM)
   10697                && !isBadRegT(regDLo) && regDHi != regDLo)
   10698               gate = True;
   10699         }
   10700      } else {
                 /* ARM: bits 27:20 == 00010100, bit 7 == 1 and bit 4 == 0;
                    RdHi is bits 19:16, RdLo is bits 15:12. */
   10701         if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
   10702             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
   10703            regDHi = INSNA(19,16);
   10704            regN   = INSNA(3,0);
   10705            regM   = INSNA(11,8);
   10706            regDLo = INSNA(15,12);
   10707            bitM   = INSNA(6,6);
   10708            bitN   = INSNA(5,5);
   10709            if (regDHi != 15 && regN != 15 && regM != 15 && regDLo != 15 &&
   10710                regDHi != regDLo)
   10711               gate = True;
   10712         }
   10713      }
   10714 
   10715      if (gate) {
   10716         IRTemp irt_regD  = newTemp(Ity_I64);
   10717         IRTemp irt_prod  = newTemp(Ity_I64);
   10718         IRTemp irt_res   = newTemp(Ity_I64);
   10719         IRTemp irt_resHi = newTemp(Ity_I32);
   10720         IRTemp irt_resLo = newTemp(Ity_I32);
   10721 
                 /* Shl32 by 0 or 16 followed by Sar32 by 16 picks the top or
                    bottom half, sign-extended, before the widening multiply. */
   10722         assign( irt_prod,
   10723                 binop(Iop_MullS32,
   10724                       binop(Iop_Sar32,
   10725                             binop(Iop_Shl32,
   10726                                   isT ? getIRegT(regN) : getIRegA(regN),
   10727                                   mkU8(bitN ? 0 : 16)),
   10728                             mkU8(16)),
   10729                       binop(Iop_Sar32,
   10730                             binop(Iop_Shl32,
   10731                                   isT ? getIRegT(regM) : getIRegA(regM),
   10732                                   mkU8(bitM ? 0 : 16)),
   10733                             mkU8(16))) );
   10734 
                 /* Assemble the 64-bit accumulator from RdHi:RdLo. */
   10735         assign( irt_regD, binop(Iop_32HLto64,
   10736                                 isT ? getIRegT(regDHi) : getIRegA(regDHi),
   10737                                 isT ? getIRegT(regDLo) : getIRegA(regDLo)) );
   10738         assign( irt_res, binop(Iop_Add64, mkexpr(irt_regD), mkexpr(irt_prod)) );
   10739         assign( irt_resHi, unop(Iop_64HIto32, mkexpr(irt_res)) );
   10740         assign( irt_resLo, unop(Iop_64to32, mkexpr(irt_res)) );
   10741 
   10742         if (isT) {
   10743            putIRegT( regDHi, mkexpr(irt_resHi), condT );
   10744            putIRegT( regDLo, mkexpr(irt_resLo), condT );
   10745         } else {
   10746            putIRegA( regDHi, mkexpr(irt_resHi), condT, Ijk_Boring );
   10747            putIRegA( regDLo, mkexpr(irt_resLo), condT, Ijk_Boring );
   10748         }
   10749 
                 /* Trace operands in assembler order: RdLo, RdHi, Rn, Rm. */
   10750         DIP( "smlal%c%c%s r%u, r%u, r%u, r%u\n",
   10751              bitN ? 't' : 'b', bitM ? 't' : 'b',
   10752              nCC(conq), regDLo, regDHi, regN, regM );
   10753         return True;
   10754      }
   10755      /* fall through */
   10756    }
   10757 
   10758    /* ----- smlawb, smlawt <Rd>,<Rn>,<Rm>,<Ra> ----- */
   10759    {
              /* SMLAWB / SMLAWT: match the Thumb (isT) or ARM encoding.  bitM
                 chooses the top (set) or bottom (clear) half of Rm.  The
                 emitted IR forms the signed 64-bit product of Rn and the
                 sign-extended half of Rm, takes bits 47:16 of that product as
                 a 32-bit value, adds Ra, and writes the sum to Rd under
                 condT.  Signed overflow of the addition is ORed into the Q
                 flag.  Returns True when handled. */
   10760      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
   10761      Bool gate = False;
   10762 
   10763      if (isT) {
                 /* Thumb: INSNT0(15,4) == 0xFB3 and INSNT1 bits 7:5 == 000;
                    bit 4 is bitM. */
   10764         if (INSNT0(15,4) == 0xFB3 && INSNT1(7,5) == BITS3(0,0,0)) {
   10765            regN = INSNT0(3,0);
   10766            regD = INSNT1(11,8);
   10767            regM = INSNT1(3,0);
   10768            regA = INSNT1(15,12);
   10769            bitM = INSNT1(4,4);
   10770            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
   10771                && !isBadRegT(regA))
   10772               gate = True;
   10773         }
   10774      } else {
                 /* ARM: bits 27:20 == 00010010, bit 7 == 1, bits 5:4 == 00;
                    bit 6 is bitM. */
   10775         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
   10776             (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,0,0)) {
   10777            regD = INSNA(19,16);
   10778            regN = INSNA(3,0);
   10779            regM = INSNA(11,8);
   10780            regA = INSNA(15,12);
   10781            bitM = INSNA(6,6);
   10782            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
   10783               gate = True;
   10784         }
   10785      }
   10786 
   10787      if (gate) {
   10788         IRTemp irt_regA = newTemp(Ity_I32);
   10789         IRTemp irt_prod = newTemp(Ity_I64);
   10790 
   10791         assign( irt_prod,
   10792                 binop(Iop_MullS32,
   10793                       isT ? getIRegT(regN) : getIRegA(regN),
   10794                       binop(Iop_Sar32,
   10795                             binop(Iop_Shl32,
   10796                                   isT ? getIRegT(regM) : getIRegA(regM),
   10797                                   mkU8(bitM ? 0 : 16)),
   10798                             mkU8(16))) );
   10799 
   10800         assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
   10801 
                 /* (hi32 << 16) | (lo32 >> 16) is bits 47:16 of the product. */
   10802         IRTemp prod32 = newTemp(Ity_I32);
   10803         assign(prod32,
   10804                binop(Iop_Or32,
   10805                      binop(Iop_Shl32, unop(Iop_64HIto32, mkexpr(irt_prod)), mkU8(16)),
   10806                      binop(Iop_Shr32, unop(Iop_64to32, mkexpr(irt_prod)), mkU8(16))
   10807         ));
   10808 
   10809         IRExpr* ire_result = binop(Iop_Add32, mkexpr(prod32), mkexpr(irt_regA));
   10810 
   10811         if (isT)
   10812            putIRegT( regD, ire_result, condT );
   10813         else
   10814            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10815 
   10816         or_into_QFLAG32(
   10817            signed_overflow_after_Add32( ire_result, prod32, irt_regA ),
   10818            condT
   10819         );
   10820 
   10821         DIP( "smlaw%c%s r%u, r%u, r%u, r%u\n",
   10822              bitM ? 't' : 'b',
   10823              nCC(conq), regD, regN, regM, regA );
   10824         return True;
   10825      }
   10826      /* fall through */
   10827    }
   10828 
   10829    /* ------------------- sel<c> <Rd>,<Rn>,<Rm> -------------------- */
   10830    /* fixme: fix up the test in v6media.c so that we can pass the ge
   10831       flags as part of the test. */
   10832    {
              /* SEL: match the Thumb (isT) or ARM encoding.  The emitted IR
                 turns the four GE flag values into a 32-bit byte-lane mask
                 (lane i is 0xFF when GE flag i is non-zero, else 0x00) and
                 writes (Rn & mask) | (Rm & ~mask) to Rd under condT, i.e.
                 each byte comes from Rn where the flag is set and from Rm
                 otherwise.  Returns True when handled. */
   10833      UInt regD = 99, regN = 99, regM = 99;
   10834      Bool gate = False;
   10835 
   10836      if (isT) {
                 /* Thumb: INSNT0(15,4) == 0xFAA; INSNT1 bits 15:12 == 0xF and
                    bits 7:4 == 0x8. */
   10837         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
   10838            regN = INSNT0(3,0);
   10839            regD = INSNT1(11,8);
   10840            regM = INSNT1(3,0);
   10841            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10842               gate = True;
   10843         }
   10844      } else {
                 /* ARM: bits 27:20 == 01101000, 11:8 == 1111, 7:4 == 1011. */
   10845         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
   10846             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   10847             INSNA(7,4)   == BITS4(1,0,1,1)) {
   10848            regD = INSNA(15,12);
   10849            regN = INSNA(19,16);
   10850            regM = INSNA(3,0);
   10851            if (regD != 15 && regN != 15 && regM != 15)
   10852               gate = True;
   10853         }
   10854      }
   10855 
   10856      if (gate) {
   10857         IRTemp irt_ge_flag0 = newTemp(Ity_I32);
   10858         IRTemp irt_ge_flag1 = newTemp(Ity_I32);
   10859         IRTemp irt_ge_flag2 = newTemp(Ity_I32);
   10860         IRTemp irt_ge_flag3 = newTemp(Ity_I32);
   10861 
   10862         assign( irt_ge_flag0, get_GEFLAG32(0) );
   10863         assign( irt_ge_flag1, get_GEFLAG32(1) );
   10864         assign( irt_ge_flag2, get_GEFLAG32(2) );
   10865         assign( irt_ge_flag3, get_GEFLAG32(3) );
   10866 
                 /* For any 32-bit x, x | (0 - x) has bit 31 set exactly when
                    x is non-zero.  This normalises each flag value to a
                    sign bit. */
   10867         IRExpr* ire_ge_flag0_or
   10868           = binop(Iop_Or32, mkexpr(irt_ge_flag0),
   10869                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag0)));
   10870         IRExpr* ire_ge_flag1_or
   10871           = binop(Iop_Or32, mkexpr(irt_ge_flag1),
   10872                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag1)));
   10873         IRExpr* ire_ge_flag2_or
   10874           = binop(Iop_Or32, mkexpr(irt_ge_flag2),
   10875                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag2)));
   10876         IRExpr* ire_ge_flag3_or
   10877           = binop(Iop_Or32, mkexpr(irt_ge_flag3),
   10878                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag3)));
   10879 
                 /* Sar32 by 31 smears the sign bit over all 32 bits; the And32
                    then keeps only the byte lane that belongs to each flag. */
   10880         IRExpr* ire_ge_flags
   10881           = binop( Iop_Or32,
   10882                    binop(Iop_Or32,
   10883                          binop(Iop_And32,
   10884                                binop(Iop_Sar32, ire_ge_flag0_or, mkU8(31)),
   10885                                mkU32(0x000000ff)),
   10886                          binop(Iop_And32,
   10887                                binop(Iop_Sar32, ire_ge_flag1_or, mkU8(31)),
   10888                                mkU32(0x0000ff00))),
   10889                    binop(Iop_Or32,
   10890                          binop(Iop_And32,
   10891                                binop(Iop_Sar32, ire_ge_flag2_or, mkU8(31)),
   10892                                mkU32(0x00ff0000)),
   10893                          binop(Iop_And32,
   10894                                binop(Iop_Sar32, ire_ge_flag3_or, mkU8(31)),
   10895                                mkU32(0xff000000))) );
   10896 
                 /* Note the mask expression tree is referenced twice below. */
   10897         IRExpr* ire_result
   10898           = binop(Iop_Or32,
   10899                   binop(Iop_And32,
   10900                         isT ? getIRegT(regN) : getIRegA(regN),
   10901                         ire_ge_flags ),
   10902                   binop(Iop_And32,
   10903                         isT ? getIRegT(regM) : getIRegA(regM),
   10904                         unop(Iop_Not32, ire_ge_flags)));
   10905 
   10906         if (isT)
   10907            putIRegT( regD, ire_result, condT );
   10908         else
   10909            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10910 
   10911         DIP("sel%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   10912         return True;
   10913      }
   10914      /* fall through */
   10915    }
   10916 
   10917    /* ----------------- uxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
   10918    {
              /* UXTAB16: match the Thumb (isT) or ARM encoding.  'rotate' is a
                 2-bit field; Rm is rotated right by 8 * rotate bits and masked
                 with 0x00FF00FF, which leaves one zero-extended byte in each
                 16-bit half.  The emitted IR adds those halves to the
                 corresponding halves of Rn independently (no carry between
                 them) and writes the combined result to Rd under condT.
                 Returns True when handled. */
   10919      UInt regD = 99, regN = 99, regM = 99, rotate = 99;
   10920      Bool gate = False;
   10921 
   10922      if (isT) {
                 /* Thumb: INSNT0(15,4) == 0xFA3; INSNT1 bits 15:12 == 0xF and
                    bits 7:6 == 10; bits 5:4 hold the rotation. */
   10923         if (INSNT0(15,4) == 0xFA3 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
   10924            regN   = INSNT0(3,0);
   10925            regD   = INSNT1(11,8);
   10926            regM   = INSNT1(3,0);
   10927            rotate = INSNT1(5,4);
   10928            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   10929               gate = True;
   10930         }
   10931      } else {
                 /* ARM: bits 27:20 == 01101100 and bits 9:4 == 000111; bits
                    11:10 hold the rotation. */
   10932         if (INSNA(27,20) == BITS8(0,1,1,0,1,1,0,0) &&
   10933             INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
   10934            regD   = INSNA(15,12);
   10935            regN   = INSNA(19,16);
   10936            regM   = INSNA(3,0);
   10937            rotate = INSNA(11,10);
   10938            if (regD != 15 && regN != 15 && regM != 15)
   10939              gate = True;
   10940         }
   10941      }
   10942 
   10943      if (gate) {
   10944         IRTemp irt_regN = newTemp(Ity_I32);
   10945         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   10946 
   10947         IRTemp irt_regM = newTemp(Ity_I32);
   10948         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   10949 
   10950         IRTemp irt_rot = newTemp(Ity_I32);
   10951         assign( irt_rot, binop(Iop_And32,
   10952                                genROR32(irt_regM, 8 * rotate),
   10953                                mkU32(0x00FF00FF)) );
   10954 
                 /* Low half: full-width add, then discard anything that
                    carried into bits 31:16. */
   10955         IRExpr* resLo
   10956            = binop(Iop_And32,
   10957                    binop(Iop_Add32, mkexpr(irt_regN), mkexpr(irt_rot)),
   10958                    mkU32(0x0000FFFF));
   10959 
                 /* High half: both operands have their low 16 bits cleared
                    first, so the sum's low 16 bits are zero as well. */
   10960         IRExpr* resHi
   10961            = binop(Iop_Add32,
   10962                    binop(Iop_And32, mkexpr(irt_regN), mkU32(0xFFFF0000)),
   10963                    binop(Iop_And32, mkexpr(irt_rot),  mkU32(0xFFFF0000)));
   10964 
   10965         IRExpr* ire_result
   10966            = binop( Iop_Or32, resHi, resLo );
   10967 
   10968         if (isT)
   10969            putIRegT( regD, ire_result, condT );
   10970         else
   10971            putIRegA( regD, ire_result, condT, Ijk_Boring );
   10972 
   10973         DIP( "uxtab16%s r%u, r%u, r%u, ROR #%u\n",
   10974              nCC(conq), regD, regN, regM, 8 * rotate );
   10975         return True;
   10976      }
   10977      /* fall through */
   10978    }
   10979 
   10980    /* --------------- usad8  Rd,Rn,Rm    ---------------- */
   10981    /* --------------- usada8 Rd,Rn,Rm,Ra ---------------- */
   10982    {
   10983      UInt rD = 99, rN = 99, rM = 99, rA = 99;
   10984      Bool gate = False;
   10985 
   10986      if (isT) {
   10987        if (INSNT0(15,4) == 0xFB7 && INSNT1(7,4) == BITS4(0,0,0,0)) {
   10988            rN = INSNT0(3,0);
   10989            rA = INSNT1(15,12);
   10990            rD = INSNT1(11,8);
   10991            rM = INSNT1(3,0);
   10992            if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && rA != 13)
   10993               gate = True;
   10994         }
   10995      } else {
   10996         if (INSNA(27,20) == BITS8(0,1,1,1,1,0,0,0) &&
   10997             INSNA(7,4)   == BITS4(0,0,0,1) ) {
   10998            rD = INSNA(19,16);
   10999            rA = INSNA(15,12);
   11000            rM = INSNA(11,8);
   11001            rN = INSNA(3,0);
   11002            if (rD != 15 && rN != 15 && rM != 15 /* but rA can be 15 */)
   11003               gate = True;
   11004         }
   11005      }
   11006      /* We allow rA == 15, to denote the usad8 (no accumulator) case. */
   11007 
   11008      if (gate) {
   11009         IRExpr* rNe = isT ? getIRegT(rN) : getIRegA(rN);
   11010         IRExpr* rMe = isT ? getIRegT(rM) : getIRegA(rM);
   11011         IRExpr* rAe = rA == 15 ? mkU32(0)
   11012                                : (isT ? getIRegT(rA) : getIRegA(rA));
   11013         IRExpr* res = binop(Iop_Add32,
   11014                             binop(Iop_Sad8Ux4, rNe, rMe),
   11015                             rAe);
   11016         if (isT)
   11017            putIRegT( rD, res, condT );
   11018         else
   11019            putIRegA( rD, res, condT, Ijk_Boring );
   11020 
   11021         if (rA == 15) {
   11022            DIP( "usad8%s r%u, r%u, r%u\n",
   11023                 nCC(conq), rD, rN, rM );
   11024         } else {
   11025            DIP( "usada8%s r%u, r%u, r%u, r%u\n",
   11026                 nCC(conq), rD, rN, rM, rA );
   11027         }
   11028         return True;
   11029      }
   11030      /* fall through */
   11031    }
   11032 
   11033    /* ------------------ qadd<c> <Rd>,<Rm>,<Rn> ------------------- */
   11034    {
   11035      UInt regD = 99, regN = 99, regM = 99;
   11036      Bool gate = False;
   11037 
   11038      if (isT) {
   11039         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
   11040            regN = INSNT0(3,0);
   11041            regD = INSNT1(11,8);
   11042            regM = INSNT1(3,0);
   11043            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11044               gate = True;
   11045         }
   11046      } else {
   11047         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
   11048             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   11049             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11050            regD = INSNA(15,12);
   11051            regN = INSNA(19,16);
   11052            regM = INSNA(3,0);
   11053            if (regD != 15 && regN != 15 && regM != 15)
   11054               gate = True;
   11055         }
   11056      }
   11057 
   11058      if (gate) {
   11059         IRTemp rNt   = newTemp(Ity_I32);
   11060         IRTemp rMt   = newTemp(Ity_I32);
   11061         IRTemp res_q = newTemp(Ity_I32);
   11062 
   11063         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11064         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11065 
   11066         assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt)));
   11067         if (isT)
   11068            putIRegT( regD, mkexpr(res_q), condT );
   11069         else
   11070            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11071 
   11072         or_into_QFLAG32(
   11073            signed_overflow_after_Add32(
   11074               binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
   11075            condT
   11076         );
   11077 
   11078         DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11079         return True;
   11080      }
   11081      /* fall through */
   11082    }
   11083 
   11084    /* ------------------ qdadd<c> <Rd>,<Rm>,<Rn> ------------------- */
   11085    {
   11086      UInt regD = 99, regN = 99, regM = 99;
   11087      Bool gate = False;
   11088 
   11089      if (isT) {
   11090         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF090) {
   11091            regN = INSNT0(3,0);
   11092            regD = INSNT1(11,8);
   11093            regM = INSNT1(3,0);
   11094            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11095               gate = True;
   11096         }
   11097      } else {
   11098         if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
   11099             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   11100             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11101            regD = INSNA(15,12);
   11102            regN = INSNA(19,16);
   11103            regM = INSNA(3,0);
   11104            if (regD != 15 && regN != 15 && regM != 15)
   11105               gate = True;
   11106         }
   11107      }
   11108 
   11109      if (gate) {
   11110         IRTemp rNt   = newTemp(Ity_I32);
   11111         IRTemp rMt   = newTemp(Ity_I32);
   11112         IRTemp rN_d  = newTemp(Ity_I32);
   11113         IRTemp res_q = newTemp(Ity_I32);
   11114 
   11115         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11116         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11117 
   11118         or_into_QFLAG32(
   11119            signed_overflow_after_Add32(
   11120               binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
   11121            condT
   11122         );
   11123 
   11124         assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
   11125         assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rN_d)));
   11126         if (isT)
   11127            putIRegT( regD, mkexpr(res_q), condT );
   11128         else
   11129            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11130 
   11131         or_into_QFLAG32(
   11132            signed_overflow_after_Add32(
   11133               binop(Iop_Add32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
   11134            condT
   11135         );
   11136 
   11137         DIP("qdadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11138         return True;
   11139      }
   11140      /* fall through */
   11141    }
   11142 
   11143    /* ------------------ qsub<c> <Rd>,<Rm>,<Rn> ------------------- */
   11144    {
   11145      UInt regD = 99, regN = 99, regM = 99;
   11146      Bool gate = False;
   11147 
   11148      if (isT) {
   11149         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) {
   11150            regN = INSNT0(3,0);
   11151            regD = INSNT1(11,8);
   11152            regM = INSNT1(3,0);
   11153            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11154               gate = True;
   11155         }
   11156      } else {
   11157         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
   11158             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   11159             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11160            regD = INSNA(15,12);
   11161            regN = INSNA(19,16);
   11162            regM = INSNA(3,0);
   11163            if (regD != 15 && regN != 15 && regM != 15)
   11164               gate = True;
   11165         }
   11166      }
   11167 
   11168      if (gate) {
   11169         IRTemp rNt   = newTemp(Ity_I32);
   11170         IRTemp rMt   = newTemp(Ity_I32);
   11171         IRTemp res_q = newTemp(Ity_I32);
   11172 
   11173         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11174         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11175 
   11176         assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt)));
   11177         if (isT)
   11178            putIRegT( regD, mkexpr(res_q), condT );
   11179         else
   11180            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11181 
   11182         or_into_QFLAG32(
   11183            signed_overflow_after_Sub32(
   11184               binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
   11185            condT
   11186         );
   11187 
   11188         DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11189         return True;
   11190      }
   11191      /* fall through */
   11192    }
   11193 
   11194    /* ------------------ qdsub<c> <Rd>,<Rm>,<Rn> ------------------- */
   11195    {
   11196      UInt regD = 99, regN = 99, regM = 99;
   11197      Bool gate = False;
   11198 
   11199      if (isT) {
   11200         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0B0) {
   11201            regN = INSNT0(3,0);
   11202            regD = INSNT1(11,8);
   11203            regM = INSNT1(3,0);
   11204            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11205               gate = True;
   11206         }
   11207      } else {
   11208         if (INSNA(27,20) == BITS8(0,0,0,1,0,1,1,0) &&
   11209             INSNA(11,8)  == BITS4(0,0,0,0)         &&
   11210             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11211            regD = INSNA(15,12);
   11212            regN = INSNA(19,16);
   11213            regM = INSNA(3,0);
   11214            if (regD != 15 && regN != 15 && regM != 15)
   11215               gate = True;
   11216         }
   11217      }
   11218 
   11219      if (gate) {
   11220         IRTemp rNt   = newTemp(Ity_I32);
   11221         IRTemp rMt   = newTemp(Ity_I32);
   11222         IRTemp rN_d  = newTemp(Ity_I32);
   11223         IRTemp res_q = newTemp(Ity_I32);
   11224 
   11225         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11226         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11227 
   11228         or_into_QFLAG32(
   11229            signed_overflow_after_Add32(
   11230               binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
   11231            condT
   11232         );
   11233 
   11234         assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
   11235         assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rN_d)));
   11236         if (isT)
   11237            putIRegT( regD, mkexpr(res_q), condT );
   11238         else
   11239            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11240 
   11241         or_into_QFLAG32(
   11242            signed_overflow_after_Sub32(
   11243               binop(Iop_Sub32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
   11244            condT
   11245         );
   11246 
   11247         DIP("qdsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
   11248         return True;
   11249      }
   11250      /* fall through */
   11251    }
   11252 
   11253    /* ------------------ uqsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
   11254    {
   11255      UInt regD = 99, regN = 99, regM = 99;
   11256      Bool gate = False;
   11257 
   11258      if (isT) {
   11259         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11260            regN = INSNT0(3,0);
   11261            regD = INSNT1(11,8);
   11262            regM = INSNT1(3,0);
   11263            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11264               gate = True;
   11265         }
   11266      } else {
   11267         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11268             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11269             INSNA(7,4)   == BITS4(0,1,1,1)) {
   11270            regD = INSNA(15,12);
   11271            regN = INSNA(19,16);
   11272            regM = INSNA(3,0);
   11273            if (regD != 15 && regN != 15 && regM != 15)
   11274              gate = True;
   11275         }
   11276      }
   11277 
   11278      if (gate) {
   11279         IRTemp rNt   = newTemp(Ity_I32);
   11280         IRTemp rMt   = newTemp(Ity_I32);
   11281         IRTemp res_q = newTemp(Ity_I32);
   11282 
   11283         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11284         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11285 
   11286         assign(res_q, binop(Iop_QSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
   11287         if (isT)
   11288            putIRegT( regD, mkexpr(res_q), condT );
   11289         else
   11290            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11291 
   11292         DIP("uqsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11293         return True;
   11294      }
   11295      /* fall through */
   11296    }
   11297 
   11298    /* ----------------- shadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
   11299    {
   11300      UInt regD = 99, regN = 99, regM = 99;
   11301      Bool gate = False;
   11302 
   11303      if (isT) {
   11304         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   11305            regN = INSNT0(3,0);
   11306            regD = INSNT1(11,8);
   11307            regM = INSNT1(3,0);
   11308            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11309               gate = True;
   11310         }
   11311      } else {
   11312         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   11313             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11314             INSNA(7,4)   == BITS4(0,0,0,1)) {
   11315            regD = INSNA(15,12);
   11316            regN = INSNA(19,16);
   11317            regM = INSNA(3,0);
   11318            if (regD != 15 && regN != 15 && regM != 15)
   11319               gate = True;
   11320         }
   11321      }
   11322 
   11323      if (gate) {
   11324         IRTemp rNt   = newTemp(Ity_I32);
   11325         IRTemp rMt   = newTemp(Ity_I32);
   11326         IRTemp res_q = newTemp(Ity_I32);
   11327 
   11328         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11329         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11330 
   11331         assign(res_q, binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
   11332         if (isT)
   11333            putIRegT( regD, mkexpr(res_q), condT );
   11334         else
   11335            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11336 
   11337         DIP("shadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11338         return True;
   11339      }
   11340      /* fall through */
   11341    }
   11342 
   11343    /* ----------------- uhsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
   11344    {
   11345      UInt regD = 99, regN = 99, regM = 99;
   11346      Bool gate = False;
   11347 
   11348      if (isT) {
   11349         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   11350            regN = INSNT0(3,0);
   11351            regD = INSNT1(11,8);
   11352            regM = INSNT1(3,0);
   11353            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11354               gate = True;
   11355         }
   11356      } else {
   11357         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   11358             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11359             INSNA(7,4)   == BITS4(1,1,1,1)) {
   11360            regD = INSNA(15,12);
   11361            regN = INSNA(19,16);
   11362            regM = INSNA(3,0);
   11363            if (regD != 15 && regN != 15 && regM != 15)
   11364               gate = True;
   11365         }
   11366      }
   11367 
   11368      if (gate) {
   11369         IRTemp rNt   = newTemp(Ity_I32);
   11370         IRTemp rMt   = newTemp(Ity_I32);
   11371         IRTemp res_q = newTemp(Ity_I32);
   11372 
   11373         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11374         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11375 
   11376         assign(res_q, binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
   11377         if (isT)
   11378            putIRegT( regD, mkexpr(res_q), condT );
   11379         else
   11380            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11381 
   11382         DIP("uhsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11383         return True;
   11384      }
   11385      /* fall through */
   11386    }
   11387 
   11388    /* ----------------- uhsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
   11389    {
   11390      UInt regD = 99, regN = 99, regM = 99;
   11391      Bool gate = False;
   11392 
   11393      if (isT) {
   11394         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   11395            regN = INSNT0(3,0);
   11396            regD = INSNT1(11,8);
   11397            regM = INSNT1(3,0);
   11398            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11399               gate = True;
   11400         }
   11401      } else {
   11402         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   11403             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11404             INSNA(7,4)   == BITS4(0,1,1,1)) {
   11405            regD = INSNA(15,12);
   11406            regN = INSNA(19,16);
   11407            regM = INSNA(3,0);
   11408            if (regD != 15 && regN != 15 && regM != 15)
   11409               gate = True;
   11410         }
   11411      }
   11412 
   11413      if (gate) {
   11414         IRTemp rNt   = newTemp(Ity_I32);
   11415         IRTemp rMt   = newTemp(Ity_I32);
   11416         IRTemp res_q = newTemp(Ity_I32);
   11417 
   11418         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11419         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11420 
   11421         assign(res_q, binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
   11422         if (isT)
   11423            putIRegT( regD, mkexpr(res_q), condT );
   11424         else
   11425            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11426 
   11427         DIP("uhsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11428         return True;
   11429      }
   11430      /* fall through */
   11431    }
   11432 
   11433    /* ------------------ uqadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
   11434    {
   11435      UInt regD = 99, regN = 99, regM = 99;
   11436      Bool gate = False;
   11437 
   11438      if (isT) {
   11439         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11440            regN = INSNT0(3,0);
   11441            regD = INSNT1(11,8);
   11442            regM = INSNT1(3,0);
   11443            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11444               gate = True;
   11445         }
   11446      } else {
   11447         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11448             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11449             INSNA(7,4)   == BITS4(0,0,0,1)) {
   11450            regD = INSNA(15,12);
   11451            regN = INSNA(19,16);
   11452            regM = INSNA(3,0);
   11453            if (regD != 15 && regN != 15 && regM != 15)
   11454               gate = True;
   11455         }
   11456      }
   11457 
   11458      if (gate) {
   11459         IRTemp rNt   = newTemp(Ity_I32);
   11460         IRTemp rMt   = newTemp(Ity_I32);
   11461         IRTemp res_q = newTemp(Ity_I32);
   11462 
   11463         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11464         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11465 
   11466         assign(res_q, binop(Iop_QAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
   11467         if (isT)
   11468            putIRegT( regD, mkexpr(res_q), condT );
   11469         else
   11470            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11471 
   11472         DIP("uqadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11473         return True;
   11474      }
   11475      /* fall through */
   11476    }
   11477 
   11478    /* ------------------- uqsax<c> <Rd>,<Rn>,<Rm> ------------------- */
   11479    {
   11480      UInt regD = 99, regN = 99, regM = 99;
   11481      Bool gate = False;
   11482 
   11483      if (isT) {
   11484         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11485            regN = INSNT0(3,0);
   11486            regD = INSNT1(11,8);
   11487            regM = INSNT1(3,0);
   11488            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11489               gate = True;
   11490         }
   11491      } else {
   11492         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11493             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11494             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11495            regD = INSNA(15,12);
   11496            regN = INSNA(19,16);
   11497            regM = INSNA(3,0);
   11498            if (regD != 15 && regN != 15 && regM != 15)
   11499               gate = True;
   11500         }
   11501      }
   11502 
   11503      if (gate) {
   11504         IRTemp irt_regN     = newTemp(Ity_I32);
   11505         IRTemp irt_regM     = newTemp(Ity_I32);
   11506         IRTemp irt_sum      = newTemp(Ity_I32);
   11507         IRTemp irt_diff     = newTemp(Ity_I32);
   11508         IRTemp irt_sum_res  = newTemp(Ity_I32);
   11509         IRTemp irt_diff_res = newTemp(Ity_I32);
   11510 
   11511         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11512         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11513 
   11514         assign( irt_diff,
   11515                 binop( Iop_Sub32,
   11516                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11517                        binop( Iop_Shr32,
   11518                               binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
   11519                               mkU8(16) ) ) );
   11520         armUnsignedSatQ( &irt_diff_res, NULL, irt_diff, 0x10);
   11521 
   11522         assign( irt_sum,
   11523                 binop( Iop_Add32,
   11524                        binop( Iop_Shr32,
   11525                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   11526                               mkU8(16) ),
   11527                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) )) );
   11528         armUnsignedSatQ( &irt_sum_res, NULL, irt_sum, 0x10 );
   11529 
   11530         IRExpr* ire_result = binop( Iop_Or32,
   11531                                     binop( Iop_Shl32, mkexpr(irt_diff_res),
   11532                                            mkU8(16) ),
   11533                                     binop( Iop_And32, mkexpr(irt_sum_res),
   11534                                            mkU32(0xFFFF)) );
   11535 
   11536         if (isT)
   11537            putIRegT( regD, ire_result, condT );
   11538         else
   11539            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11540 
   11541         DIP( "uqsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11542         return True;
   11543      }
   11544      /* fall through */
   11545    }
   11546 
   11547    /* ------------------- uqasx<c> <Rd>,<Rn>,<Rm> ------------------- */
   11548    {
   11549      UInt regD = 99, regN = 99, regM = 99;
   11550      Bool gate = False;
   11551 
   11552      if (isT) {
   11553         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
   11554            regN = INSNT0(3,0);
   11555            regD = INSNT1(11,8);
   11556            regM = INSNT1(3,0);
   11557            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11558               gate = True;
   11559         }
   11560      } else {
   11561         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
   11562             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11563             INSNA(7,4)   == BITS4(0,0,1,1)) {
   11564            regD = INSNA(15,12);
   11565            regN = INSNA(19,16);
   11566            regM = INSNA(3,0);
   11567            if (regD != 15 && regN != 15 && regM != 15)
   11568               gate = True;
   11569         }
   11570      }
   11571 
   11572      if (gate) {
   11573         IRTemp irt_regN     = newTemp(Ity_I32);
   11574         IRTemp irt_regM     = newTemp(Ity_I32);
   11575         IRTemp irt_sum      = newTemp(Ity_I32);
   11576         IRTemp irt_diff     = newTemp(Ity_I32);
   11577         IRTemp irt_res_sum  = newTemp(Ity_I32);
   11578         IRTemp irt_res_diff = newTemp(Ity_I32);
   11579 
   11580         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11581         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11582 
   11583         assign( irt_diff,
   11584                 binop( Iop_Sub32,
   11585                        binop( Iop_Shr32,
   11586                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   11587                               mkU8(16) ),
   11588                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
   11589         armUnsignedSatQ( &irt_res_diff, NULL, irt_diff, 0x10 );
   11590 
   11591         assign( irt_sum,
   11592                 binop( Iop_Add32,
   11593                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11594                        binop( Iop_Shr32,
   11595                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   11596                               mkU8(16) ) ) );
   11597         armUnsignedSatQ( &irt_res_sum, NULL, irt_sum, 0x10 );
   11598 
   11599         IRExpr* ire_result
   11600           = binop( Iop_Or32,
   11601                    binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
   11602                    binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
   11603 
   11604         if (isT)
   11605            putIRegT( regD, ire_result, condT );
   11606         else
   11607            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11608 
   11609         DIP( "uqasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11610         return True;
   11611      }
   11612      /* fall through */
   11613    }
   11614 
   11615    /* ------------------- usax<c> <Rd>,<Rn>,<Rm> ------------------- */
   11616    {
   11617      UInt regD = 99, regN = 99, regM = 99;
   11618      Bool gate = False;
   11619 
   11620      if (isT) {
   11621         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   11622            regN = INSNT0(3,0);
   11623            regD = INSNT1(11,8);
   11624            regM = INSNT1(3,0);
   11625            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11626               gate = True;
   11627         }
   11628      } else {
   11629         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   11630             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11631             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11632            regD = INSNA(15,12);
   11633            regN = INSNA(19,16);
   11634            regM = INSNA(3,0);
   11635            if (regD != 15 && regN != 15 && regM != 15)
   11636               gate = True;
   11637         }
   11638      }
   11639 
   11640      if (gate) {
   11641         IRTemp irt_regN = newTemp(Ity_I32);
   11642         IRTemp irt_regM = newTemp(Ity_I32);
   11643         IRTemp irt_sum  = newTemp(Ity_I32);
   11644         IRTemp irt_diff = newTemp(Ity_I32);
   11645 
   11646         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11647         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11648 
   11649         assign( irt_sum,
   11650                 binop( Iop_Add32,
   11651                        unop( Iop_16Uto32,
   11652                              unop( Iop_32to16, mkexpr(irt_regN) )
   11653                        ),
   11654                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
   11655 
   11656         assign( irt_diff,
   11657                 binop( Iop_Sub32,
   11658                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11659                        unop( Iop_16Uto32,
   11660                              unop( Iop_32to16, mkexpr(irt_regM) )
   11661                        )
   11662                 )
   11663         );
   11664 
   11665         IRExpr* ire_result
   11666           = binop( Iop_Or32,
   11667                    binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
   11668                    binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
   11669 
   11670         IRTemp ge10 = newTemp(Ity_I32);
   11671         assign( ge10, IRExpr_ITE( binop( Iop_CmpLE32U,
   11672                                          mkU32(0x10000), mkexpr(irt_sum) ),
   11673                                   mkU32(1), mkU32(0) ) );
   11674         put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
   11675         put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
   11676 
   11677         IRTemp ge32 = newTemp(Ity_I32);
   11678         assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
   11679         put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
   11680         put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
   11681 
   11682         if (isT)
   11683            putIRegT( regD, ire_result, condT );
   11684         else
   11685            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11686 
   11687         DIP( "usax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11688         return True;
   11689      }
   11690      /* fall through */
   11691    }
   11692 
   11693    /* ------------------- uasx<c> <Rd>,<Rn>,<Rm> ------------------- */
            /* Decoder case for uasx, in either the Thumb (isT) or the ARM
               encoding.  On a match it emits IR that forms, with every
               halfword zero-extended to 32 bits,
                  diff = Rn[15:0]  - Rm[31:16]
                  sum  = Rn[31:16] + Rm[15:0]
               writes (sum << 16) | (diff & 0xFFFF) to Rd under condT, updates
               GE flags 0..3, and returns True.  If the encoding does not
               match, or a register operand is rejected, nothing is emitted
               and control falls through to the next case. */
   11694    {
              /* 99 is an out-of-range placeholder; gate is only set once all
                 three fields have been overwritten from the instruction. */
   11695      UInt regD = 99, regN = 99, regM = 99;
   11696      Bool gate = False;
   11697 
   11698      if (isT) {
                 /* Thumb: first halfword 0xFAAn, second halfword
                    1111 dddd 0100 mmmm. */
   11699         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
   11700            regN = INSNT0(3,0);
   11701            regD = INSNT1(11,8);
   11702            regM = INSNT1(3,0);
   11703            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11704               gate = True;
   11705         }
   11706      } else {
                 /* ARM: bits 27:20 = 01100101, 11:8 = 1111, 7:4 = 0011.
                    r15 is refused for every operand. */
   11707         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
   11708             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11709             INSNA(7,4)   == BITS4(0,0,1,1)) {
   11710            regD = INSNA(15,12);
   11711            regN = INSNA(19,16);
   11712            regM = INSNA(3,0);
   11713            if (regD != 15 && regN != 15 && regM != 15)
   11714               gate = True;
   11715         }
   11716      }
   11717 
   11718      if (gate) {
   11719         IRTemp irt_regN = newTemp(Ity_I32);
   11720         IRTemp irt_regM = newTemp(Ity_I32);
   11721         IRTemp irt_sum  = newTemp(Ity_I32);
   11722         IRTemp irt_diff = newTemp(Ity_I32);
   11723 
   11724         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11725         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11726 
                 /* diff = zero-extended low half of Rn minus high half of Rm. */
   11727         assign( irt_diff,
   11728                 binop( Iop_Sub32,
   11729                        unop( Iop_16Uto32,
   11730                              unop( Iop_32to16, mkexpr(irt_regN) )
   11731                        ),
   11732                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
   11733 
                 /* sum = high half of Rn plus zero-extended low half of Rm. */
   11734         assign( irt_sum,
   11735                 binop( Iop_Add32,
   11736                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
   11737                        unop( Iop_16Uto32,
   11738                              unop( Iop_32to16, mkexpr(irt_regM) )
   11739                        ) ) );
   11740 
                 /* Pack: sum goes to the top halfword, diff to the bottom. */
   11741         IRExpr* ire_result
   11742           = binop( Iop_Or32,
   11743                    binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
   11744                    binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
   11745 
                 /* ge10 is ~diff, handed to GE flags 0 and 1 with 31 as the
                    second argument of put_GEFLAG32.  Presumably that argument
                    is the number of low bits to discard, so only the inverted
                    sign bit of diff (set when diff >= 0) reaches the flag --
                    TODO confirm against put_GEFLAG32. */
   11746         IRTemp ge10 = newTemp(Ity_I32);
   11747         assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
   11748         put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
   11749         put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
   11750 
                 /* ge32 is 1 when sum >= 0x10000 (unsigned compare), i.e. the
                    addition of the two 16-bit values carried out of bit 15;
                    it is handed to GE flags 2 and 3. */
   11751         IRTemp ge32 = newTemp(Ity_I32);
   11752         assign( ge32, IRExpr_ITE( binop( Iop_CmpLE32U,
   11753                                          mkU32(0x10000), mkexpr(irt_sum) ),
   11754                                   mkU32(1), mkU32(0) ) );
   11755         put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
   11756         put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
   11757 
   11758         if (isT)
   11759            putIRegT( regD, ire_result, condT );
   11760         else
   11761            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11762 
   11763         DIP( "uasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11764         return True;
   11765      }
   11766      /* fall through */
   11767    }
   11768 
   11769    /* ------------------- ssax<c> <Rd>,<Rn>,<Rm> ------------------- */
            /* Decoder case for ssax, in either the Thumb (isT) or the ARM
               encoding.  On a match it emits IR that forms, with every
               halfword sign-extended to 32 bits,
                  sum  = Rn[15:0]  + Rm[31:16]
                  diff = Rn[31:16] - Rm[15:0]
               writes (diff << 16) | (sum & 0xFFFF) to Rd under condT, updates
               GE flags 0..3, and returns True.  Otherwise nothing is emitted
               and control falls through to the next case. */
   11770    {
              /* 99 is an out-of-range placeholder; gate is only set once all
                 three fields have been overwritten from the instruction. */
   11771      UInt regD = 99, regN = 99, regM = 99;
   11772      Bool gate = False;
   11773 
   11774      if (isT) {
                 /* Thumb: first halfword 0xFAEn, second halfword
                    1111 dddd 0000 mmmm. */
   11775         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
   11776            regN = INSNT0(3,0);
   11777            regD = INSNT1(11,8);
   11778            regM = INSNT1(3,0);
   11779            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11780               gate = True;
   11781         }
   11782      } else {
                 /* ARM: bits 27:20 = 01100001, 11:8 = 1111, 7:4 = 0101.
                    r15 is refused for every operand. */
   11783         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
   11784             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11785             INSNA(7,4)   == BITS4(0,1,0,1)) {
   11786            regD = INSNA(15,12);
   11787            regN = INSNA(19,16);
   11788            regM = INSNA(3,0);
   11789            if (regD != 15 && regN != 15 && regM != 15)
   11790               gate = True;
   11791         }
   11792      }
   11793 
   11794      if (gate) {
   11795         IRTemp irt_regN = newTemp(Ity_I32);
   11796         IRTemp irt_regM = newTemp(Ity_I32);
   11797         IRTemp irt_sum  = newTemp(Ity_I32);
   11798         IRTemp irt_diff = newTemp(Ity_I32);
   11799 
   11800         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11801         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11802 
                 /* Shl32 by 16 followed by Sar32 by 16 sign-extends the low
                    halfword; a lone Sar32 by 16 yields the sign-extended high
                    halfword. */
   11803         assign( irt_sum,
   11804                 binop( Iop_Add32,
   11805                        binop( Iop_Sar32,
   11806                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
   11807                               mkU8(16) ),
   11808                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
   11809 
   11810         assign( irt_diff,
   11811                 binop( Iop_Sub32,
   11812                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
   11813                        binop( Iop_Sar32,
   11814                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
   11815                               mkU8(16) ) ) );
   11816 
                 /* Pack: diff goes to the top halfword, sum to the bottom. */
   11817         IRExpr* ire_result
   11818           = binop( Iop_Or32,
   11819                    binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
   11820                    binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
   11821 
                 /* Both flag pairs are driven by an inverted 32-bit value with
                    31 as the second argument of put_GEFLAG32: ~sum for flags
                    0 and 1, ~diff for flags 2 and 3.  Presumably that argument
                    is the number of low bits to discard, leaving the inverted
                    sign bit (set when the value is >= 0) -- TODO confirm
                    against put_GEFLAG32. */
   11822         IRTemp ge10 = newTemp(Ity_I32);
   11823         assign(ge10, unop(Iop_Not32, mkexpr(irt_sum)));
   11824         put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
   11825         put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
   11826 
   11827         IRTemp ge32 = newTemp(Ity_I32);
   11828         assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
   11829         put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
   11830         put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
   11831 
   11832         if (isT)
   11833            putIRegT( regD, ire_result, condT );
   11834         else
   11835            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11836 
   11837         DIP( "ssax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
   11838         return True;
   11839      }
   11840      /* fall through */
   11841    }
   11842 
   11843    /* ----------------- shsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
            /* Decoder case for shsub8, in either the Thumb (isT) or the ARM
               encoding.  On a match it emits IR that applies Iop_HSub8Sx4 to
               Rn and Rm (going by the op's name, a signed halving subtract on
               four 8-bit lanes), writes the result to Rd under condT, and
               returns True.  No GE flags are written here.  Otherwise nothing
               is emitted and control falls through to the next case. */
   11844    {
              /* 99 is an out-of-range placeholder; gate is only set once all
                 three fields have been overwritten from the instruction. */
   11845      UInt regD = 99, regN = 99, regM = 99;
   11846      Bool gate = False;
   11847 
   11848      if (isT) {
                 /* Thumb: first halfword 0xFACn, second halfword
                    1111 dddd 0010 mmmm. */
   11849         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   11850            regN = INSNT0(3,0);
   11851            regD = INSNT1(11,8);
   11852            regM = INSNT1(3,0);
   11853            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11854               gate = True;
   11855         }
   11856      } else {
                 /* ARM: bits 27:20 = 01100011, 11:8 = 1111, 7:4 = 1111.
                    r15 is refused for every operand. */
   11857         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   11858             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11859             INSNA(7,4)   == BITS4(1,1,1,1)) {
   11860            regD = INSNA(15,12);
   11861            regN = INSNA(19,16);
   11862            regM = INSNA(3,0);
   11863            if (regD != 15 && regN != 15 && regM != 15)
   11864               gate = True;
   11865         }
   11866      }
   11867 
   11868      if (gate) {
   11869         IRTemp rNt   = newTemp(Ity_I32);
   11870         IRTemp rMt   = newTemp(Ity_I32);
   11871         IRTemp res_q = newTemp(Ity_I32);
   11872 
   11873         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11874         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11875 
                 /* The whole operation is a single vector primop. */
   11876         assign(res_q, binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
   11877         if (isT)
   11878            putIRegT( regD, mkexpr(res_q), condT );
   11879         else
   11880            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   11881 
   11882         DIP("shsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   11883         return True;
   11884      }
   11885      /* fall through */
   11886    }
   11887 
   11888    /* ----------------- sxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
            /* Decoder case for sxtab16, in either the Thumb (isT) or the ARM
               encoding.  On a match it emits IR that rotates Rm right by
               8*rotate bits, sign-extends bytes [7:0] and [23:16] of the
               rotated value to 16 bits, adds them to the low and high
               halfwords of Rn respectively, writes the packed result to Rd
               under condT, and returns True.  Otherwise nothing is emitted
               and control falls through to the next case. */
   11889    {
              /* 99 is an out-of-range placeholder; gate is only set once all
                 four fields have been overwritten from the instruction. */
   11890      UInt regD = 99, regN = 99, regM = 99, rotate = 99;
   11891      Bool gate = False;
   11892 
   11893      if (isT) {
                 /* Thumb: first halfword 0xFA2n; second halfword has bits
                    15:12 = 1111 and bits 7:6 = 10, with the rotate field in
                    bits 5:4. */
   11894         if (INSNT0(15,4) == 0xFA2 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
   11895            regN   = INSNT0(3,0);
   11896            regD   = INSNT1(11,8);
   11897            regM   = INSNT1(3,0);
   11898            rotate = INSNT1(5,4);
   11899            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11900               gate = True;
   11901         }
   11902      } else {
                 /* ARM: bits 27:20 = 01101000, 9:4 = 000111, rotate field in
                    bits 11:10.  r15 is refused for every operand. */
   11903         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
   11904             INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
   11905            regD   = INSNA(15,12);
   11906            regN   = INSNA(19,16);
   11907            regM   = INSNA(3,0);
   11908            rotate = INSNA(11,10);
   11909            if (regD != 15 && regN != 15 && regM != 15)
   11910              gate = True;
   11911         }
   11912      }
   11913 
   11914      if (gate) {
   11915         IRTemp irt_regN = newTemp(Ity_I32);
   11916         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
   11917 
   11918         IRTemp irt_regM = newTemp(Ity_I32);
   11919         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
   11920 
                 /* The 2-bit rotate field is scaled to a bit count of 0, 8,
                    16 or 24 before being handed to genROR32. */
   11921         IRTemp irt_rot = newTemp(Ity_I32);
   11922         assign( irt_rot, genROR32(irt_regM, 8 * rotate) );
   11923 
   11924         /* FIXME Maybe we can write this arithmetic in shorter form. */
                 /* Low halfword: Rn plus the sign-extended byte [7:0] of the
                    rotated value, masked to bits 15:0 so that any carry into
                    the upper half is dropped. */
   11925         IRExpr* resLo
   11926            = binop(Iop_And32,
   11927                    binop(Iop_Add32,
   11928                          mkexpr(irt_regN),
   11929                          unop(Iop_16Uto32,
   11930                               unop(Iop_8Sto16,
   11931                                    unop(Iop_32to8, mkexpr(irt_rot))))),
   11932                    mkU32(0x0000FFFF));
   11933 
                 /* High halfword: the sign-extended byte [23:16] of the rotated
                    value is moved up by 16 before the add, so its low 16 bits
                    are zero and nothing carries in from the low half of Rn;
                    the sum is then masked to bits 31:16. */
   11934         IRExpr* resHi
   11935            = binop(Iop_And32,
   11936                    binop(Iop_Add32,
   11937                          mkexpr(irt_regN),
   11938                          binop(Iop_Shl32,
   11939                                unop(Iop_16Uto32,
   11940                                     unop(Iop_8Sto16,
   11941                                          unop(Iop_32to8,
   11942                                               binop(Iop_Shr32,
   11943                                                     mkexpr(irt_rot),
   11944                                                     mkU8(16))))),
   11945                                mkU8(16))),
   11946                    mkU32(0xFFFF0000));
   11947 
   11948         IRExpr* ire_result
   11949            = binop( Iop_Or32, resHi, resLo );
   11950 
   11951         if (isT)
   11952            putIRegT( regD, ire_result, condT );
   11953         else
   11954            putIRegA( regD, ire_result, condT, Ijk_Boring );
   11955 
   11956         DIP( "sxtab16%s r%u, r%u, r%u, ROR #%u\n",
   11957              nCC(conq), regD, regN, regM, 8 * rotate );
   11958         return True;
   11959      }
   11960      /* fall through */
   11961    }
   11962 
   11963    /* ----------------- shasx<c> <Rd>,<Rn>,<Rm> ------------------- */
            /* Decoder case for shasx, in either the Thumb (isT) or the ARM
               encoding.  On a match it emits IR that forms, with every
               halfword sign-extended to 32 bits,
                  diff = Rn[15:0]  - Rm[31:16]
                  sum  = Rn[31:16] + Rm[15:0]
               writes (sum[16:1] << 16) | diff[16:1] to Rd under condT, and
               returns True.  No GE flags are written here.  Otherwise nothing
               is emitted and control falls through to the next case. */
   11964    {
              /* 99 is an out-of-range placeholder; gate is only set once all
                 three fields have been overwritten from the instruction. */
   11965      UInt regD = 99, regN = 99, regM = 99;
   11966      Bool gate = False;
   11967 
   11968      if (isT) {
                 /* Thumb: first halfword 0xFAAn, second halfword
                    1111 dddd 0010 mmmm. */
   11969         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   11970            regN = INSNT0(3,0);
   11971            regD = INSNT1(11,8);
   11972            regM = INSNT1(3,0);
   11973            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   11974               gate = True;
   11975         }
   11976      } else {
                 /* ARM: bits 27:20 = 01100011, 11:8 = 1111, 7:4 = 0011.
                    r15 is refused for every operand. */
   11977         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   11978             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   11979             INSNA(7,4)   == BITS4(0,0,1,1)) {
   11980            regD = INSNA(15,12);
   11981            regN = INSNA(19,16);
   11982            regM = INSNA(3,0);
   11983            if (regD != 15 && regN != 15 && regM != 15)
   11984               gate = True;
   11985         }
   11986      }
   11987 
   11988      if (gate) {
   11989         IRTemp rNt   = newTemp(Ity_I32);
   11990         IRTemp rMt   = newTemp(Ity_I32);
   11991         IRTemp irt_diff  = newTemp(Ity_I32);
   11992         IRTemp irt_sum   = newTemp(Ity_I32);
   11993         IRTemp res_q = newTemp(Ity_I32);
   11994 
   11995         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   11996         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   11997 
                 /* diff = sext(low half of Rn) - sext(high half of Rm). */
   11998         assign( irt_diff,
   11999                 binop(Iop_Sub32,
   12000                       unop(Iop_16Sto32,
   12001                            unop(Iop_32to16,
   12002                                 mkexpr(rNt)
   12003                            )
   12004                       ),
   12005                       unop(Iop_16Sto32,
   12006                            unop(Iop_32to16,
   12007                                 binop(Iop_Shr32,
   12008                                       mkexpr(rMt), mkU8(16)
   12009                                 )
   12010                            )
   12011                       )
   12012                 )
   12013         );
   12014 
                 /* sum = sext(high half of Rn) + sext(low half of Rm). */
   12015         assign( irt_sum,
   12016                 binop(Iop_Add32,
   12017                       unop(Iop_16Sto32,
   12018                            unop(Iop_32to16,
   12019                                 binop(Iop_Shr32,
   12020                                       mkexpr(rNt), mkU8(16)
   12021                                 )
   12022                            )
   12023                       ),
   12024                       unop(Iop_16Sto32,
   12025                            unop(Iop_32to16, mkexpr(rMt)
   12026                            )
   12027                       )
   12028                 )
   12029         );
   12030 
                 /* Halve and pack: halved diff in the low halfword, halved sum
                    in the high halfword.  Only bits 16:1 of each 32-bit
                    intermediate survive the narrowing / left shift, so the
                    logical Shr32 by 1 does not disturb the bits that are
                    kept. */
   12031         assign( res_q,
   12032                 binop(Iop_Or32,
   12033                       unop(Iop_16Uto32,
   12034                            unop(Iop_32to16,
   12035                                 binop(Iop_Shr32,
   12036                                       mkexpr(irt_diff), mkU8(1)
   12037                                 )
   12038                            )
   12039                       ),
   12040                       binop(Iop_Shl32,
   12041                             binop(Iop_Shr32,
   12042                                   mkexpr(irt_sum), mkU8(1)
   12043                             ),
   12044                             mkU8(16)
   12045                      )
   12046                 )
   12047         );
   12048 
   12049         if (isT)
   12050            putIRegT( regD, mkexpr(res_q), condT );
   12051         else
   12052            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12053 
   12054         DIP("shasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12055         return True;
   12056      }
   12057      /* fall through */
   12058    }
   12059 
   12060    /* ----------------- uhasx<c> <Rd>,<Rn>,<Rm> ------------------- */
            /* Decoder case for uhasx, in either the Thumb (isT) or the ARM
               encoding.  On a match it emits IR that forms, with every
               halfword zero-extended to 32 bits,
                  diff = Rn[15:0]  - Rm[31:16]
                  sum  = Rn[31:16] + Rm[15:0]
               writes (sum[16:1] << 16) | diff[16:1] to Rd under condT, and
               returns True.  No GE flags are written here.  Otherwise nothing
               is emitted and control falls through to the next case. */
   12061    {
              /* 99 is an out-of-range placeholder; gate is only set once all
                 three fields have been overwritten from the instruction. */
   12062      UInt regD = 99, regN = 99, regM = 99;
   12063      Bool gate = False;
   12064 
   12065      if (isT) {
                 /* Thumb: first halfword 0xFAAn, second halfword
                    1111 dddd 0110 mmmm. */
   12066         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   12067            regN = INSNT0(3,0);
   12068            regD = INSNT1(11,8);
   12069            regM = INSNT1(3,0);
   12070            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12071               gate = True;
   12072         }
   12073      } else {
                 /* ARM: bits 27:20 = 01100111, 11:8 = 1111, 7:4 = 0011.
                    r15 is refused for every operand. */
   12074         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   12075             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12076             INSNA(7,4)   == BITS4(0,0,1,1)) {
   12077            regD = INSNA(15,12);
   12078            regN = INSNA(19,16);
   12079            regM = INSNA(3,0);
   12080            if (regD != 15 && regN != 15 && regM != 15)
   12081               gate = True;
   12082         }
   12083      }
   12084 
   12085      if (gate) {
   12086         IRTemp rNt   = newTemp(Ity_I32);
   12087         IRTemp rMt   = newTemp(Ity_I32);
   12088         IRTemp irt_diff  = newTemp(Ity_I32);
   12089         IRTemp irt_sum   = newTemp(Ity_I32);
   12090         IRTemp res_q = newTemp(Ity_I32);
   12091 
   12092         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12093         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12094 
                 /* diff = zext(low half of Rn) - zext(high half of Rm). */
   12095         assign( irt_diff,
   12096                 binop(Iop_Sub32,
   12097                       unop(Iop_16Uto32,
   12098                            unop(Iop_32to16,
   12099                                 mkexpr(rNt)
   12100                            )
   12101                       ),
   12102                       unop(Iop_16Uto32,
   12103                            unop(Iop_32to16,
   12104                                 binop(Iop_Shr32,
   12105                                       mkexpr(rMt), mkU8(16)
   12106                                 )
   12107                            )
   12108                       )
   12109                 )
   12110         );
   12111 
                 /* sum = zext(high half of Rn) + zext(low half of Rm). */
   12112         assign( irt_sum,
   12113                 binop(Iop_Add32,
   12114                       unop(Iop_16Uto32,
   12115                            unop(Iop_32to16,
   12116                                 binop(Iop_Shr32,
   12117                                       mkexpr(rNt), mkU8(16)
   12118                                 )
   12119                            )
   12120                       ),
   12121                       unop(Iop_16Uto32,
   12122                            unop(Iop_32to16, mkexpr(rMt)
   12123                            )
   12124                       )
   12125                 )
   12126         );
   12127 
                 /* Halve and pack: halved diff in the low halfword, halved sum
                    in the high halfword.  Only bits 16:1 of each 32-bit
                    intermediate survive the narrowing / left shift. */
   12128         assign( res_q,
   12129                 binop(Iop_Or32,
   12130                       unop(Iop_16Uto32,
   12131                            unop(Iop_32to16,
   12132                                 binop(Iop_Shr32,
   12133                                       mkexpr(irt_diff), mkU8(1)
   12134                                 )
   12135                            )
   12136                       ),
   12137                       binop(Iop_Shl32,
   12138                             binop(Iop_Shr32,
   12139                                   mkexpr(irt_sum), mkU8(1)
   12140                             ),
   12141                             mkU8(16)
   12142                      )
   12143                 )
   12144         );
   12145 
   12146         if (isT)
   12147            putIRegT( regD, mkexpr(res_q), condT );
   12148         else
   12149            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12150 
   12151         DIP("uhasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12152         return True;
   12153      }
   12154      /* fall through */
   12155    }
   12156 
   12157    /* ----------------- shsax<c> <Rd>,<Rn>,<Rm> ------------------- */
            /* Decoder case for shsax, in either the Thumb (isT) or the ARM
               encoding.  On a match it emits IR that forms, with every
               halfword sign-extended to 32 bits,
                  sum  = Rn[15:0]  + Rm[31:16]
                  diff = Rn[31:16] - Rm[15:0]
               writes (diff[16:1] << 16) | sum[16:1] to Rd under condT, and
               returns True.  No GE flags are written here.  Otherwise nothing
               is emitted and control falls through to the next case. */
   12158    {
              /* 99 is an out-of-range placeholder; gate is only set once all
                 three fields have been overwritten from the instruction. */
   12159      UInt regD = 99, regN = 99, regM = 99;
   12160      Bool gate = False;
   12161 
   12162      if (isT) {
                 /* Thumb: first halfword 0xFAEn, second halfword
                    1111 dddd 0010 mmmm. */
   12163         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   12164            regN = INSNT0(3,0);
   12165            regD = INSNT1(11,8);
   12166            regM = INSNT1(3,0);
   12167            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12168               gate = True;
   12169         }
   12170      } else {
                 /* ARM: bits 27:20 = 01100011, 11:8 = 1111, 7:4 = 0101.
                    r15 is refused for every operand. */
   12171         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   12172             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12173             INSNA(7,4)   == BITS4(0,1,0,1)) {
   12174            regD = INSNA(15,12);
   12175            regN = INSNA(19,16);
   12176            regM = INSNA(3,0);
   12177            if (regD != 15 && regN != 15 && regM != 15)
   12178               gate = True;
   12179         }
   12180      }
   12181 
   12182      if (gate) {
   12183         IRTemp rNt   = newTemp(Ity_I32);
   12184         IRTemp rMt   = newTemp(Ity_I32);
   12185         IRTemp irt_diff  = newTemp(Ity_I32);
   12186         IRTemp irt_sum   = newTemp(Ity_I32);
   12187         IRTemp res_q = newTemp(Ity_I32);
   12188 
   12189         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12190         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12191 
                 /* sum = sext(low half of Rn) + sext(high half of Rm). */
   12192         assign( irt_sum,
   12193                 binop(Iop_Add32,
   12194                       unop(Iop_16Sto32,
   12195                            unop(Iop_32to16,
   12196                                 mkexpr(rNt)
   12197                            )
   12198                       ),
   12199                       unop(Iop_16Sto32,
   12200                            unop(Iop_32to16,
   12201                                 binop(Iop_Shr32,
   12202                                       mkexpr(rMt), mkU8(16)
   12203                                 )
   12204                            )
   12205                       )
   12206                 )
   12207         );
   12208 
                 /* diff = sext(high half of Rn) - sext(low half of Rm). */
   12209         assign( irt_diff,
   12210                 binop(Iop_Sub32,
   12211                       unop(Iop_16Sto32,
   12212                            unop(Iop_32to16,
   12213                                 binop(Iop_Shr32,
   12214                                       mkexpr(rNt), mkU8(16)
   12215                                 )
   12216                            )
   12217                       ),
   12218                       unop(Iop_16Sto32,
   12219                            unop(Iop_32to16, mkexpr(rMt)
   12220                            )
   12221                       )
   12222                 )
   12223         );
   12224 
                 /* Halve and pack: halved sum in the low halfword, halved diff
                    in the high halfword.  Only bits 16:1 of each 32-bit
                    intermediate survive the narrowing / left shift, so the
                    logical Shr32 by 1 does not disturb the bits that are
                    kept. */
   12225         assign( res_q,
   12226                 binop(Iop_Or32,
   12227                       unop(Iop_16Uto32,
   12228                            unop(Iop_32to16,
   12229                                 binop(Iop_Shr32,
   12230                                       mkexpr(irt_sum), mkU8(1)
   12231                                 )
   12232                            )
   12233                       ),
   12234                       binop(Iop_Shl32,
   12235                             binop(Iop_Shr32,
   12236                                   mkexpr(irt_diff), mkU8(1)
   12237                             ),
   12238                             mkU8(16)
   12239                      )
   12240                 )
   12241         );
   12242 
   12243         if (isT)
   12244            putIRegT( regD, mkexpr(res_q), condT );
   12245         else
   12246            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12247 
   12248         DIP("shsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12249         return True;
   12250      }
   12251      /* fall through */
   12252    }
   12253 
   12254    /* ----------------- uhsax<c> <Rd>,<Rn>,<Rm> ------------------- */
            /* Decoder case for uhsax, in either the Thumb (isT) or the ARM
               encoding.  On a match it emits IR that forms, with every
               halfword zero-extended to 32 bits,
                  sum  = Rn[15:0]  + Rm[31:16]
                  diff = Rn[31:16] - Rm[15:0]
               writes (diff[16:1] << 16) | sum[16:1] to Rd under condT, and
               returns True.  No GE flags are written here.  Otherwise nothing
               is emitted and control falls through to the next case. */
   12255    {
              /* 99 is an out-of-range placeholder; gate is only set once all
                 three fields have been overwritten from the instruction. */
   12256      UInt regD = 99, regN = 99, regM = 99;
   12257      Bool gate = False;
   12258 
   12259      if (isT) {
                 /* Thumb: first halfword 0xFAEn, second halfword
                    1111 dddd 0110 mmmm. */
   12260         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
   12261            regN = INSNT0(3,0);
   12262            regD = INSNT1(11,8);
   12263            regM = INSNT1(3,0);
   12264            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12265               gate = True;
   12266         }
   12267      } else {
                 /* ARM: bits 27:20 = 01100111, 11:8 = 1111, 7:4 = 0101.
                    r15 is refused for every operand. */
   12268         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
   12269             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12270             INSNA(7,4)   == BITS4(0,1,0,1)) {
   12271            regD = INSNA(15,12);
   12272            regN = INSNA(19,16);
   12273            regM = INSNA(3,0);
   12274            if (regD != 15 && regN != 15 && regM != 15)
   12275               gate = True;
   12276         }
   12277      }
   12278 
   12279      if (gate) {
   12280         IRTemp rNt   = newTemp(Ity_I32);
   12281         IRTemp rMt   = newTemp(Ity_I32);
   12282         IRTemp irt_diff  = newTemp(Ity_I32);
   12283         IRTemp irt_sum   = newTemp(Ity_I32);
   12284         IRTemp res_q = newTemp(Ity_I32);
   12285 
   12286         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12287         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12288 
                 /* sum = zext(low half of Rn) + zext(high half of Rm). */
   12289         assign( irt_sum,
   12290                 binop(Iop_Add32,
   12291                       unop(Iop_16Uto32,
   12292                            unop(Iop_32to16,
   12293                                 mkexpr(rNt)
   12294                            )
   12295                       ),
   12296                       unop(Iop_16Uto32,
   12297                            unop(Iop_32to16,
   12298                                 binop(Iop_Shr32,
   12299                                       mkexpr(rMt), mkU8(16)
   12300                                 )
   12301                            )
   12302                       )
   12303                 )
   12304         );
   12305 
                 /* diff = zext(high half of Rn) - zext(low half of Rm). */
   12306         assign( irt_diff,
   12307                 binop(Iop_Sub32,
   12308                       unop(Iop_16Uto32,
   12309                            unop(Iop_32to16,
   12310                                 binop(Iop_Shr32,
   12311                                       mkexpr(rNt), mkU8(16)
   12312                                 )
   12313                            )
   12314                       ),
   12315                       unop(Iop_16Uto32,
   12316                            unop(Iop_32to16, mkexpr(rMt)
   12317                            )
   12318                       )
   12319                 )
   12320         );
   12321 
                 /* Halve and pack: halved sum in the low halfword, halved diff
                    in the high halfword.  Only bits 16:1 of each 32-bit
                    intermediate survive the narrowing / left shift. */
   12322         assign( res_q,
   12323                 binop(Iop_Or32,
   12324                       unop(Iop_16Uto32,
   12325                            unop(Iop_32to16,
   12326                                 binop(Iop_Shr32,
   12327                                       mkexpr(irt_sum), mkU8(1)
   12328                                 )
   12329                            )
   12330                       ),
   12331                       binop(Iop_Shl32,
   12332                             binop(Iop_Shr32,
   12333                                   mkexpr(irt_diff), mkU8(1)
   12334                             ),
   12335                             mkU8(16)
   12336                      )
   12337                 )
   12338         );
   12339 
   12340         if (isT)
   12341            putIRegT( regD, mkexpr(res_q), condT );
   12342         else
   12343            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12344 
   12345         DIP("uhsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12346         return True;
   12347      }
   12348      /* fall through */
   12349    }
   12350 
   12351    /* ----------------- shsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
            /* Decoder case for shsub16, in either the Thumb (isT) or the ARM
               encoding.  On a match it emits IR that applies Iop_HSub16Sx2 to
               Rn and Rm (going by the op's name, a signed halving subtract on
               two 16-bit lanes), writes the result to Rd under condT, and
               returns True.  No GE flags are written here.  Otherwise nothing
               is emitted and control falls through to the next case. */
   12352    {
              /* 99 is an out-of-range placeholder; gate is only set once all
                 three fields have been overwritten from the instruction. */
   12353      UInt regD = 99, regN = 99, regM = 99;
   12354      Bool gate = False;
   12355 
   12356      if (isT) {
                 /* Thumb: first halfword 0xFADn, second halfword
                    1111 dddd 0010 mmmm. */
   12357         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
   12358            regN = INSNT0(3,0);
   12359            regD = INSNT1(11,8);
   12360            regM = INSNT1(3,0);
   12361            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
   12362               gate = True;
   12363         }
   12364      } else {
                 /* ARM: bits 27:20 = 01100011, 11:8 = 1111, 7:4 = 0111.
                    r15 is refused for every operand. */
   12365         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
   12366             INSNA(11,8)  == BITS4(1,1,1,1)         &&
   12367             INSNA(7,4)   == BITS4(0,1,1,1)) {
   12368            regD = INSNA(15,12);
   12369            regN = INSNA(19,16);
   12370            regM = INSNA(3,0);
   12371            if (regD != 15 && regN != 15 && regM != 15)
   12372               gate = True;
   12373         }
   12374      }
   12375 
   12376      if (gate) {
   12377         IRTemp rNt   = newTemp(Ity_I32);
   12378         IRTemp rMt   = newTemp(Ity_I32);
   12379         IRTemp res_q = newTemp(Ity_I32);
   12380 
   12381         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
   12382         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
   12383 
                 /* The whole operation is a single vector primop. */
   12384         assign(res_q, binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
   12385         if (isT)
   12386            putIRegT( regD, mkexpr(res_q), condT );
   12387         else
   12388            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
   12389 
   12390         DIP("shsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
   12391         return True;
   12392      }
   12393      /* fall through */
   12394    }
   12395 
   12396    /* ----------------- smmls{r}<c> <Rd>,<Rn>,<Rm>,<Ra> ------------------- */
            /* Decoder case for smmls and its rounding variant smmlsr, in
               either the Thumb (isT) or the ARM encoding.  On a match it
               emits IR that computes, in 64 bits,
                  (Ra << 32) - (signed Rn * signed Rm) + (round ? 0x80000000 : 0)
               writes the upper 32 bits of that value to Rd under condT, and
               returns True.  Otherwise nothing is emitted and control falls
               through to the next case. */
   12397    {
              /* 99 is an out-of-range placeholder; gate is only set once all
                 four register fields have been overwritten. */
   12398      UInt rD = 99, rN = 99, rM = 99, rA = 99;
   12399      Bool round  = False;
   12400      Bool gate   = False;
   12401 
   12402      if (isT) {
                 /* Thumb: first halfword 1111 1011 0110 nnnn; second halfword
                    aaaa dddd 000R mmmm, where R (bit 4) selects rounding. */
   12403         if (INSNT0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
   12404             && INSNT0(6,4) == BITS3(1,1,0)
   12405             && INSNT1(7,5) == BITS3(0,0,0)) {
   12406            round = INSNT1(4,4);
   12407            rA    = INSNT1(15,12);
   12408            rD    = INSNT1(11,8);
   12409            rM    = INSNT1(3,0);
   12410            rN    = INSNT0(3,0);
   12411            if (!isBadRegT(rD)
   12412                && !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rA))
   12413               gate = True;
   12414         }
   12415      } else {
                 /* ARM: bits 27:20 = 01110101 and bits 7:4 = 11R1, where R
                    (bit 5) selects rounding.  Ra == 15 is already excluded by
                    the INSNA(15,12) test below, so the gate only has to
                    refuse r15 for the other three registers. */
   12416         if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,1)
   12417             && INSNA(15,12) != BITS4(1,1,1,1)
   12418             && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(1,1,0,1)) {
   12419            round = INSNA(5,5);
   12420            rD    = INSNA(19,16);
   12421            rA    = INSNA(15,12);
   12422            rM    = INSNA(11,8);
   12423            rN    = INSNA(3,0);
   12424            if (rD != 15 && rM != 15 && rN != 15)
   12425               gate = True;
   12426         }
   12427      }
   12428      if (gate) {
   12429         IRTemp irt_rA   = newTemp(Ity_I32);
   12430         IRTemp irt_rN   = newTemp(Ity_I32);
   12431         IRTemp irt_rM   = newTemp(Ity_I32);
   12432         assign( irt_rA, isT ? getIRegT(rA) : getIRegA(rA) );
   12433         assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
   12434         assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
                 /* 32HLto64(rA, 0) places Ra in the upper word of a 64-bit
                    value; the 64-bit signed product is subtracted from it,
                    the rounding constant (bit 31) is added when round is set,
                    and only the upper word of the outcome is kept. */
   12435         IRExpr* res
   12436         = unop(Iop_64HIto32,
   12437                binop(Iop_Add64,
   12438                      binop(Iop_Sub64,
   12439                            binop(Iop_32HLto64, mkexpr(irt_rA), mkU32(0)),
   12440                            binop(Iop_MullS32, mkexpr(irt_rN), mkexpr(irt_rM))),
   12441                      mkU64(round ? 0x80000000ULL : 0ULL)));
   12442         if (isT)
   12443            putIRegT( rD, res, condT );
   12444         else
   12445            putIRegA(rD, res, condT, Ijk_Boring);
   12446         DIP("smmls%s%s r%u, r%u, r%u, r%u\n",
   12447             round ? "r" : "", nCC(conq), rD, rN, rM, rA);
   12448         return True;
   12449      }
   12450      /* fall through */
   12451    }
   12452 
   12453    /* -------------- smlald{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
   12454    {
   12455      UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
   12456      Bool m_swap = False;
   12457      Bool gate   = False;
   12458 
   12459      if (isT) {
   12460         if (INSNT0(15,4) == 0xFBC &&
   12461             (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0)) {
   12462            rN     = INSNT0(3,0);
   12463            rDlo   = INSNT1(15,12);
   12464            rDhi   = INSNT1(11,8);
   12465            rM     = INSNT1(3,0);
   12466            m_swap = (INSNT1(4,4) & 1) == 1;
   12467            if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
   12468                && !isBadRegT(rM) && rDhi != rDlo)
   12469               gate = True;
   12470         }
   12471      } else {
   12472         if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0)
   12473             && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
   12474            rN     = INSNA(3,0);
   12475            rDlo   = INSNA(15,12);
   12476            rDhi   = INSNA(19,16);
   12477            rM     = INSNA(11,8);
   12478            m_swap = ( INSNA(5,5) & 1 ) == 1;
   12479            if (rDlo != 15 && rDhi != 15
   12480                && rN != 15 && rM != 15 && rDlo != rDhi)
   12481               gate = True;
   12482         }
   12483      }
   12484 
   12485      if (gate) {
   12486         IRTemp irt_rM   = newTemp(Ity_I32);
   12487         IRTemp irt_rN   = newTemp(Ity_I32);
   12488         IRTemp irt_rDhi = newTemp(Ity_I32);
   12489         IRTemp irt_rDlo = newTemp(Ity_I32);
   12490         IRTemp op_2     = newTemp(Ity_I32);
   12491         IRTemp pr_1     = newTemp(Ity_I64);
   12492         IRTemp pr_2     = newTemp(Ity_I64);
   12493         IRTemp result   = newTemp(Ity_I64);
   12494         IRTemp resHi    = newTemp(Ity_I32);
   12495         IRTemp resLo    = newTemp(Ity_I32);
   12496         assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM));
   12497         assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN));
   12498         assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi));
   12499         assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo));
   12500         assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
   12501         assign( pr_1, binop(Iop_MullS32,
   12502                             unop(Iop_16Sto32,
   12503                                  unop(Iop_32to16, mkexpr(irt_rN))
   12504                             ),
   12505                             unop(Iop_16Sto32,
   12506                                  unop(Iop_32to16, mkexpr(op_2))
   12507                             )
   12508                       )
   12509         );
   12510         assign( pr_2, binop(Iop_MullS32,
   12511                             binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
   12512                             binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
   12513                       )
   12514         );
   12515         assign( result, binop(Iop_Add64,
   12516                               binop(Iop_Add64,
   12517                                     mkexpr(pr_1),
   12518                                     mkexpr(pr_2)
   12519                               ),
   12520                               binop(Iop_32HLto64,
   12521                                     mkexpr(irt_rDhi),
   12522                                     mkexpr(irt_rDlo)
   12523                               )
   12524                         )
   12525         );
   12526         assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
   12527         assign( resLo, unop(Iop_64to32, mkexpr(result)) );
   12528         if (isT) {
   12529            putIRegT( rDhi, mkexpr(resHi), condT );
   12530            putIRegT( rDlo, mkexpr(resLo), condT );
   12531         } else {
   12532            putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   12533            putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   12534         }
   12535         DIP("smlald%c%s r%u, r%u, r%u, r%u\n",
   12536             m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
   12537         return True;
   12538      }
   12539      /* fall through */
   12540    }
   12541 
   12542    /* -------------- smlsld{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
   12543    {
   12544      UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
   12545      Bool m_swap = False;
   12546      Bool gate   = False;
   12547 
   12548      if (isT) {
   12549         if ((INSNT0(15,4) == 0xFBD &&
   12550             (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0))) {
   12551            rN     = INSNT0(3,0);
   12552            rDlo   = INSNT1(15,12);
   12553            rDhi   = INSNT1(11,8);
   12554            rM     = INSNT1(3,0);
   12555            m_swap = (INSNT1(4,4) & 1) == 1;
   12556            if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN) &&
   12557                !isBadRegT(rM) && rDhi != rDlo)
   12558               gate = True;
   12559         }
   12560      } else {
   12561         if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0) &&
   12562             (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,1,0,1)) {
   12563            rN     = INSNA(3,0);
   12564            rDlo   = INSNA(15,12);
   12565            rDhi   = INSNA(19,16);
   12566            rM     = INSNA(11,8);
   12567            m_swap = (INSNA(5,5) & 1) == 1;
   12568            if (rDlo != 15 && rDhi != 15 &&
   12569                rN != 15 && rM != 15 && rDlo != rDhi)
   12570               gate = True;
   12571         }
   12572      }
   12573      if (gate) {
   12574         IRTemp irt_rM   = newTemp(Ity_I32);
   12575         IRTemp irt_rN   = newTemp(Ity_I32);
   12576         IRTemp irt_rDhi = newTemp(Ity_I32);
   12577         IRTemp irt_rDlo = newTemp(Ity_I32);
   12578         IRTemp op_2     = newTemp(Ity_I32);
   12579         IRTemp pr_1     = newTemp(Ity_I64);
   12580         IRTemp pr_2     = newTemp(Ity_I64);
   12581         IRTemp result   = newTemp(Ity_I64);
   12582         IRTemp resHi    = newTemp(Ity_I32);
   12583         IRTemp resLo    = newTemp(Ity_I32);
   12584         assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
   12585         assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
   12586         assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi) );
   12587         assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo) );
   12588         assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
   12589         assign( pr_1, binop(Iop_MullS32,
   12590                             unop(Iop_16Sto32,
   12591                                  unop(Iop_32to16, mkexpr(irt_rN))
   12592                             ),
   12593                             unop(Iop_16Sto32,
   12594                                  unop(Iop_32to16, mkexpr(op_2))
   12595                             )
   12596                       )
   12597         );
   12598         assign( pr_2, binop(Iop_MullS32,
   12599                             binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
   12600                             binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
   12601                       )
   12602         );
   12603         assign( result, binop(Iop_Add64,
   12604                               binop(Iop_Sub64,
   12605                                     mkexpr(pr_1),
   12606                                     mkexpr(pr_2)
   12607                               ),
   12608                               binop(Iop_32HLto64,
   12609                                     mkexpr(irt_rDhi),
   12610                                     mkexpr(irt_rDlo)
   12611                               )
   12612                         )
   12613         );
   12614         assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
   12615         assign( resLo, unop(Iop_64to32, mkexpr(result)) );
   12616         if (isT) {
   12617            putIRegT( rDhi, mkexpr(resHi), condT );
   12618            putIRegT( rDlo, mkexpr(resLo), condT );
   12619         } else {
   12620            putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   12621            putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   12622         }
   12623         DIP("smlsld%c%s r%u, r%u, r%u, r%u\n",
   12624             m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
   12625         return True;
   12626      }
   12627      /* fall through */
   12628    }
   12629 
   12630    /* ---------- Doesn't match anything. ---------- */
   12631    return False;
   12632 
   12633 #  undef INSNA
   12634 #  undef INSNT0
   12635 #  undef INSNT1
   12636 }
   12637 
   12638 
   12639 /*------------------------------------------------------------*/
   12640 /*--- V8 instructions                                      ---*/
   12641 /*------------------------------------------------------------*/
   12642 
   12643 /* Split the V128 temp |t128| into four newly allocated Ity_I32 temps.
   12644    *t0 receives the lowest 32 bits and *t3 the highest. */
   12645 static void breakupV128to32s ( IRTemp t128,
   12646                                /*OUTs*/
   12647                                IRTemp* t3, IRTemp* t2,
   12648                                IRTemp* t1, IRTemp* t0 )
   12649 {
   12650    Int     k;
   12651    IRTemp* outs[4] = { t0, t1, t2, t3 };
   12652    IRTemp  upper   = newTemp(Ity_I64);
   12653    IRTemp  lower   = newTemp(Ity_I64);
   12654    assign( upper, unop(Iop_V128HIto64, mkexpr(t128)) );
   12655    assign( lower, unop(Iop_V128to64,   mkexpr(t128)) );
   12656    /* Every OUT slot must exist and still be unallocated. */
   12657    vassert(t0 && *t0 == IRTemp_INVALID);
   12658    vassert(t1 && *t1 == IRTemp_INVALID);
   12659    vassert(t2 && *t2 == IRTemp_INVALID);
   12660    vassert(t3 && *t3 == IRTemp_INVALID);
   12661    for (k = 0; k < 4; k++)
   12662       *outs[k] = newTemp(Ity_I32);
   12663    /* outs[0..1] come from |lower|, outs[2..3] from |upper|; within
   12664       each half the even index is the low word, the odd the high. */
   12665    for (k = 0; k < 4; k++)
   12666       assign( *outs[k], unop((k & 1) ? Iop_64HIto32 : Iop_64to32,
   12667                              mkexpr(k < 2 ? lower : upper)) );
   12668 }
   12669 
   12670 
   12671 /* Both ARM and Thumb */
   12672 
   12673 /* Translate a V8 instruction.  If successful, returns True and *dres
   12674    may or may not be updated.  If unsuccessful, returns False and
   12675    neither changes *dres nor creates any IR.
   12676 
   12677    The Thumb and ARM encodings are potentially different.  In both
   12678    ARM and Thumb mode, the caller must pass the entire 32 bits of
   12679    the instruction.  Callers may pass any instruction; this function
   12680    ignores anything it doesn't recognise.
   12681 
   12682    Caller must supply an IRTemp 'condT' holding the gating condition,
   12683    or IRTemp_INVALID indicating the insn is always executed.
   12684 
   12685    If we are decoding an ARM instruction which is in the NV space
   12686    then it is expected that condT will be IRTemp_INVALID, and that is
   12687    asserted for.  That condition is ensured by the logic near the top
   12688    of disInstr_ARM_WRK, that sets up condT.
   12689 
   12690    When decoding for Thumb, the caller must pass the ITState pre/post
   12691    this instruction, so that we can generate a SIGILL in the cases where
   12692    the instruction may not be in an IT block.  When decoding for ARM,
   12693    both of these must be IRTemp_INVALID.
   12694 
   12695    Finally, the caller must indicate whether this occurs in ARM or in
   12696    Thumb code.
   12697 */
   12698 static Bool decode_V8_instruction (
   12699                /*MOD*/DisResult* dres,
   12700                UInt              insnv8,
   12701                IRTemp            condT,
   12702                Bool              isT,
   12703                IRTemp            old_itstate,
   12704                IRTemp            new_itstate
   12705             )
   12706 {
   12707 #  define INSN(_bMax,_bMin)   SLICE_UInt(insnv8, (_bMax), (_bMin))
   12708 
   12709    if (isT) {
   12710       vassert(old_itstate != IRTemp_INVALID);
   12711       vassert(new_itstate != IRTemp_INVALID);
   12712    } else {
   12713       vassert(old_itstate == IRTemp_INVALID);
   12714       vassert(new_itstate == IRTemp_INVALID);
   12715    }
   12716 
   12717    /* ARMCondcode 'conq' is only used for debug printing and for no other
   12718       purpose.  For ARM, this is simply the top 4 bits of the instruction.
   12719       For Thumb, the condition is not (really) known until run time, and so
   12720       we set it to ARMCondAL in order that printing of these instructions
   12721       does not show any condition. */
   12722    ARMCondcode conq;
   12723    if (isT) {
   12724       conq = ARMCondAL;
   12725    } else {
   12726       conq = (ARMCondcode)INSN(31,28);
   12727       if (conq == ARMCondNV || conq == ARMCondAL) {
   12728          vassert(condT == IRTemp_INVALID);
   12729       } else {
   12730          vassert(condT != IRTemp_INVALID);
   12731       }
   12732       vassert(conq >= ARMCondEQ && conq <= ARMCondNV);
   12733    }
   12734 
   12735    /* ----------- {AESD, AESE, AESMC, AESIMC}.8 q_q ----------- */
   12736    /*     31   27   23  21 19 17 15 11   7      3
   12737       T1: 1111 1111 1 D 11 sz 00 d  0011 00 M 0 m  AESE Qd, Qm
   12738       A1: 1111 0011 1 D 11 sz 00 d  0011 00 M 0 m  AESE Qd, Qm
   12739 
   12740       T1: 1111 1111 1 D 11 sz 00 d  0011 01 M 0 m  AESD Qd, Qm
   12741       A1: 1111 0011 1 D 11 sz 00 d  0011 01 M 0 m  AESD Qd, Qm
   12742 
   12743       T1: 1111 1111 1 D 11 sz 00 d  0011 10 M 0 m  AESMC Qd, Qm
   12744       A1: 1111 0011 1 D 11 sz 00 d  0011 10 M 0 m  AESMC Qd, Qm
   12745 
   12746       T1: 1111 1111 1 D 11 sz 00 d  0011 11 M 0 m  AESIMC Qd, Qm
   12747       A1: 1111 0011 1 D 11 sz 00 d  0011 11 M 0 m  AESIMC Qd, Qm
   12748 
   12749       sz must be 00
   12750       ARM encoding is in NV space.
   12751       In Thumb mode, we must not be in an IT block.
   12752    */
   12753    {
   12754      UInt regD = 99, regM = 99, opc = 4/*invalid*/;
   12755      Bool gate = True;
   12756 
   12757      UInt high9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
   12758      if (INSN(31,23) == high9 && INSN(21,16) == BITS6(1,1,0,0,0,0)
   12759          && INSN(11,8) == BITS4(0,0,1,1) && INSN(4,4) == 0) {
   12760         UInt bitD = INSN(22,22);
   12761         UInt fldD = INSN(15,12);
   12762         UInt bitM = INSN(5,5);
   12763         UInt fldM = INSN(3,0);
   12764         opc  = INSN(7,6);
   12765         regD = (bitD << 4) | fldD;
   12766         regM = (bitM << 4) | fldM;
   12767      }
   12768      if ((regD & 1) == 1 || (regM & 1) == 1)
   12769         gate = False;
   12770 
   12771      if (gate) {
   12772         if (isT) {
   12773            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   12774         }
   12775         /* In ARM mode, this is statically unconditional.  In Thumb mode,
   12776            this must be dynamically unconditional, and we've SIGILLd if not.
   12777            In either case we can create unconditional IR. */
   12778         IRTemp op1 = newTemp(Ity_V128);
   12779         IRTemp op2 = newTemp(Ity_V128);
   12780         IRTemp src = newTemp(Ity_V128);
   12781         IRTemp res = newTemp(Ity_V128);
   12782         assign(op1,  getQReg(regD >> 1));
   12783         assign(op2,  getQReg(regM >> 1));
   12784         assign(src,  opc == BITS2(0,0) || opc == BITS2(0,1)
   12785                         ? binop(Iop_XorV128, mkexpr(op1), mkexpr(op2))
   12786                         : mkexpr(op2));
   12787 
   12788         void* helpers[4]
   12789            = { &armg_dirtyhelper_AESE,  &armg_dirtyhelper_AESD,
   12790                &armg_dirtyhelper_AESMC, &armg_dirtyhelper_AESIMC };
   12791         const HChar* hNames[4]
   12792            = { "armg_dirtyhelper_AESE",  "armg_dirtyhelper_AESD",
   12793                "armg_dirtyhelper_AESMC", "armg_dirtyhelper_AESIMC" };
   12794         const HChar* iNames[4]
   12795            = { "aese", "aesd", "aesmc", "aesimc" };
   12796 
   12797         vassert(opc >= 0 && opc <= 3);
   12798         void*        helper = helpers[opc];
   12799         const HChar* hname  = hNames[opc];
   12800 
   12801         IRTemp w32_3, w32_2, w32_1, w32_0;
   12802         w32_3 = w32_2 = w32_1 = w32_0 = IRTemp_INVALID;
   12803         breakupV128to32s( src, &w32_3, &w32_2, &w32_1, &w32_0 );
   12804 
   12805         IRDirty* di
   12806           = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
   12807                                mkIRExprVec_5(
   12808                                   IRExpr_VECRET(),
   12809                                   mkexpr(w32_3), mkexpr(w32_2),
   12810                                   mkexpr(w32_1), mkexpr(w32_0)) );
   12811         stmt(IRStmt_Dirty(di));
   12812 
   12813         putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
   12814         DIP("%s.8 q%d, q%d\n", iNames[opc], regD >> 1, regM >> 1);
   12815         return True;
   12816      }
   12817      /* fall through */
   12818    }
   12819 
   12820    /* ----------- SHA 3-reg insns q_q_q ----------- */
   12821    /*
   12822           31   27   23      19 15 11   7       3
   12823       T1: 1110 1111 0  D 00 n  d  1100 N Q M 0 m  SHA1C Qd, Qn, Qm  ix=0
   12824       A1: 1111 0010 ----------------------------
   12825 
   12826       T1: 1110 1111 0  D 01 n  d  1100 N Q M 0 m  SHA1P Qd, Qn, Qm  ix=1
   12827       A1: 1111 0010 ----------------------------
   12828 
   12829       T1: 1110 1111 0  D 10 n  d  1100 N Q M 0 m  SHA1M Qd, Qn, Qm  ix=2
   12830       A1: 1111 0010 ----------------------------
   12831 
   12832       T1: 1110 1111 0  D 11 n  d  1100 N Q M 0 m  SHA1SU0 Qd, Qn, Qm  ix=3
   12833       A1: 1111 0010 ----------------------------
   12834       (that's a complete set of 4, based on insn[21,20])
   12835 
   12836       T1: 1111 1111 0  D 00 n  d  1100 N Q M 0 m  SHA256H Qd, Qn, Qm  ix=4
   12837       A1: 1111 0011 ----------------------------
   12838 
   12839       T1: 1111 1111 0  D 01 n  d  1100 N Q M 0 m  SHA256H2 Qd, Qn, Qm  ix=5
   12840       A1: 1111 0011 ----------------------------
   12841 
   12842       T1: 1111 1111 0  D 10 n  d  1100 N Q M 0 m  SHA256SU1 Qd, Qn, Qm  ix=6
   12843       A1: 1111 0011 ----------------------------
   12844       (3/4 of a complete set of 4, based on insn[21,20])
   12845 
   12846       Q must be 1.  Same comments about conditionalisation as for the AES
   12847       group above apply.
   12848    */
   12849    {
   12850      UInt ix = 8; /* invalid */
   12851      Bool gate = False;
   12852 
   12853      UInt hi9_sha1   = isT ? BITS9(1,1,1,0,1,1,1,1,0)
   12854                            : BITS9(1,1,1,1,0,0,1,0,0);
   12855      UInt hi9_sha256 = isT ? BITS9(1,1,1,1,1,1,1,1,0)
   12856                            : BITS9(1,1,1,1,0,0,1,1,0);
   12857      if ((INSN(31,23) == hi9_sha1 || INSN(31,23) == hi9_sha256)
   12858          && INSN(11,8) == BITS4(1,1,0,0)
   12859          && INSN(6,6) == 1 && INSN(4,4) == 0) {
   12860         ix = INSN(21,20);
   12861         if (INSN(31,23) == hi9_sha256)
   12862            ix |= 4;
   12863         if (ix < 7)
   12864            gate = True;
   12865      }
   12866 
   12867      UInt regN = (INSN(7,7)   << 4)  | INSN(19,16);
   12868      UInt regD = (INSN(22,22) << 4)  | INSN(15,12);
   12869      UInt regM = (INSN(5,5)   << 4)  | INSN(3,0);
   12870      if ((regD & 1) == 1 || (regM & 1) == 1 || (regN & 1) == 1)
   12871         gate = False;
   12872 
   12873      if (gate) {
   12874         vassert(ix >= 0 && ix < 7);
   12875         const HChar* inames[7]
   12876            = { "sha1c", "sha1p", "sha1m", "sha1su0",
   12877                "sha256h", "sha256h2", "sha256su1" };
   12878         void(*helpers[7])(V128*,UInt,UInt,UInt,UInt,UInt,UInt,
   12879                                 UInt,UInt,UInt,UInt,UInt,UInt)
   12880            = { &armg_dirtyhelper_SHA1C,    &armg_dirtyhelper_SHA1P,
   12881                &armg_dirtyhelper_SHA1M,    &armg_dirtyhelper_SHA1SU0,
   12882                &armg_dirtyhelper_SHA256H,  &armg_dirtyhelper_SHA256H2,
   12883                &armg_dirtyhelper_SHA256SU1 };
   12884         const HChar* hnames[7]
   12885            = { "armg_dirtyhelper_SHA1C",    "armg_dirtyhelper_SHA1P",
   12886                "armg_dirtyhelper_SHA1M",    "armg_dirtyhelper_SHA1SU0",
   12887                "armg_dirtyhelper_SHA256H",  "armg_dirtyhelper_SHA256H2",
   12888                "armg_dirtyhelper_SHA256SU1" };
   12889 
   12890         /* This is a really lame way to implement this, even worse than
   12891            the arm64 version.  But at least it works. */
   12892 
   12893         if (isT) {
   12894            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   12895         }
   12896 
   12897         IRTemp vD = newTemp(Ity_V128);
   12898         IRTemp vN = newTemp(Ity_V128);
   12899         IRTemp vM = newTemp(Ity_V128);
   12900         assign(vD,  getQReg(regD >> 1));
   12901         assign(vN,  getQReg(regN >> 1));
   12902         assign(vM,  getQReg(regM >> 1));
   12903 
   12904         IRTemp d32_3, d32_2, d32_1, d32_0;
   12905         d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
   12906         breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
   12907 
   12908         IRTemp n32_3_pre, n32_2_pre, n32_1_pre, n32_0_pre;
   12909         n32_3_pre = n32_2_pre = n32_1_pre = n32_0_pre = IRTemp_INVALID;
   12910         breakupV128to32s( vN, &n32_3_pre, &n32_2_pre, &n32_1_pre, &n32_0_pre );
   12911 
   12912         IRTemp m32_3, m32_2, m32_1, m32_0;
   12913         m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
   12914         breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
   12915 
   12916         IRTemp n32_3 = newTemp(Ity_I32);
   12917         IRTemp n32_2 = newTemp(Ity_I32);
   12918         IRTemp n32_1 = newTemp(Ity_I32);
   12919         IRTemp n32_0 = newTemp(Ity_I32);
   12920 
   12921         /* Mask off any bits of the N register operand that aren't actually
   12922            needed, so that Memcheck doesn't complain unnecessarily. */
   12923         switch (ix) {
   12924            case 0: case 1: case 2:
   12925               assign(n32_3, mkU32(0));
   12926               assign(n32_2, mkU32(0));
   12927               assign(n32_1, mkU32(0));
   12928               assign(n32_0, mkexpr(n32_0_pre));
   12929               break;
   12930            case 3: case 4: case 5: case 6:
   12931               assign(n32_3, mkexpr(n32_3_pre));
   12932               assign(n32_2, mkexpr(n32_2_pre));
   12933               assign(n32_1, mkexpr(n32_1_pre));
   12934               assign(n32_0, mkexpr(n32_0_pre));
   12935               break;
   12936            default:
   12937               vassert(0);
   12938         }
   12939 
   12940         IRExpr** argvec
   12941            = mkIRExprVec_13(
   12942                 IRExpr_VECRET(),
   12943                 mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
   12944                 mkexpr(n32_3), mkexpr(n32_2), mkexpr(n32_1), mkexpr(n32_0),
   12945                 mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
   12946              );
   12947 
   12948         IRTemp res = newTemp(Ity_V128);
   12949         IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
   12950                                          hnames[ix], helpers[ix], argvec );
   12951         stmt(IRStmt_Dirty(di));
   12952         putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
   12953 
   12954         DIP("%s.8 q%u, q%u, q%u\n",
   12955             inames[ix], regD >> 1, regN >> 1, regM >> 1);
   12956         return True;
   12957      }
   12958      /* fall through */
   12959    }
   12960 
   12961    /* ----------- SHA1SU1, SHA256SU0 ----------- */
   12962    /*
   12963           31   27   23  21 19   15 11   7      3
   12964       T1: 1111 1111 1 D 11 1010 d  0011 10 M 0 m  SHA1SU1 Qd, Qm
   12965       A1: 1111 0011 ----------------------------
   12966 
   12967       T1: 1111 1111 1 D 11 1010 d  0011 11 M 0 m  SHA256SU0 Qd, Qm
   12968       A1: 1111 0011 ----------------------------
   12969 
   12970       Same comments about conditionalisation as for the AES group above apply.
   12971    */
   12972    {
   12973      Bool gate = False;
   12974 
   12975      UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
   12976      if (INSN(31,23) == hi9 && INSN(21,16) == BITS6(1,1,1,0,1,0)
   12977          && INSN(11,7) == BITS5(0,0,1,1,1) && INSN(4,4) == 0) {
   12978         gate = True;
   12979      }
   12980 
   12981      UInt regD = (INSN(22,22) << 4) | INSN(15,12);
   12982      UInt regM = (INSN(5,5)   << 4) | INSN(3,0);
   12983      if ((regD & 1) == 1 || (regM & 1) == 1)
   12984         gate = False;
   12985 
   12986      Bool is_1SU1 = INSN(6,6) == 0;
   12987 
   12988      if (gate) {
   12989         const HChar* iname
   12990            = is_1SU1 ? "sha1su1" : "sha256su0";
   12991         void (*helper)(V128*,UInt,UInt,UInt,UInt,UInt,UInt,UInt,UInt)
   12992            = is_1SU1 ? &armg_dirtyhelper_SHA1SU1
   12993                      : *armg_dirtyhelper_SHA256SU0;
   12994         const HChar* hname
   12995            = is_1SU1 ? "armg_dirtyhelper_SHA1SU1"
   12996                      : "armg_dirtyhelper_SHA256SU0";
   12997 
   12998         if (isT) {
   12999            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   13000         }
   13001 
   13002         IRTemp vD = newTemp(Ity_V128);
   13003         IRTemp vM = newTemp(Ity_V128);
   13004         assign(vD,  getQReg(regD >> 1));
   13005         assign(vM,  getQReg(regM >> 1));
   13006 
   13007         IRTemp d32_3, d32_2, d32_1, d32_0;
   13008         d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
   13009         breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
   13010 
   13011         IRTemp m32_3, m32_2, m32_1, m32_0;
   13012         m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
   13013         breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
   13014 
   13015         IRExpr** argvec
   13016            = mkIRExprVec_9(
   13017                 IRExpr_VECRET(),
   13018                 mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
   13019                 mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
   13020              );
   13021 
   13022         IRTemp res = newTemp(Ity_V128);
   13023         IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
   13024                                          hname, helper, argvec );
   13025         stmt(IRStmt_Dirty(di));
   13026         putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
   13027 
   13028         DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
   13029         return True;
   13030      }
   13031      /* fall through */
   13032    }
   13033 
   13034    /* ----------- SHA1H ----------- */
   13035    /*
   13036           31   27   23  21 19   15 11   7      3
   13037       T1: 1111 1111 1 D 11 1001 d  0010 11 M 0 m  SHA1H Qd, Qm
   13038       A1: 1111 0011 ----------------------------
   13039 
   13040       Same comments about conditionalisation as for the AES group above apply.
   13041    */
   13042    {
   13043      Bool gate = False;
   13044 
   13045      UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
   13046      if (INSN(31,23) == hi9 && INSN(21,16) == BITS6(1,1,1,0,0,1)
   13047          && INSN(11,6) == BITS6(0,0,1,0,1,1) && INSN(4,4) == 0) {
   13048         gate = True;
   13049      }
   13050 
   13051      UInt regD = (INSN(22,22) << 4) | INSN(15,12);
   13052      UInt regM = (INSN(5,5)   << 4) | INSN(3,0);
   13053      if ((regD & 1) == 1 || (regM & 1) == 1)
   13054         gate = False;
   13055 
   13056      if (gate) {
   13057         const HChar* iname = "sha1h";
   13058         void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_SHA1H;
   13059         const HChar* hname                        = "armg_dirtyhelper_SHA1H";
   13060 
   13061         if (isT) {
   13062            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   13063         }
   13064 
   13065         IRTemp vM = newTemp(Ity_V128);
   13066         assign(vM,  getQReg(regM >> 1));
   13067 
   13068         IRTemp m32_3, m32_2, m32_1, m32_0;
   13069         m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
   13070         breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
   13071         /* m32_3, m32_2, m32_1 are just abandoned.  No harm; iropt will
   13072            remove them. */
   13073 
   13074         IRExpr*  zero   = mkU32(0);
   13075         IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
   13076                                         zero, zero, zero, mkexpr(m32_0));
   13077 
   13078         IRTemp res = newTemp(Ity_V128);
   13079         IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
   13080                                          hname, helper, argvec );
   13081         stmt(IRStmt_Dirty(di));
   13082         putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
   13083 
   13084         DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
   13085         return True;
   13086      }
   13087      /* fall through */
   13088    }
   13089 
   13090    /* ----------- VMULL.P64 ----------- */
   13091    /*
   13092           31   27   23  21 19 15 11   7       3
   13093       T2: 1110 1111 1 D 10 n  d  1110 N 0 M 0 m
   13094       A2: 1111 0010 -------------------------
   13095 
   13096       The ARM documentation is pretty difficult to follow here.
   13097       Same comments about conditionalisation as for the AES group above apply.
   13098    */
   13099    {
   13100      Bool gate = False;
   13101 
   13102      UInt hi9 = isT ? BITS9(1,1,1,0,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,0,1);
   13103      if (INSN(31,23) == hi9 && INSN(21,20) == BITS2(1,0)
   13104          && INSN(11,8) == BITS4(1,1,1,0)
   13105          && INSN(6,6) == 0 && INSN(4,4) == 0) {
   13106         gate = True;
   13107      }
   13108 
   13109      UInt regN = (INSN(7,7)   << 4)  | INSN(19,16);
   13110      UInt regD = (INSN(22,22) << 4)  | INSN(15,12);
   13111      UInt regM = (INSN(5,5)   << 4)  | INSN(3,0);
   13112 
   13113      if ((regD & 1) == 1)
   13114         gate = False;
   13115 
   13116      if (gate) {
   13117         const HChar* iname = "vmull";
   13118         void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_VMULLP64;
   13119         const HChar* hname                        = "armg_dirtyhelper_VMULLP64";
   13120 
   13121         if (isT) {
   13122            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   13123         }
   13124 
   13125         IRTemp srcN = newTemp(Ity_I64);
   13126         IRTemp srcM = newTemp(Ity_I64);
   13127         assign(srcN, getDRegI64(regN));
   13128         assign(srcM, getDRegI64(regM));
   13129 
   13130         IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
   13131                                         unop(Iop_64HIto32, mkexpr(srcN)),
   13132                                         unop(Iop_64to32,   mkexpr(srcN)),
   13133                                         unop(Iop_64HIto32, mkexpr(srcM)),
   13134                                         unop(Iop_64to32, mkexpr(srcM)));
   13135 
   13136         IRTemp res = newTemp(Ity_V128);
   13137         IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
   13138                                          hname, helper, argvec );
   13139         stmt(IRStmt_Dirty(di));
   13140         putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
   13141 
   13142         DIP("%s.p64 q%u, q%u, w%u\n", iname, regD >> 1, regN, regM);
   13143         return True;
   13144      }
   13145      /* fall through */
   13146    }
   13147 
   13148    /* ----------- LDA{,B,H}, STL{,B,H} ----------- */
   13149    /*     31   27   23   19   15 11   7    3
   13150       A1: cond 0001 1001  n    t 1100 1001 1111  LDA  Rt, [Rn]
   13151       A1: cond 0001 1111  n    t 1100 1001 1111  LDAH Rt, [Rn]
   13152       A1: cond 0001 1101  n    t 1100 1001 1111  LDAB Rt, [Rn]
   13153 
   13154       A1: cond 0001 1000  n 1111 1100 1001    t  STL  Rt, [Rn]
   13155       A1: cond 0001 1110  n 1111 1100 1001    t  STLH Rt, [Rn]
   13156       A1: cond 0001 1100  n 1111 1100 1001    t  STLB Rt, [Rn]
   13157 
   13158       T1: 1110 1000 1101  n    t 1111 1010 1111  LDA  Rt, [Rn]
   13159       T1: 1110 1000 1101  n    t 1111 1001 1111  LDAH Rt, [Rn]
   13160       T1: 1110 1000 1101  n    t 1111 1000 1111  LDAB Rt, [Rn]
   13161 
   13162       T1: 1110 1000 1100  n    t 1111 1010 1111  STL  Rt, [Rn]
   13163       T1: 1110 1000 1100  n    t 1111 1001 1111  STLH Rt, [Rn]
   13164       T1: 1110 1000 1100  n    t 1111 1000 1111  STLB Rt, [Rn]
   13165    */
   13166    {  /* LDA{,B,H} / STL{,B,H}: decode either encoding, then emit a guarded load followed by a fence, or a fence followed by a guarded store. */
   13167      UInt nn     = 16; // invalid
   13168      UInt tt     = 16; // invalid
   13169      UInt szBlg2 = 4;  // invalid
   13170      Bool isLoad = False;
   13171      Bool gate   = False; // becomes True only if an encoding matched and its size/register checks passed
   13172      if (isT) {
   13173         if (INSN(31,21) == BITS11(1,1,1,0,1,0,0,0,1,1,0)
   13174             && INSN(11,6) == BITS6(1,1,1,1,1,0)
   13175             && INSN(3,0) == BITS4(1,1,1,1)) {
   13176            nn     = INSN(19,16);
   13177            tt     = INSN(15,12);
   13178            isLoad = INSN(20,20) == 1;
   13179            szBlg2 = INSN(5,4); // 00:B 01:H 10:W 11:invalid
   13180            gate   = szBlg2 != BITS2(1,1) && tt != 15 && nn != 15;
   13181         }
   13182      } else {
   13183         if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 1
   13184             && INSN(11,0) == BITS12(1,1,0,0,1,0,0,1,1,1,1,1)) {
   13185            nn     = INSN(19,16);
   13186            tt     = INSN(15,12);
   13187            isLoad = True;
   13188            szBlg2     = INSN(22,21); // 10:B 11:H 00:W 01:invalid
   13189            gate   = szBlg2 != BITS2(0,1) && tt != 15 && nn != 15;
   13190         }
   13191         else
   13192         if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 0
   13193             && INSN(15,4) == BITS12(1,1,1,1,1,1,0,0,1,0,0,1)) {
   13194            nn     = INSN(19,16);
   13195            tt     = INSN(3,0);
   13196            isLoad = False;
   13197            szBlg2     = INSN(22,21);  // 10:B 11:H 00:W 01:invalid
   13198            gate   = szBlg2 != BITS2(0,1) && tt != 15 && nn != 15;
   13199         }
   13200         if (gate) {
   13201            // Rearrange szBlg2 bits to be the same as the Thumb case
   13202            switch (szBlg2) {
   13203               case 2: szBlg2 = 0; break;
   13204               case 3: szBlg2 = 1; break;
   13205               case 0: szBlg2 = 2; break;
   13206               default: /*NOTREACHED*/vassert(0);
   13207            }
   13208         }
   13209      }
   13210      // For both encodings, the instruction is guarded by condT, which
   13211      // is passed in by the caller.  Note that the loads and stores
   13212      // are conditional, so we don't have to truncate the IRSB at this
   13213      // point, but the fence is unconditional.  There's no way to
   13214      // represent a conditional fence without a side exit, but it
   13215      // doesn't matter from a correctness standpoint that it is
   13216      // unconditional -- it just loses a bit of performance in the
   13217      // case where the condition doesn't hold.
   13218      if (gate) {
   13219         vassert(szBlg2 <= 2 && nn <= 14 && tt <= 14);
   13220         IRExpr* ea = llGetIReg(nn);
   13221         if (isLoad) {
   13222            static IRLoadGOp cvt[3]
   13223               = { ILGop_8Uto32, ILGop_16Uto32, ILGop_Ident32 }; // indexed by szBlg2: B, H, W
   13224            IRTemp data = newTemp(Ity_I32);
   13225            loadGuardedLE(data, cvt[szBlg2], ea, mkU32(0)/*alt*/, condT);
   13226            if (isT) {
   13227               putIRegT(tt, mkexpr(data), condT);
   13228            } else {
   13229               putIRegA(tt, mkexpr(data), condT, Ijk_INVALID);
   13230            }
   13231            stmt(IRStmt_MBE(Imbe_Fence)); // load case: the fence follows the load
   13232         } else {
   13233            stmt(IRStmt_MBE(Imbe_Fence)); // store case: the fence precedes the store
   13234            IRExpr* data = llGetIReg(tt);
   13235            switch (szBlg2) {
   13236               case 0: data = unop(Iop_32to8,  data); break;
   13237               case 1: data = unop(Iop_32to16, data); break;
   13238               case 2: break;
   13239               default: vassert(0);
   13240            }
   13241            storeGuardedLE(ea, data, condT);
   13242         }
   13243         const HChar* ldNames[3] = { "ldab", "ldah", "lda" };
   13244         const HChar* stNames[3] = { "stlb", "stlh", "stl" };
   13245         DIP("%s r%u, [r%u]\n", (isLoad ? ldNames : stNames)[szBlg2], tt, nn); // newline-terminated like the other trace lines
   13246         return True;
   13247      }
   13248      /* else fall through */
   13249    }
   13250 
   13251    /* ----------- LDAEX{,B,H,D}, STLEX{,B,H,D} ----------- */
   13252    /*     31   27   23   19 15 11   7    3
   13253       A1: cond 0001 1101 n  t  1110 1001 1111  LDAEXB Rt, [Rn]
   13254       A1: cond 0001 1111 n  t  1110 1001 1111  LDAEXH Rt, [Rn]
   13255       A1: cond 0001 1001 n  t  1110 1001 1111  LDAEX  Rt, [Rn]
   13256       A1: cond 0001 1011 n  t  1110 1001 1111  LDAEXD Rt, Rt+1, [Rn]
   13257 
   13258       A1: cond 0001 1100 n  d  1110 1001 t     STLEXB Rd, Rt, [Rn]
   13259       A1: cond 0001 1110 n  d  1110 1001 t     STLEXH Rd, Rt, [Rn]
   13260       A1: cond 0001 1000 n  d  1110 1001 t     STLEX  Rd, Rt, [Rn]
   13261       A1: cond 0001 1010 n  d  1110 1001 t     STLEXD Rd, Rt, Rt+1, [Rn]
   13262 
   13263           31  28   24    19 15 11   7    3
   13264       T1: 111 0100 01101 n  t  1111 1100 1111  LDAEXB Rt, [Rn]
   13265       T1: 111 0100 01101 n  t  1111 1101 1111  LDAEXH Rt, [Rn]
   13266       T1: 111 0100 01101 n  t  1111 1110 1111  LDAEX  Rt, [Rn]
   13267       T1: 111 0100 01101 n  t  t2   1111 1111  LDAEXD Rt, Rt2, [Rn]
   13268 
   13269       T1: 111 0100 01100 n  t  1111 1100 d     STLEXB Rd, Rt, [Rn]
   13270       T1: 111 0100 01100 n  t  1111 1101 d     STLEXH Rd, Rt, [Rn]
   13271       T1: 111 0100 01100 n  t  1111 1110 d     STLEX  Rd, Rt, [Rn]
   13272       T1: 111 0100 01100 n  t  t2   1111 d     STLEXD Rd, Rt, Rt2, [Rn]
   13273    */
   13274    {  /* LDAEX{,B,H,D} / STLEX{,B,H,D}: decode, validate register numbers, then emit an LLSC transaction with a fence after the load or before the store. */
   13275      UInt nn     = 16; // invalid
   13276      UInt tt     = 16; // invalid
   13277      UInt tt2    = 16; // invalid
   13278      UInt dd     = 16; // invalid
   13279      UInt szBlg2 = 4;  // invalid
   13280      Bool isLoad = False;
   13281      Bool gate   = False; // cleared again below whenever a later check rejects the instruction
   13282      if (isT) {
   13283         if (INSN(31,21) == BITS11(1,1,1,0,1,0,0,0,1,1,0)
   13284             && INSN(7,6) == BITS2(1,1)) {
   13285            isLoad = INSN(20,20) == 1;
   13286            nn     = INSN(19,16);
   13287            tt     = INSN(15,12);
   13288            tt2    = INSN(11,8);
   13289            szBlg2 = INSN(5,4);
   13290            dd     = INSN(3,0);
   13291            gate   = True;
   13292            if (szBlg2 < BITS2(1,1) && tt2 != BITS4(1,1,1,1)) gate = False; // non-D forms need bits 11:8 == 1111
   13293            if (isLoad && dd != BITS4(1,1,1,1)) gate = False; // loads need bits 3:0 == 1111
   13294            // re-set not-used register values to invalid
   13295            if (szBlg2 < BITS2(1,1)) tt2 = 16;
   13296            if (isLoad) dd = 16;
   13297         }
   13298      } else {
   13299         /* ARM encoding.  Do the load and store cases separately as
   13300            the register numbers are in different places and a combined decode
   13301            is too confusing. */
   13302         if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 1
   13303             && INSN(11,0) == BITS12(1,1,1,0,1,0,0,1,1,1,1,1)) {
   13304            szBlg2 = INSN(22,21);
   13305            isLoad = True;
   13306            nn     = INSN(19,16);
   13307            tt     = INSN(15,12);
   13308            gate   = True;
   13309         }
   13310         else
   13311         if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 0
   13312             && INSN(11,4) == BITS8(1,1,1,0,1,0,0,1)) {
   13313            szBlg2 = INSN(22,21);
   13314            isLoad = False;
   13315            nn     = INSN(19,16);
   13316            dd     = INSN(15,12);
   13317            tt     = INSN(3,0);
   13318            gate   = True;
   13319         }
   13320         if (gate) {
   13321            // Rearrange szBlg2 bits to be the same as the Thumb case
   13322            switch (szBlg2) {
   13323               case 2: szBlg2 = 0; break;
   13324               case 3: szBlg2 = 1; break;
   13325               case 0: szBlg2 = 2; break;
   13326               case 1: szBlg2 = 3; break;
   13327               default: /*NOTREACHED*/vassert(0);
   13328            }
   13329         }
   13330      }
   13331      // Perform further checks on register numbers
   13332      if (gate) {
   13333         /**/ if (isT && isLoad) {
   13334            // Thumb load
   13335            if (szBlg2 < 3) {
   13336               if (! (tt != 13 && tt != 15 && nn != 15)) gate = False;
   13337            } else {
   13338               if (! (tt != 13 && tt != 15 && tt2 != 13 && tt2 != 15
   13339                      && tt != tt2 && nn != 15)) gate = False;
   13340            }
   13341         }
   13342         else if (isT && !isLoad) {
   13343            // Thumb store
   13344            if (szBlg2 < 3) {
   13345               if (! (dd != 13 && dd != 15 && tt != 13 && tt != 15
   13346                      && nn != 15 && dd != nn && dd != tt)) gate = False;
   13347            } else {
   13348               if (! (dd != 13 && dd != 15 && tt != 13 && tt != 15
   13349                      && tt2 != 13 && tt2 != 15 && nn != 15 && dd != nn
   13350                      && dd != tt && dd != tt2)) gate = False;
   13351            }
   13352         }
   13353         else if (!isT && isLoad) {
   13354            // ARM Load
   13355            if (szBlg2 < 3) {
   13356               if (! (tt != 15 && nn != 15)) gate = False;
   13357            } else {
   13358               if (! ((tt & 1) == 0 && tt != 14 && nn != 15)) gate = False;
   13359               vassert(tt2 == 16/*invalid*/);
   13360               tt2 = tt + 1; // ARM D form: the second register is implied as Rt+1
   13361            }
   13362         }
   13363         else if (!isT && !isLoad) {
   13364            // ARM Store
   13365            if (szBlg2 < 3) {
   13366               if (! (dd != 15 && tt != 15 && nn != 15
   13367                      && dd != nn && dd != tt)) gate = False;
   13368            } else {
   13369               if (! (dd != 15 && (tt & 1) == 0 && tt != 14 && nn != 15
   13370                      && dd != nn && dd != tt && dd != tt+1)) gate = False;
   13371               vassert(tt2 == 16/*invalid*/);
   13372               tt2 = tt + 1; // ARM D form: the second register is implied as Rt+1
   13373            }
   13374         }
   13375         else /*NOTREACHED*/vassert(0);
   13376      }
   13377      if (gate) {
   13378         // Paranoia ..
   13379         vassert(szBlg2 <= 3);
   13380         if (szBlg2 < 3) { vassert(tt2 == 16/*invalid*/); }
   13381                    else { vassert(tt2 <= 14); }
   13382         if (isLoad) { vassert(dd == 16/*invalid*/); }
   13383                else { vassert(dd <= 14); }
   13384      }
   13385      // If we're still good even after all that, generate the IR.
   13386      if (gate) {
   13387         /* First, go unconditional.  Staying in-line is too complex. */
   13388         if (isT) {
   13389            vassert(condT != IRTemp_INVALID);
   13390            mk_skip_over_T32_if_cond_is_false( condT );
   13391         } else {
   13392            if (condT != IRTemp_INVALID) {
   13393               mk_skip_over_A32_if_cond_is_false( condT );
   13394               condT = IRTemp_INVALID;
   13395            }
   13396         }
   13397         /* Now the load or store. */
   13398         IRType ty = Ity_INVALID; /* the type of the transferred data */
   13399         const HChar* nm = NULL; /* size suffix used in the trace mnemonic */
   13400         switch (szBlg2) {
   13401            case 0: nm = "b"; ty = Ity_I8;  break;
   13402            case 1: nm = "h"; ty = Ity_I16; break;
   13403            case 2: nm = "";  ty = Ity_I32; break;
   13404            case 3: nm = "d"; ty = Ity_I64; break;
   13405            default: vassert(0);
   13406         }
   13407         IRExpr* ea = isT ? getIRegT(nn) : getIRegA(nn);
   13408         if (isLoad) {
   13409            // LOAD.  Transaction, then fence.
   13410            IROp widen = Iop_INVALID; // stays Iop_INVALID for the 32- and 64-bit sizes, which need no widening
   13411            switch (szBlg2) {
   13412               case 0: widen = Iop_8Uto32;  break;
   13413               case 1: widen = Iop_16Uto32; break;
   13414               case 2: case 3: break;
   13415               default: vassert(0);
   13416            }
   13417            IRTemp  res = newTemp(ty);
   13418            // FIXME: assumes little-endian guest
   13419            stmt( IRStmt_LLSC(Iend_LE, res, ea, NULL/*this is a load*/) );
   13420 
   13421 #          define PUT_IREG(_nnz, _eez) \
   13422               do { vassert((_nnz) <= 14); /* no writes to the PC */ \
   13423                    if (isT) { putIRegT((_nnz), (_eez), IRTemp_INVALID); } \
   13424                        else { putIRegA((_nnz), (_eez), \
   13425                               IRTemp_INVALID, Ijk_Boring); } } while(0)
   13426            if (ty == Ity_I64) {
   13427               // FIXME: assumes little-endian guest
   13428               PUT_IREG(tt,  unop(Iop_64to32, mkexpr(res)));
   13429               PUT_IREG(tt2, unop(Iop_64HIto32, mkexpr(res)));
   13430            } else {
   13431               PUT_IREG(tt, widen == Iop_INVALID
   13432                               ? mkexpr(res) : unop(widen, mkexpr(res)));
   13433            }
   13434            stmt(IRStmt_MBE(Imbe_Fence));
   13435            if (ty == Ity_I64) {
   13436               DIP("ldaex%s%s r%u, r%u, [r%u]\n",
   13437                   nm, isT ? "" : nCC(conq), tt, tt2, nn);
   13438            } else {
   13439               DIP("ldaex%s%s r%u, [r%u]\n", nm, isT ? "" : nCC(conq), tt, nn);
   13440            }
   13441 #          undef PUT_IREG
   13442         } else {
   13443            // STORE.  Fence, then transaction.
   13444            IRTemp resSC1, resSC32, data;
   13445            IROp   narrow = Iop_INVALID; // stays Iop_INVALID for the 32- and 64-bit sizes, which need no narrowing
   13446            switch (szBlg2) {
   13447               case 0: narrow = Iop_32to8; break;
   13448               case 1: narrow = Iop_32to16; break;
   13449               case 2: case 3: break;
   13450               default: vassert(0);
   13451            }
   13452            stmt(IRStmt_MBE(Imbe_Fence));
   13453            data = newTemp(ty);
   13454 #          define GET_IREG(_nnz) (isT ? getIRegT(_nnz) : getIRegA(_nnz))
   13455            assign(data,
   13456                   ty == Ity_I64
   13457                      // FIXME: assumes little-endian guest
   13458                      ? binop(Iop_32HLto64, GET_IREG(tt2), GET_IREG(tt))
   13459                      : narrow == Iop_INVALID
   13460                         ? GET_IREG(tt)
   13461                         : unop(narrow, GET_IREG(tt)));
   13462 #          undef GET_IREG
   13463            resSC1 = newTemp(Ity_I1);
   13464            // FIXME: assumes little-endian guest
   13465            stmt( IRStmt_LLSC(Iend_LE, resSC1, ea, mkexpr(data)) );
   13466 
   13467            /* Set rDD to 1 on failure, 0 on success.  Currently we have
   13468               resSC1 == 0 on failure, 1 on success. */
   13469            resSC32 = newTemp(Ity_I32);
   13470            assign(resSC32,
   13471                   unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   13472            vassert(dd <= 14); /* no writes to the PC */
   13473            if (isT) {
   13474               putIRegT(dd, mkexpr(resSC32), IRTemp_INVALID);
   13475            } else {
   13476               putIRegA(dd, mkexpr(resSC32), IRTemp_INVALID, Ijk_Boring);
   13477            }
   13478            if (ty == Ity_I64) {
   13479               DIP("stlex%s%s r%u, r%u, r%u, [r%u]\n",
   13480                   nm, isT ? "" : nCC(conq), dd, tt, tt2, nn);
   13481            } else {
   13482               DIP("stlex%s%s r%u, r%u, [r%u]\n",
   13483                   nm, isT ? "" : nCC(conq), dd, tt, nn);
   13484            }
   13485         } /* if (isLoad) */
   13486         return True;
   13487      } /* if (gate) */
   13488      /* else fall through */
   13489    }
   13490 
   13491    /* ----------- VSEL<c>.F64 d_d_d, VSEL<c>.F32 s_s_s ----------- */
   13492    /*        31   27    22 21 19 15 11  8 7 6 5 4 3
   13493       T1/A1: 1111 11100 D  cc n  d  101 1 N 0 M 0 m  VSEL<c>.F64 Dd, Dn, Dm
   13494       T1/A1: 1111 11100 D  cc n  d  101 0 N 0 M 0 m  VSEL<c>.F32 Sd, Sn, Sm
   13495 
   13496       ARM encoding is in NV space.
   13497       In Thumb mode, we must not be in an IT block.
   13498    */
   13499    if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,0) && INSN(11,9) == BITS3(1,0,1)
   13500        && INSN(6,6) == 0 && INSN(4,4) == 0) { /* VSEL: write either Rn or Rm to Rd, chosen by a condition derived from the cc field */
   13501       UInt bit_D  = INSN(22,22);
   13502       UInt fld_cc = INSN(21,20); /* 2-bit condition selector */
   13503       UInt fld_n  = INSN(19,16);
   13504       UInt fld_d  = INSN(15,12);
   13505       Bool isF64  = INSN(8,8) == 1; /* bit 8: 1 = F64 (D registers), 0 = F32 (F registers) */
   13506       UInt bit_N  = INSN(7,7);
   13507       UInt bit_M  = INSN(5,5);
   13508       UInt fld_m  = INSN(3,0);
   13509 
   13510       UInt dd = isF64 ? ((bit_D << 4) | fld_d) : ((fld_d << 1) | bit_D); /* F64: extra bit is the top bit; F32: extra bit is the bottom bit */
   13511       UInt nn = isF64 ? ((bit_N << 4) | fld_n) : ((fld_n << 1) | bit_N);
   13512       UInt mm = isF64 ? ((bit_M << 4) | fld_m) : ((fld_m << 1) | bit_M);
   13513 
   13514       UInt cc_1 = (fld_cc >> 1) & 1;
   13515       UInt cc_0 = (fld_cc >> 0) & 1;
   13516       UInt cond = (fld_cc << 2) | ((cc_1 ^ cc_0) << 1) | 0; /* 4-bit value laid out as cc[1] : cc[0] : (cc[1]^cc[0]) : 0 */
   13517 
   13518       if (isT) {
   13519          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   13520       }
   13521       /* In ARM mode, this is statically unconditional.  In Thumb mode,
   13522          this must be dynamically unconditional, and we've SIGILLd if not.
   13523          In either case we can create unconditional IR. */
   13524 
   13525       IRTemp guard = newTemp(Ity_I32); /* holds the I32 produced by mk_armg_calculate_condition(cond) */
   13526       assign(guard, mk_armg_calculate_condition(cond));
   13527       IRExpr* srcN = (isF64 ? llGetDReg : llGetFReg)(nn);
   13528       IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
   13529       IRExpr* res  = IRExpr_ITE(unop(Iop_32to1, mkexpr(guard)), srcN, srcM); /* guard narrowed to 1 bit: srcN if set, else srcM */
   13530       (isF64 ? llPutDReg : llPutFReg)(dd, res);
   13531 
   13532       UChar rch = isF64 ? 'd' : 'f'; /* NOTE(review): F32 registers are traced as fN although the encoding comment names them Sd/Sn/Sm -- confirm intended */
   13533       DIP("vsel%s.%s %c%u, %c%u, %c%u\n",
   13534           nCC(cond), isF64 ? "f64" : "f32", rch, dd, rch, nn, rch, mm);
   13535       return True; /* presumably tells the caller the instruction was decoded -- TODO confirm */
   13536    }
   13537 
   13538    /* -------- VRINT{A,N,P,M}.F64 d_d, VRINT{A,N,P,M}.F32 s_s -------- */
   13539    /*        31        22 21   17 15 11  8 7  5 4 3
   13540       T1/A1: 111111101 D  1110 rm Vd 101 1 01 M 0 Vm VRINT{A,N,P,M}.F64 Dd, Dm
   13541       T1/A1: 111111101 D  1110 rm Vd 101 0 01 M 0 Vm VRINT{A,N,P,M}.F32 Sd, Sm
   13542 
   13543       ARM encoding is in NV space.
   13544       In Thumb mode, we must not be in an IT block.
   13545    */
   13546    if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1)
   13547        && INSN(21,18) == BITS4(1,1,1,0) && INSN(11,9) == BITS3(1,0,1)
   13548        && INSN(7,6) == BITS2(0,1) && INSN(4,4) == 0) { /* VRINT{A,N,P,M}: Iop_RoundF64toInt/Iop_RoundF32toInt with a rounding mode fixed by the rm field */
   13549       UInt bit_D  = INSN(22,22);
   13550       UInt fld_rm = INSN(17,16); /* 2-bit rounding-mode selector, decoded by the switch below */
   13551       UInt fld_d  = INSN(15,12);
   13552       Bool isF64  = INSN(8,8) == 1; /* bit 8: 1 = F64 (D registers), 0 = F32 (F registers) */
   13553       UInt bit_M  = INSN(5,5);
   13554       UInt fld_m  = INSN(3,0);
   13555 
   13556       UInt dd = isF64 ? ((bit_D << 4) | fld_d) : ((fld_d << 1) | bit_D); /* F64: extra bit is the top bit; F32: extra bit is the bottom bit */
   13557       UInt mm = isF64 ? ((bit_M << 4) | fld_m) : ((fld_m << 1) | bit_M);
   13558 
   13559       if (isT) {
   13560          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   13561       }
   13562       /* In ARM mode, this is statically unconditional.  In Thumb mode,
   13563          this must be dynamically unconditional, and we've SIGILLd if not.
   13564          In either case we can create unconditional IR. */
   13565 
   13566       UChar c = '?'; /* mnemonic letter for the trace; always overwritten by the switch */
   13567       IRRoundingMode rm = Irrm_NEAREST; /* placeholder; always overwritten by the switch */
   13568       switch (fld_rm) {
   13569          /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
   13570             kludge since it doesn't take into account the nearest-even vs
   13571             nearest-away semantics. */
   13572          case BITS2(0,0): c = 'a'; rm = Irrm_NEAREST; break;
   13573          case BITS2(0,1): c = 'n'; rm = Irrm_NEAREST; break;
   13574          case BITS2(1,0): c = 'p'; rm = Irrm_PosINF;  break;
   13575          case BITS2(1,1): c = 'm'; rm = Irrm_NegINF;  break;
   13576          default: vassert(0); /* unreachable for a 2-bit field */
   13577       }
   13578 
   13579       IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
   13580       IRExpr* res  = binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
   13581                            mkU32((UInt)rm), srcM); /* the rounding mode is passed as a constant first operand */
   13582       (isF64 ? llPutDReg : llPutFReg)(dd, res);
   13583 
   13584       UChar rch = isF64 ? 'd' : 'f'; /* NOTE(review): F32 registers are traced as fN although the encoding comment names them Sd/Sm -- confirm intended */
   13585       DIP("vrint%c.%s.%s %c%u, %c%u\n",
   13586           c, isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
   13587       return True; /* presumably tells the caller the instruction was decoded -- TODO confirm */
   13588    }
   13589 
   13590    /* -------- VRINT{Z,R}.F64.F64 d_d, VRINT{Z,R}.F32.F32 s_s -------- */
   13591    /*     31   27    22 21     15 11   7  6 5 4 3
   13592       T1: 1110 11101 D  110110 Vd 1011 op 1 M 0 Vm VRINT<r><c>.F64.F64 Dd, Dm
   13593       A1: cond 11101 D  110110 Vd 1011 op 1 M 0 Vm
   13594 
   13595       T1: 1110 11101 D  110110 Vd 1010 op 1 M 0 Vm VRINT<r><c>.F32.F32 Sd, Sm
   13596       A1: cond 11101 D  110110 Vd 1010 op 1 M 0 Vm
   13597 
   13598       In contrast to the VRINT variants just above, this can be conditional.
   13599    */
   13600    if ((isT ? (INSN(31,28) == BITS4(1,1,1,0)) : True)
   13601        && INSN(27,23) == BITS5(1,1,1,0,1) && INSN(21,16) == BITS6(1,1,0,1,1,0)
   13602        && INSN(11,9) == BITS3(1,0,1) && INSN(6,6) == 1 && INSN(4,4) == 0) { /* VRINT{Z,R}: round using either Irrm_ZERO or the mode from mk_get_IR_rounding_mode() */
   13603       UInt bit_D   = INSN(22,22);
   13604       UInt fld_Vd  = INSN(15,12);
   13605       Bool isF64   = INSN(8,8) == 1; /* bit 8: 1 = F64 (D registers), 0 = F32 (F registers) */
   13606       Bool rToZero = INSN(7,7) == 1; /* the "op" bit: 1 = Z variant, 0 = R variant */
   13607       UInt bit_M   = INSN(5,5);
   13608       UInt fld_Vm  = INSN(3,0);
   13609       UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D); /* F64: extra bit is the top bit; F32: extra bit is the bottom bit */
   13610       UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
   13611 
   13612       if (isT) vassert(condT != IRTemp_INVALID); /* the Thumb path requires a valid guard temp */
   13613       IRType ty  = isF64 ? Ity_F64 : Ity_F32;
   13614       IRTemp src = newTemp(ty);
   13615       IRTemp res = newTemp(ty);
   13616       assign(src, (isF64 ? getDReg : getFReg)(mm));
   13617 
   13618       IRTemp rm = newTemp(Ity_I32); /* rounding mode operand for the Round op below */
   13619       assign(rm, rToZero ? mkU32(Irrm_ZERO)
   13620                          : mkexpr(mk_get_IR_rounding_mode()));
   13621       assign(res, binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
   13622                         mkexpr(rm), mkexpr(src)));
   13623       (isF64 ? putDReg : putFReg)(dd, mkexpr(res), condT); /* condT is passed through to the register write */
   13624 
   13625       UChar rch = isF64 ? 'd' : 'f'; /* NOTE(review): F32 registers are traced as fN although the encoding comment names them Sd/Sm -- confirm intended */
   13626       DIP("vrint%c.%s.%s %c%u, %c%u\n",
   13627           rToZero ? 'z' : 'r',
   13628           isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
   13629       return True; /* presumably tells the caller the instruction was decoded -- TODO confirm */
   13630    }
   13631 
   13632    /* ----------- VCVT{A,N,P,M}{.S32,.U32}{.F64,.F32} ----------- */
   13633    /*        31   27    22 21   17 15 11  8  7  6 5 4 3
   13634       T1/A1: 1111 11101 D  1111 rm Vd 101 sz op 1 M 0 Vm
   13635              VCVT{A,N,P,M}{.S32,.U32}.F64 Sd, Dm
   13636              VCVT{A,N,P,M}{.S32,.U32}.F32 Sd, Sm
   13637 
   13638       ARM encoding is in NV space.
   13639       In Thumb mode, we must not be in an IT block.
   13640    */
   13641    if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1) && INSN(21,18) == BITS4(1,1,1,1)
   13642        && INSN(11,9) == BITS3(1,0,1) && INSN(6,6) == 1 && INSN(4,4) == 0) { /* VCVT{A,N,P,M}: F64/F32 to 32-bit integer with a rounding mode fixed by the rm field */
   13643       UInt bit_D  = INSN(22,22);
   13644       UInt fld_rm = INSN(17,16); /* 2-bit rounding-mode selector, decoded by the switch below */
   13645       UInt fld_Vd = INSN(15,12);
   13646       Bool isF64  = INSN(8,8) == 1; /* source width only; the destination is always an F register */
   13647       Bool isU    = INSN(7,7) == 0; /* the "op" bit: 0 = unsigned result, 1 = signed result */
   13648       UInt bit_M  = INSN(5,5);
   13649       UInt fld_Vm = INSN(3,0);
   13650 
   13651       UInt dd = (fld_Vd << 1) | bit_D;
   13652       UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
   13653 
   13654       if (isT) {
   13655          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   13656       }
   13657       /* In ARM mode, this is statically unconditional.  In Thumb mode,
   13658          this must be dynamically unconditional, and we've SIGILLd if not.
   13659          In either case we can create unconditional IR. */
   13660 
   13661       UChar c = '?'; /* mnemonic letter for the trace; always overwritten by the switch */
   13662       IRRoundingMode rm = Irrm_NEAREST; /* placeholder; always overwritten by the switch */
   13663       switch (fld_rm) {
   13664          /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
   13665             kludge since it doesn't take into account the nearest-even vs
   13666             nearest-away semantics. */
   13667          case BITS2(0,0): c = 'a'; rm = Irrm_NEAREST; break;
   13668          case BITS2(0,1): c = 'n'; rm = Irrm_NEAREST; break;
   13669          case BITS2(1,0): c = 'p'; rm = Irrm_PosINF;  break;
   13670          case BITS2(1,1): c = 'm'; rm = Irrm_NegINF;  break;
   13671          default: vassert(0); /* unreachable for a 2-bit field */
   13672       }
   13673 
   13674       IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
   13675       IRTemp   res = newTemp(Ity_I32);
   13676 
   13677       /* The arm back end doesn't support use of Iop_F32toI32U or
   13678          Iop_F32toI32S, so for those cases we widen the F32 to F64
   13679          and then follow the F64 route. */
   13680       if (!isF64) {
   13681          srcM = unop(Iop_F32toF64, srcM);
   13682       }
   13683       assign(res, binop(isU ? Iop_F64toI32U : Iop_F64toI32S,
   13684                         mkU32((UInt)rm), srcM));
   13685 
   13686       llPutFReg(dd, unop(Iop_ReinterpI32asF32, mkexpr(res))); /* the integer bits are stored into the F register unchanged */
   13687 
   13688       UChar rch = isF64 ? 'd' : 's'; /* same letter as the hard-coded 's' used for the destination below */
   13689       DIP("vcvt%c.%s.%s %c%u, %c%u\n",
   13690           c, isU ? "u32" : "s32", isF64 ? "f64" : "f32", 's', dd, rch, mm);
   13691       return True; /* presumably tells the caller the instruction was decoded -- TODO confirm */
   13692    }
   13693 
   13694    /* ----------- V{MAX,MIN}NM{.F64 d_d_d, .F32 s_s_s} ----------- */
   13695    /* 31   27    22 21 19 15 11  8 7 6  5 4 3
   13696       1111 11101 D  00 Vn Vd 101 1 N op M 0 Vm  V{MIN,MAX}NM.F64 Dd, Dn, Dm
   13697       1111 11101 D  00 Vn Vd 101 0 N op M 0 Vm  V{MIN,MAX}NM.F32 Sd, Sn, Sm
   13698 
   13699       ARM encoding is in NV space.
   13700       In Thumb mode, we must not be in an IT block.
   13701    */
   13702    if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1) && INSN(21,20) == BITS2(0,0)
   13703        && INSN(11,9) == BITS3(1,0,1) && INSN(4,4) == 0) { /* V{MAX,MIN}NM on D or F registers: a single Iop_{Max,Min}NumF{64,32} binop */
   13704       UInt bit_D  = INSN(22,22);
   13705       UInt fld_Vn = INSN(19,16);
   13706       UInt fld_Vd = INSN(15,12);
   13707       Bool isF64  = INSN(8,8) == 1; /* bit 8: 1 = F64 (D registers), 0 = F32 (F registers) */
   13708       UInt bit_N  = INSN(7,7);
   13709       Bool isMAX  = INSN(6,6) == 0; /* the "op" bit: 0 = max, 1 = min */
   13710       UInt bit_M  = INSN(5,5);
   13711       UInt fld_Vm = INSN(3,0);
   13712 
   13713       UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D); /* F64: extra bit is the top bit; F32: extra bit is the bottom bit */
   13714       UInt nn = isF64 ? ((bit_N << 4) | fld_Vn) : ((fld_Vn << 1) | bit_N);
   13715       UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
   13716 
   13717       if (isT) {
   13718          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   13719       }
   13720       /* In ARM mode, this is statically unconditional.  In Thumb mode,
   13721          this must be dynamically unconditional, and we've SIGILLd if not.
   13722          In either case we can create unconditional IR. */
   13723 
   13724       IROp op = isF64 ? (isMAX ? Iop_MaxNumF64 : Iop_MinNumF64)
   13725                       : (isMAX ? Iop_MaxNumF32 : Iop_MinNumF32); /* chosen by operand width and by max/min */
   13726       IRExpr* srcN = (isF64 ? llGetDReg : llGetFReg)(nn);
   13727       IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
   13728       IRExpr* res  = binop(op, srcN, srcM);
   13729       (isF64 ? llPutDReg : llPutFReg)(dd, res);
   13730 
   13731       UChar rch = isF64 ? 'd' : 'f'; /* NOTE(review): F32 registers are traced as fN although the encoding comment names them Sd/Sn/Sm -- confirm intended */
   13732       DIP("v%snm.%s %c%u, %c%u, %c%u\n",
   13733           isMAX ? "max" : "min", isF64 ? "f64" : "f32",
   13734           rch, dd, rch, nn, rch, mm);
   13735       return True; /* presumably tells the caller the instruction was decoded -- TODO confirm */
   13736    }
   13737 
   13738    /* ----------- VRINTX.F64.F64 d_d, VRINTX.F32.F32 s_s ----------- */
   13739    /*     31   27    22 21     15 11  8 7  5 4 3
   13740       T1: 1110 11101 D  110111 Vd 101 1 01 M 0 Vm VRINTX<c>.F64.F64 Dd, Dm
   13741       A1: cond 11101 D  110111 Vd 101 1 01 M 0 Vm
   13742 
   13743       T1: 1110 11101 D  110111 Vd 101 0 01 M 0 Vm VRINTX<c>.F32.F32 Dd, Dm
   13744       A1: cond 11101 D  110111 Vd 101 0 01 M 0 Vm
   13745 
   13746       Like VRINT{Z,R}{.F64.F64, .F32.F32} just above, this can be conditional.
   13747       This produces the same code as the VRINTR case since we ignore the
   13748       requirement to signal inexactness.
   13749    */
   13750    if ((isT ? (INSN(31,28) == BITS4(1,1,1,0)) : True)
   13751        && INSN(27,23) == BITS5(1,1,1,0,1) && INSN(21,16) == BITS6(1,1,0,1,1,1)
   13752        && INSN(11,9) == BITS3(1,0,1) && INSN(7,6) == BITS2(0,1)
   13753        && INSN(4,4) == 0) { /* VRINTX: round using the mode from mk_get_IR_rounding_mode() */
   13754       UInt bit_D  = INSN(22,22);
   13755       UInt fld_Vd = INSN(15,12);
   13756       Bool isF64  = INSN(8,8) == 1; /* bit 8: 1 = F64 (D registers), 0 = F32 (F registers) */
   13757       UInt bit_M  = INSN(5,5);
   13758       UInt fld_Vm = INSN(3,0);
   13759       UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D); /* F64: extra bit is the top bit; F32: extra bit is the bottom bit */
   13760       UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
   13761 
   13762       if (isT) vassert(condT != IRTemp_INVALID); /* the Thumb path requires a valid guard temp */
   13763       IRType ty  = isF64 ? Ity_F64 : Ity_F32;
   13764       IRTemp src = newTemp(ty);
   13765       IRTemp res = newTemp(ty);
   13766       assign(src, (isF64 ? getDReg : getFReg)(mm));
   13767 
   13768       IRTemp rm = newTemp(Ity_I32); /* rounding mode operand for the Round op below */
   13769       assign(rm, mkexpr(mk_get_IR_rounding_mode()));
   13770       assign(res, binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
   13771                         mkexpr(rm), mkexpr(src)));
   13772       (isF64 ? putDReg : putFReg)(dd, mkexpr(res), condT); /* condT is passed through to the register write */
   13773 
   13774       UChar rch = isF64 ? 'd' : 'f'; /* NOTE(review): F32 registers are traced as fN although the VRINT{Z,R} encoding comment names them Sd/Sm -- confirm intended */
   13775       DIP("vrint%c.%s.%s %c%u, %c%u\n",
   13776           'x',
   13777           isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
   13778       return True; /* presumably tells the caller the instruction was decoded -- TODO confirm */
   13779    }
   13780 
   13781    /* ----------- V{MAX,MIN}NM{.F32 d_d_d, .F32 q_q_q} ----------- */
   13782    /*     31   27    22 21 20 19 15 11   7 6 5 4 3
   13783       T1: 1111 11110 D  op 0  Vn Vd 1111 N 1 M 1 Vm  V{MIN,MAX}NM.F32 Qd,Qn,Qm
   13784       A1: 1111 00110 D  op 0  Vn Vd 1111 N 1 M 1 Vm
   13785 
   13786       T1: 1111 11110 D  op 0  Vn Vd 1111 N 0 M 1 Vm  V{MIN,MAX}NM.F32 Dd,Dn,Dm
   13787       A1: 1111 00110 D  op 0  Vn Vd 1111 N 0 M 1 Vm
   13788 
   13789       ARM encoding is in NV space.
   13790       In Thumb mode, we must not be in an IT block.
   13791    */
   13792    if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,0)
   13793                            : BITS9(1,1,1,1,0,0,1,1,0))
   13794        && INSN(20,20) == 0 && INSN(11,8) == BITS4(1,1,1,1) && INSN(4,4) == 1) {
   13795       UInt bit_D  = INSN(22,22);
   13796       Bool isMax  = INSN(21,21) == 0;
   13797       UInt fld_Vn = INSN(19,16);
   13798       UInt fld_Vd = INSN(15,12);
   13799       UInt bit_N  = INSN(7,7);
   13800       Bool isQ    = INSN(6,6) == 1;
   13801       UInt bit_M  = INSN(5,5);
   13802       UInt fld_Vm = INSN(3,0);
   13803 
   13804       /* dd, nn, mm are D-register numbers. */
   13805       UInt dd = (bit_D << 4) | fld_Vd;
   13806       UInt nn = (bit_N << 4) | fld_Vn;
   13807       UInt mm = (bit_M << 4) | fld_Vm;
   13808 
   13809       if (! (isQ && ((dd & 1) == 1 || (nn & 1) == 1 || (mm & 1) == 1))) {
   13810          /* Do this piecewise on f regs.  This is a bit tricky
   13811             though because we are dealing with the full 16 x Q == 32 x D
   13812             register set, so the implied F reg numbers are 0 to 63.  But
   13813             ll{Get,Put}FReg only allow the 0 .. 31 as those are the only
   13814             architected F regs. */
   13815          UInt ddF = dd << 1;
   13816          UInt nnF = nn << 1;
   13817          UInt mmF = mm << 1;
   13818 
   13819          if (isT) {
   13820             gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   13821          }
   13822          /* In ARM mode, this is statically unconditional.  In Thumb mode,
   13823             this must be dynamically unconditional, and we've SIGILLd if not.
   13824             In either case we can create unconditional IR. */
   13825 
   13826          IROp op = isMax ? Iop_MaxNumF32 : Iop_MinNumF32;
   13827 
   13828          IRTemp r0 = newTemp(Ity_F32);
   13829          IRTemp r1 = newTemp(Ity_F32);
   13830          IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
   13831          IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
   13832 
   13833          assign(r0, binop(op, llGetFReg_up_to_64(nnF+0),
   13834                               llGetFReg_up_to_64(mmF+0)));
   13835          assign(r1, binop(op, llGetFReg_up_to_64(nnF+1),
   13836                               llGetFReg_up_to_64(mmF+1)));
   13837          if (isQ) {
   13838             assign(r2, binop(op, llGetFReg_up_to_64(nnF+2),
   13839                                  llGetFReg_up_to_64(mmF+2)));
   13840             assign(r3, binop(op, llGetFReg_up_to_64(nnF+3),
   13841                                  llGetFReg_up_to_64(mmF+3)));
   13842          }
   13843          llPutFReg_up_to_64(ddF+0, mkexpr(r0));
   13844          llPutFReg_up_to_64(ddF+1, mkexpr(r1));
   13845          if (isQ) {
   13846             llPutFReg_up_to_64(ddF+2, mkexpr(r2));
   13847             llPutFReg_up_to_64(ddF+3, mkexpr(r3));
   13848          }
   13849 
   13850          HChar rch = isQ ? 'q' : 'd';
   13851          UInt  sh  = isQ ? 1 : 0;
   13852          DIP("v%snm.f32 %c%u, %c%u, %c%u\n",
   13853               isMax ? "max" : "min", rch,
   13854               dd >> sh, rch, nn >> sh, rch, mm >> sh);
   13855          return True;
   13856       }
   13857       /* else fall through */
   13858    }
   13859 
   13860    /* ----------- VCVT{A,N,P,M}{.F32 d_d, .F32 q_q} ----------- */
   13861    /*     31   27    22 21     15 11 9  7  6 5 4 3
   13862       T1: 1111 11111 D  111011 Vd 00 rm op Q M 0 Vm
   13863       A1: 1111 00111 D  111011 Vd 00 rm op Q M 0 Vm
   13864 
   13865       ARM encoding is in NV space.
   13866       In Thumb mode, we must not be in an IT block.
   13867    */
   13868    if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,1)
   13869                            : BITS9(1,1,1,1,0,0,1,1,1))
   13870        && INSN(21,16) == BITS6(1,1,1,0,1,1) && INSN(11,10) == BITS2(0,0)
   13871        && INSN(4,4) == 0) {
   13872       UInt bit_D  = INSN(22,22);
   13873       UInt fld_Vd = INSN(15,12);
   13874       UInt fld_rm = INSN(9,8);
   13875       Bool isU    = INSN(7,7) == 1;
   13876       Bool isQ    = INSN(6,6) == 1;
   13877       UInt bit_M  = INSN(5,5);
   13878       UInt fld_Vm = INSN(3,0);
   13879 
   13880       /* dd, nn, mm are D-register numbers. */
   13881       UInt dd = (bit_D << 4) | fld_Vd;
   13882       UInt mm = (bit_M << 4) | fld_Vm;
   13883 
   13884       if (! (isQ && ((dd & 1) == 1 || (mm & 1) == 1))) {
   13885          /* Do this piecewise on f regs. */
   13886          UInt ddF = dd << 1;
   13887          UInt mmF = mm << 1;
   13888 
   13889          if (isT) {
   13890             gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   13891          }
   13892          /* In ARM mode, this is statically unconditional.  In Thumb mode,
   13893             this must be dynamically unconditional, and we've SIGILLd if not.
   13894             In either case we can create unconditional IR. */
   13895 
   13896          UChar cvtc = '?';
   13897          IRRoundingMode rm = Irrm_NEAREST;
   13898          switch (fld_rm) {
   13899             /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
   13900                kludge since it doesn't take into account the nearest-even vs
   13901                nearest-away semantics. */
   13902             case BITS2(0,0): cvtc = 'a'; rm = Irrm_NEAREST; break;
   13903             case BITS2(0,1): cvtc = 'n'; rm = Irrm_NEAREST; break;
   13904             case BITS2(1,0): cvtc = 'p'; rm = Irrm_PosINF;  break;
   13905             case BITS2(1,1): cvtc = 'm'; rm = Irrm_NegINF;  break;
   13906             default: vassert(0);
   13907          }
   13908 
   13909          IROp cvt = isU ? Iop_F64toI32U : Iop_F64toI32S;
   13910 
   13911          IRTemp r0 = newTemp(Ity_F32);
   13912          IRTemp r1 = newTemp(Ity_F32);
   13913          IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
   13914          IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
   13915 
   13916          IRExpr* rmE = mkU32((UInt)rm);
   13917 
   13918          assign(r0, unop(Iop_ReinterpI32asF32,
   13919                          binop(cvt, rmE, unop(Iop_F32toF64,
   13920                                               llGetFReg_up_to_64(mmF+0)))));
   13921          assign(r1, unop(Iop_ReinterpI32asF32,
   13922                          binop(cvt, rmE, unop(Iop_F32toF64,
   13923                                               llGetFReg_up_to_64(mmF+1)))));
   13924          if (isQ) {
   13925             assign(r2, unop(Iop_ReinterpI32asF32,
   13926                             binop(cvt, rmE, unop(Iop_F32toF64,
   13927                                                  llGetFReg_up_to_64(mmF+2)))));
   13928             assign(r3, unop(Iop_ReinterpI32asF32,
   13929                             binop(cvt, rmE, unop(Iop_F32toF64,
   13930                                                  llGetFReg_up_to_64(mmF+3)))));
   13931          }
   13932 
   13933          llPutFReg_up_to_64(ddF+0, mkexpr(r0));
   13934          llPutFReg_up_to_64(ddF+1, mkexpr(r1));
   13935          if (isQ) {
   13936             llPutFReg_up_to_64(ddF+2, mkexpr(r2));
   13937             llPutFReg_up_to_64(ddF+3, mkexpr(r3));
   13938          }
   13939 
   13940          HChar rch = isQ ? 'q' : 'd';
   13941          UInt  sh  = isQ ? 1 : 0;
   13942          DIP("vcvt%c.%c32.f32 %c%u, %c%u\n",
   13943               cvtc, isU ? 'u' : 's', rch, dd >> sh, rch, mm >> sh);
   13944          return True;
   13945       }
   13946       /* else fall through */
   13947    }
   13948 
   13949    /* ----------- VRINT{A,N,P,M,X,Z}{.F32 d_d, .F32 q_q} ----------- */
   13950    /*     31   27    22 21     15 11 9  6 5 4 3
   13951       T1: 1111 11111 D  111010 Vd 01 op Q M 0 Vm
   13952       A1: 1111 00111 D  111010 Vd 01 op Q M 0 Vm
   13953 
   13954       ARM encoding is in NV space.
   13955       In Thumb mode, we must not be in an IT block.
   13956    */
   13957    if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,1)
   13958                            : BITS9(1,1,1,1,0,0,1,1,1))
   13959        && INSN(21,16) == BITS6(1,1,1,0,1,0) && INSN(11,10) == BITS2(0,1)
   13960        && INSN(4,4) == 0) {
   13961       UInt bit_D  = INSN(22,22);
   13962       UInt fld_Vd = INSN(15,12);
   13963       UInt fld_op = INSN(9,7);
   13964       Bool isQ    = INSN(6,6) == 1;
   13965       UInt bit_M  = INSN(5,5);
   13966       UInt fld_Vm = INSN(3,0);
   13967 
   13968       /* dd, nn, mm are D-register numbers. */
   13969       UInt dd = (bit_D << 4) | fld_Vd;
   13970       UInt mm = (bit_M << 4) | fld_Vm;
   13971 
   13972       if (! (fld_op == BITS3(1,0,0) || fld_op == BITS3(1,1,0))
   13973           && ! (isQ && ((dd & 1) == 1 || (mm & 1) == 1))) {
   13974          /* Do this piecewise on f regs. */
   13975          UInt ddF = dd << 1;
   13976          UInt mmF = mm << 1;
   13977 
   13978          if (isT) {
   13979             gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   13980          }
   13981          /* In ARM mode, this is statically unconditional.  In Thumb mode,
   13982             this must be dynamically unconditional, and we've SIGILLd if not.
   13983             In either case we can create unconditional IR. */
   13984 
   13985          UChar cvtc = '?';
   13986          IRRoundingMode rm = Irrm_NEAREST;
   13987          switch (fld_op) {
   13988             /* Various kludges:
   13989                - The use of NEAREST for both the 'a' and 'n' cases,
   13990                  since it doesn't take into account the nearest-even vs
   13991                  nearest-away semantics.
   13992                - For the 'x' case, we don't signal inexactness.
   13993             */
   13994             case BITS3(0,1,0): cvtc = 'a'; rm = Irrm_NEAREST; break;
   13995             case BITS3(0,0,0): cvtc = 'n'; rm = Irrm_NEAREST; break;
   13996             case BITS3(1,1,1): cvtc = 'p'; rm = Irrm_PosINF;  break;
   13997             case BITS3(1,0,1): cvtc = 'm'; rm = Irrm_NegINF;  break;
   13998             case BITS3(0,1,1): cvtc = 'z'; rm = Irrm_ZERO;    break;
   13999             case BITS3(0,0,1): cvtc = 'x'; rm = Irrm_NEAREST; break;
   14000             case BITS3(1,0,0):
   14001             case BITS3(1,1,0):
   14002             default: vassert(0);
   14003          }
   14004 
   14005          IRTemp r0 = newTemp(Ity_F32);
   14006          IRTemp r1 = newTemp(Ity_F32);
   14007          IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
   14008          IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
   14009 
   14010          IRExpr* rmE = mkU32((UInt)rm);
   14011          IROp    rnd = Iop_RoundF32toInt;
   14012 
   14013          assign(r0, binop(rnd, rmE, llGetFReg_up_to_64(mmF+0)));
   14014          assign(r1, binop(rnd, rmE, llGetFReg_up_to_64(mmF+1)));
   14015          if (isQ) {
   14016             assign(r2, binop(rnd, rmE, llGetFReg_up_to_64(mmF+2)));
   14017             assign(r3, binop(rnd, rmE, llGetFReg_up_to_64(mmF+3)));
   14018          }
   14019 
   14020          llPutFReg_up_to_64(ddF+0, mkexpr(r0));
   14021          llPutFReg_up_to_64(ddF+1, mkexpr(r1));
   14022          if (isQ) {
   14023             llPutFReg_up_to_64(ddF+2, mkexpr(r2));
   14024             llPutFReg_up_to_64(ddF+3, mkexpr(r3));
   14025          }
   14026 
   14027          HChar rch = isQ ? 'q' : 'd';
   14028          UInt  sh  = isQ ? 1 : 0;
   14029          DIP("vrint%c.f32.f32 %c%u, %c%u\n",
   14030              cvtc, rch, dd >> sh, rch, mm >> sh);
   14031          return True;
   14032       }
   14033       /* else fall through */
   14034    }
   14035 
   14036    /* ---------- Doesn't match anything. ---------- */
   14037    return False;
   14038 
   14039 #  undef INSN
   14040 }
   14041 
   14042 
   14043 /*------------------------------------------------------------*/
   14044 /*--- LDMxx/STMxx helper (both ARM and Thumb32)            ---*/
   14045 /*------------------------------------------------------------*/
   14046 
   14047 /* Generate IR for LDMxx and STMxx.  This is complex.  Assumes it's
   14048    unconditional, so the caller must produce a jump-around before
   14049    calling this, if the insn is to be conditional.  Caller is
   14050    responsible for all validation of parameters.  For LDMxx, if PC is
   14051    amongst the values loaded, caller is also responsible for
   14052    generating the jump. */
   14053 static void mk_ldm_stm ( Bool arm,     /* True: ARM, False: Thumb */
   14054                          UInt rN,      /* base reg */
   14055                          UInt bINC,    /* 1: inc,  0: dec */
   14056                          UInt bBEFORE, /* 1: inc/dec before, 0: after */
   14057                          UInt bW,      /* 1: writeback to Rn */
   14058                          UInt bL,      /* 1: load, 0: store */
   14059                          UInt regList )
   14060 {
   14061    Int i, r, m, nRegs;
   14062    IRTemp jk = Ijk_Boring;
   14063 
   14064    /* Get hold of the old Rn value.  We might need to write its value
   14065       to memory during a store, and if it's also the writeback
   14066       register then we need to get its value now.  We can't treat it
   14067       exactly like the other registers we're going to transfer,
   14068       because for xxMDA and xxMDB writeback forms, the generated IR
   14069       updates Rn in the guest state before any transfers take place.
   14070       We have to do this as per comments below, in order that if Rn is
   14071       the stack pointer then it always has a value is below or equal
   14072       to any of the transfer addresses.  Ick. */
   14073    IRTemp oldRnT = newTemp(Ity_I32);
   14074    assign(oldRnT, arm ? getIRegA(rN) : getIRegT(rN));
   14075 
   14076    IRTemp anchorT = newTemp(Ity_I32);
   14077    /* The old (Addison-Wesley) ARM ARM seems to say that LDMxx/STMxx
   14078       ignore the bottom two bits of the address.  However, Cortex-A8
   14079       doesn't seem to care.  Hence: */
   14080    /* No .. don't force alignment .. */
   14081    /* assign(anchorT, binop(Iop_And32, mkexpr(oldRnT), mkU32(~3U))); */
   14082    /* Instead, use the potentially misaligned address directly. */
   14083    assign(anchorT, mkexpr(oldRnT));
   14084 
   14085    IROp opADDorSUB = bINC ? Iop_Add32 : Iop_Sub32;
   14086    // bINC == 1:  xxMIA, xxMIB
   14087    // bINC == 0:  xxMDA, xxMDB
   14088 
   14089    // For xxMDA and xxMDB, update Rn first if necessary.  We have
   14090    // to do this first so that, for the common idiom of the transfers
   14091    // faulting because we're pushing stuff onto a stack and the stack
   14092    // is growing down onto allocate-on-fault pages (as Valgrind simulates),
   14093    // we need to have the SP up-to-date "covering" (pointing below) the
   14094    // transfer area.  For the same reason, if we are doing xxMIA or xxMIB,
   14095    // do the transfer first, and then update rN afterwards.
   14096    nRegs = 0;
   14097    for (i = 0; i < 16; i++) {
   14098      if ((regList & (1 << i)) != 0)
   14099          nRegs++;
   14100    }
   14101    if (bW == 1 && !bINC) {
   14102       IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
   14103       if (arm)
   14104          putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
   14105       else
   14106          putIRegT( rN, e, IRTemp_INVALID );
   14107    }
   14108 
   14109    // Make up a list of the registers to transfer, and their offsets
   14110    // in memory relative to the anchor.  If the base reg (Rn) is part
   14111    // of the transfer, then do it last for a load and first for a store.
   14112    UInt xReg[16], xOff[16];
   14113    Int  nX = 0;
   14114    m = 0;
   14115    for (i = 0; i < 16; i++) {
   14116       r = bINC ? i : (15-i);
   14117       if (0 == (regList & (1<<r)))
   14118          continue;
   14119       if (bBEFORE)
   14120          m++;
   14121       /* paranoia: check we aren't transferring the writeback
   14122          register during a load. Should be assured by decode-point
   14123          check above. */
   14124       if (bW == 1 && bL == 1)
   14125          vassert(r != rN);
   14126 
   14127       xOff[nX] = 4 * m;
   14128       xReg[nX] = r;
   14129       nX++;
   14130 
   14131       if (!bBEFORE)
   14132          m++;
   14133    }
   14134    vassert(m == nRegs);
   14135    vassert(nX == nRegs);
   14136    vassert(nX <= 16);
   14137 
   14138    if (bW == 0 && (regList & (1<<rN)) != 0) {
   14139       /* Non-writeback, and basereg is to be transferred.  Do its
   14140          transfer last for a load and first for a store.  Requires
   14141          reordering xOff/xReg. */
   14142       if (0) {
   14143          vex_printf("\nREG_LIST_PRE: (rN=%u)\n", rN);
   14144          for (i = 0; i < nX; i++)
   14145             vex_printf("reg %u   off %u\n", xReg[i], xOff[i]);
   14146          vex_printf("\n");
   14147       }
   14148 
   14149       vassert(nX > 0);
   14150       for (i = 0; i < nX; i++) {
   14151          if (xReg[i] == rN)
   14152              break;
   14153       }
   14154       vassert(i < nX); /* else we didn't find it! */
   14155       UInt tReg = xReg[i];
   14156       UInt tOff = xOff[i];
   14157       if (bL == 1) {
   14158          /* load; make this transfer happen last */
   14159          if (i < nX-1) {
   14160             for (m = i+1; m < nX; m++) {
   14161                xReg[m-1] = xReg[m];
   14162                xOff[m-1] = xOff[m];
   14163             }
   14164             vassert(m == nX);
   14165             xReg[m-1] = tReg;
   14166             xOff[m-1] = tOff;
   14167          }
   14168       } else {
   14169          /* store; make this transfer happen first */
   14170          if (i > 0) {
   14171             for (m = i-1; m >= 0; m--) {
   14172                xReg[m+1] = xReg[m];
   14173                xOff[m+1] = xOff[m];
   14174             }
   14175             vassert(m == -1);
   14176             xReg[0] = tReg;
   14177             xOff[0] = tOff;
   14178          }
   14179       }
   14180 
   14181       if (0) {
   14182          vex_printf("REG_LIST_POST:\n");
   14183          for (i = 0; i < nX; i++)
   14184             vex_printf("reg %u   off %u\n", xReg[i], xOff[i]);
   14185          vex_printf("\n");
   14186       }
   14187    }
   14188 
   14189    /* According to the Cortex A8 TRM Sec. 5.2.1, LDM(1) with r13 as the base
   14190        register and PC in the register list is a return for purposes of branch
   14191        prediction.
   14192       The ARM ARM Sec. C9.10.1 further specifies that writeback must be enabled
   14193        to be counted in event 0x0E (Procedure return).*/
   14194    if (rN == 13 && bL == 1 && bINC && !bBEFORE && bW == 1) {
   14195       jk = Ijk_Ret;
   14196    }
   14197 
   14198    /* Actually generate the transfers */
   14199    for (i = 0; i < nX; i++) {
   14200       r = xReg[i];
   14201       if (bL == 1) {
   14202          IRExpr* e = loadLE(Ity_I32,
   14203                             binop(opADDorSUB, mkexpr(anchorT),
   14204                                   mkU32(xOff[i])));
   14205          if (arm) {
   14206             putIRegA( r, e, IRTemp_INVALID, jk );
   14207          } else {
   14208             // no: putIRegT( r, e, IRTemp_INVALID );
   14209             // putIRegT refuses to write to R15.  But that might happen.
   14210             // Since this is uncond, and we need to be able to
   14211             // write the PC, just use the low level put:
   14212             llPutIReg( r, e );
   14213          }
   14214       } else {
   14215          /* if we're storing Rn, make sure we use the correct
   14216             value, as per extensive comments above */
   14217          storeLE( binop(opADDorSUB, mkexpr(anchorT), mkU32(xOff[i])),
   14218                   r == rN ? mkexpr(oldRnT)
   14219                           : (arm ? getIRegA(r) : getIRegT(r) ) );
   14220       }
   14221    }
   14222 
   14223    // If we are doing xxMIA or xxMIB,
   14224    // do the transfer first, and then update rN afterwards.
   14225    if (bW == 1 && bINC) {
   14226       IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
   14227       if (arm)
   14228          putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
   14229       else
   14230          putIRegT( rN, e, IRTemp_INVALID );
   14231    }
   14232 }
   14233 
   14234 
   14235 /*------------------------------------------------------------*/
   14236 /*--- VFP (CP 10 and 11) instructions                      ---*/
   14237 /*------------------------------------------------------------*/
   14238 
   14239 /* Both ARM and Thumb */
   14240 
   14241 /* Translate a CP10 or CP11 instruction.  If successful, returns
   14242    True and *dres may or may not be updated.  If failure, returns
   14243    False and doesn't change *dres nor create any IR.
   14244 
   14245    The ARM and Thumb encodings are identical for the low 28 bits of
   14246    the insn (yay!) and that's what the caller must supply, iow, insn28
   14247    has the top 4 bits masked out.  Caller is responsible for
   14248    determining whether the masked-out bits are valid for a CP10/11
   14249    insn.  The rules for the top 4 bits are:
   14250 
   14251      ARM: 0000 to 1110 allowed, and this is the gating condition.
   14252      1111 (NV) is not allowed.
   14253 
   14254      Thumb: must be 1110.  The gating condition is taken from
   14255      ITSTATE in the normal way.
   14256 
   14257    Conditionalisation:
   14258 
   14259    Caller must supply an IRTemp 'condT' holding the gating condition,
   14260    or IRTemp_INVALID indicating the insn is always executed.
   14261 
   14262    Caller must also supply an ARMCondcode 'conq'.  This is only used
   14263    for debug printing, no other purpose.  For ARM, this is simply the
   14264    top 4 bits of the original instruction.  For Thumb, the condition
   14265    is not (really) known until run time, and so ARMCondAL should be
   14266    passed, only so that printing of these instructions does not show
   14267    any condition.
   14268 
   14269    Finally, the caller must indicate whether this occurs in ARM or
   14270    Thumb code.
   14271 */
   14272 static Bool decode_CP10_CP11_instruction (
   14273                /*MOD*/DisResult* dres,
   14274                UInt              insn28,
   14275                IRTemp            condT,
   14276                ARMCondcode       conq,
   14277                Bool              isT
   14278             )
   14279 {
   14280 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn28, (_bMax), (_bMin))
   14281 
   14282    vassert(INSN(31,28) == BITS4(0,0,0,0)); // caller's obligation
   14283 
   14284    if (isT) {
   14285       vassert(conq == ARMCondAL);
   14286    } else {
   14287       vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
   14288    }
   14289 
   14290    /* ----------------------------------------------------------- */
   14291    /* -- VFP instructions -- double precision (mostly)         -- */
   14292    /* ----------------------------------------------------------- */
   14293 
   14294    /* --------------------- fldmx, fstmx --------------------- */
   14295    /*
   14296                                  31   27   23   19 15 11   7   0
   14297                                          P U WL
   14298       C4-100, C5-26  1  FSTMX    cond 1100 1000 Rn Dd 1011 offset
   14299       C4-100, C5-28  2  FSTMIAX  cond 1100 1010 Rn Dd 1011 offset
   14300       C4-100, C5-30  3  FSTMDBX  cond 1101 0010 Rn Dd 1011 offset
   14301 
   14302       C4-42, C5-26   1  FLDMX    cond 1100 1001 Rn Dd 1011 offset
   14303       C4-42, C5-28   2  FLDMIAX  cond 1100 1011 Rn Dd 1011 offset
   14304       C4-42, C5-30   3  FLDMDBX  cond 1101 0011 Rn Dd 1011 offset
   14305 
   14306       Regs transferred: Dd .. D(d + (offset-3)/2)
   14307       offset must be odd, must not imply a reg > 15
   14308       IA/DB: Rn is changed by (4 + 8 x # regs transferred)
   14309 
   14310       case coding:
   14311          1  at-Rn   (access at Rn)
   14312          2  ia-Rn   (access at Rn, then Rn += 4+8n)
   14313          3  db-Rn   (Rn -= 4+8n,   then access at Rn)
   14314    */
   14315    if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
   14316        && INSN(11,8) == BITS4(1,0,1,1)) {
   14317       UInt bP      = (insn28 >> 24) & 1;
   14318       UInt bU      = (insn28 >> 23) & 1;
   14319       UInt bW      = (insn28 >> 21) & 1;
   14320       UInt bL      = (insn28 >> 20) & 1;
   14321       UInt offset  = (insn28 >> 0) & 0xFF;
   14322       UInt rN      = INSN(19,16);
   14323       UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
   14324       UInt nRegs   = (offset - 1) / 2;
   14325       UInt summary = 0;
   14326       Int  i;
   14327 
   14328       /**/ if (bP == 0 && bU == 1 && bW == 0) {
   14329          summary = 1;
   14330       }
   14331       else if (bP == 0 && bU == 1 && bW == 1) {
   14332          summary = 2;
   14333       }
   14334       else if (bP == 1 && bU == 0 && bW == 1) {
   14335          summary = 3;
   14336       }
   14337       else goto after_vfp_fldmx_fstmx;
   14338 
   14339       /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
   14340       if (rN == 15 && (summary == 2 || summary == 3 || isT))
   14341          goto after_vfp_fldmx_fstmx;
   14342 
   14343       /* offset must be odd, and specify at least one register */
   14344       if (0 == (offset & 1) || offset < 3)
   14345          goto after_vfp_fldmx_fstmx;
   14346 
   14347       /* can't transfer regs after D15 */
   14348       if (dD + nRegs - 1 >= 32)
   14349          goto after_vfp_fldmx_fstmx;
   14350 
   14351       /* Now, we can't do a conditional load or store, since that very
   14352          likely will generate an exception.  So we have to take a side
   14353          exit at this point if the condition is false. */
   14354       if (condT != IRTemp_INVALID) {
   14355          if (isT)
   14356             mk_skip_over_T32_if_cond_is_false( condT );
   14357          else
   14358             mk_skip_over_A32_if_cond_is_false( condT );
   14359          condT = IRTemp_INVALID;
   14360       }
   14361       /* Ok, now we're unconditional.  Do the load or store. */
   14362 
   14363       /* get the old Rn value */
   14364       IRTemp rnT = newTemp(Ity_I32);
   14365       assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
   14366                            rN == 15));
   14367 
   14368       /* make a new value for Rn, post-insn */
   14369       IRTemp rnTnew = IRTemp_INVALID;
   14370       if (summary == 2 || summary == 3) {
   14371          rnTnew = newTemp(Ity_I32);
   14372          assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
   14373                               mkexpr(rnT),
   14374                               mkU32(4 + 8 * nRegs)));
   14375       }
   14376 
   14377       /* decide on the base transfer address */
   14378       IRTemp taT = newTemp(Ity_I32);
   14379       assign(taT,  summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
   14380 
   14381       /* update Rn if necessary -- in case 3, we're moving it down, so
   14382          update before any memory reference, in order to keep Memcheck
   14383          and V's stack-extending logic (on linux) happy */
   14384       if (summary == 3) {
   14385          if (isT)
   14386             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   14387          else
   14388             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   14389       }
   14390 
   14391       /* generate the transfers */
   14392       for (i = 0; i < nRegs; i++) {
   14393          IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
   14394          if (bL) {
   14395             putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
   14396          } else {
   14397             storeLE(addr, getDReg(dD + i));
   14398          }
   14399       }
   14400 
   14401       /* update Rn if necessary -- in case 2, we're moving it up, so
   14402          update after any memory reference, in order to keep Memcheck
   14403          and V's stack-extending logic (on linux) happy */
   14404       if (summary == 2) {
   14405          if (isT)
   14406             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   14407          else
   14408             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   14409       }
   14410 
   14411       const HChar* nm = bL==1 ? "ld" : "st";
   14412       switch (summary) {
   14413          case 1:  DIP("f%smx%s r%u, {d%u-d%u}\n",
   14414                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   14415                   break;
   14416          case 2:  DIP("f%smiax%s r%u!, {d%u-d%u}\n",
   14417                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   14418                   break;
   14419          case 3:  DIP("f%smdbx%s r%u!, {d%u-d%u}\n",
   14420                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   14421                   break;
   14422          default: vassert(0);
   14423       }
   14424 
   14425       goto decode_success_vfp;
   14426       /* FIXME alignment constraints? */
   14427    }
   14428 
   14429   after_vfp_fldmx_fstmx:
   14430 
   14431    /* --------------------- fldmd, fstmd --------------------- */
   14432    /*
   14433                                  31   27   23   19 15 11   7   0
   14434                                          P U WL
   14435       C4-96, C5-26   1  FSTMD    cond 1100 1000 Rn Dd 1011 offset
   14436       C4-96, C5-28   2  FSTMDIA  cond 1100 1010 Rn Dd 1011 offset
   14437       C4-96, C5-30   3  FSTMDDB  cond 1101 0010 Rn Dd 1011 offset
   14438 
   14439       C4-38, C5-26   1  FLDMD    cond 1100 1001 Rn Dd 1011 offset
   14440       C4-38, C5-28   2  FLDMIAD  cond 1100 1011 Rn Dd 1011 offset
   14441       C4-38, C5-30   3  FLDMDBD  cond 1101 0011 Rn Dd 1011 offset
   14442 
   14443       Regs transferred: Dd .. D(d + (offset-2)/2)
   14444       offset must be even, must not imply a reg > 15
   14445       IA/DB: Rn is changed by (8 x # regs transferred)
   14446 
   14447       case coding:
   14448          1  at-Rn   (access at Rn)
   14449          2  ia-Rn   (access at Rn, then Rn += 8n)
   14450          3  db-Rn   (Rn -= 8n,     then access at Rn)
   14451    */
   14452    if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
   14453        && INSN(11,8) == BITS4(1,0,1,1)) {
   14454       UInt bP      = (insn28 >> 24) & 1;
   14455       UInt bU      = (insn28 >> 23) & 1;
   14456       UInt bW      = (insn28 >> 21) & 1;
   14457       UInt bL      = (insn28 >> 20) & 1;
   14458       UInt offset  = (insn28 >> 0) & 0xFF;
   14459       UInt rN      = INSN(19,16);
   14460       UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
   14461       UInt nRegs   = offset / 2;
   14462       UInt summary = 0;
   14463       Int  i;
   14464 
   14465       /**/ if (bP == 0 && bU == 1 && bW == 0) {
   14466          summary = 1;
   14467       }
   14468       else if (bP == 0 && bU == 1 && bW == 1) {
   14469          summary = 2;
   14470       }
   14471       else if (bP == 1 && bU == 0 && bW == 1) {
   14472          summary = 3;
   14473       }
   14474       else goto after_vfp_fldmd_fstmd;
   14475 
   14476       /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
   14477       if (rN == 15 && (summary == 2 || summary == 3 || isT))
   14478          goto after_vfp_fldmd_fstmd;
   14479 
   14480       /* offset must be even, and specify at least one register */
   14481       if (1 == (offset & 1) || offset < 2)
   14482          goto after_vfp_fldmd_fstmd;
   14483 
   14484       /* can't transfer regs after D15 */
   14485       if (dD + nRegs - 1 >= 32)
   14486          goto after_vfp_fldmd_fstmd;
   14487 
   14488       /* Now, we can't do a conditional load or store, since that very
   14489          likely will generate an exception.  So we have to take a side
   14490          exit at this point if the condition is false. */
   14491       if (condT != IRTemp_INVALID) {
   14492          if (isT)
   14493             mk_skip_over_T32_if_cond_is_false( condT );
   14494          else
   14495             mk_skip_over_A32_if_cond_is_false( condT );
   14496          condT = IRTemp_INVALID;
   14497       }
   14498       /* Ok, now we're unconditional.  Do the load or store. */
   14499 
   14500       /* get the old Rn value */
   14501       IRTemp rnT = newTemp(Ity_I32);
   14502       assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
   14503                            rN == 15));
   14504 
   14505       /* make a new value for Rn, post-insn */
   14506       IRTemp rnTnew = IRTemp_INVALID;
   14507       if (summary == 2 || summary == 3) {
   14508          rnTnew = newTemp(Ity_I32);
   14509          assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
   14510                               mkexpr(rnT),
   14511                               mkU32(8 * nRegs)));
   14512       }
   14513 
   14514       /* decide on the base transfer address */
   14515       IRTemp taT = newTemp(Ity_I32);
   14516       assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
   14517 
   14518       /* update Rn if necessary -- in case 3, we're moving it down, so
   14519          update before any memory reference, in order to keep Memcheck
   14520          and V's stack-extending logic (on linux) happy */
   14521       if (summary == 3) {
   14522          if (isT)
   14523             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   14524          else
   14525             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   14526       }
   14527 
   14528       /* generate the transfers */
   14529       for (i = 0; i < nRegs; i++) {
   14530          IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
   14531          if (bL) {
   14532             putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
   14533          } else {
   14534             storeLE(addr, getDReg(dD + i));
   14535          }
   14536       }
   14537 
   14538       /* update Rn if necessary -- in case 2, we're moving it up, so
   14539          update after any memory reference, in order to keep Memcheck
   14540          and V's stack-extending logic (on linux) happy */
   14541       if (summary == 2) {
   14542          if (isT)
   14543             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   14544          else
   14545             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   14546       }
   14547 
   14548       const HChar* nm = bL==1 ? "ld" : "st";
   14549       switch (summary) {
   14550          case 1:  DIP("f%smd%s r%u, {d%u-d%u}\n",
   14551                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   14552                   break;
   14553          case 2:  DIP("f%smiad%s r%u!, {d%u-d%u}\n",
   14554                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   14555                   break;
   14556          case 3:  DIP("f%smdbd%s r%u!, {d%u-d%u}\n",
   14557                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
   14558                   break;
   14559          default: vassert(0);
   14560       }
   14561 
   14562       goto decode_success_vfp;
   14563       /* FIXME alignment constraints? */
   14564    }
   14565 
   14566   after_vfp_fldmd_fstmd:
   14567 
   14568    /* ------------------- fmrx, fmxr ------------------- */
   14569    if (BITS8(1,1,1,0,1,1,1,1) == INSN(27,20)
   14570        && BITS4(1,0,1,0) == INSN(11,8)
   14571        && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
   14572       UInt rD  = INSN(15,12);
   14573       UInt reg = INSN(19,16);
   14574       if (reg == BITS4(0,0,0,1)) {
   14575          if (rD == 15) {
   14576             IRTemp nzcvT = newTemp(Ity_I32);
   14577             /* When rD is 15, we are copying the top 4 bits of FPSCR
   14578                into CPSR.  That is, set the flags thunk to COPY and
   14579                install FPSCR[31:28] as the value to copy. */
   14580             assign(nzcvT, binop(Iop_And32,
   14581                                 IRExpr_Get(OFFB_FPSCR, Ity_I32),
   14582                                 mkU32(0xF0000000)));
   14583             setFlags_D1(ARMG_CC_OP_COPY, nzcvT, condT);
   14584             DIP("fmstat%s\n", nCC(conq));
   14585          } else {
   14586             /* Otherwise, merely transfer FPSCR to r0 .. r14. */
   14587             IRExpr* e = IRExpr_Get(OFFB_FPSCR, Ity_I32);
   14588             if (isT)
   14589                putIRegT(rD, e, condT);
   14590             else
   14591                putIRegA(rD, e, condT, Ijk_Boring);
   14592             DIP("fmrx%s r%u, fpscr\n", nCC(conq), rD);
   14593          }
   14594          goto decode_success_vfp;
   14595       }
   14596       /* fall through */
   14597    }
   14598 
   14599    if (BITS8(1,1,1,0,1,1,1,0) == INSN(27,20)
   14600        && BITS4(1,0,1,0) == INSN(11,8)
   14601        && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
   14602       UInt rD  = INSN(15,12);
   14603       UInt reg = INSN(19,16);
   14604       if (reg == BITS4(0,0,0,1)) {
   14605          putMiscReg32(OFFB_FPSCR,
   14606                       isT ? getIRegT(rD) : getIRegA(rD), condT);
   14607          DIP("fmxr%s fpscr, r%u\n", nCC(conq), rD);
   14608          goto decode_success_vfp;
   14609       }
   14610       /* fall through */
   14611    }
   14612 
   14613    /* --------------------- vmov --------------------- */
   14614    // VMOV dM, rD, rN
   14615    if (0x0C400B10 == (insn28 & 0x0FF00FD0)) {
   14616       UInt dM = INSN(3,0) | (INSN(5,5) << 4);
   14617       UInt rD = INSN(15,12); /* lo32 */
   14618       UInt rN = INSN(19,16); /* hi32 */
   14619       if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))) {
   14620          /* fall through */
   14621       } else {
   14622          putDReg(dM,
   14623                  unop(Iop_ReinterpI64asF64,
   14624                       binop(Iop_32HLto64,
   14625                             isT ? getIRegT(rN) : getIRegA(rN),
   14626                             isT ? getIRegT(rD) : getIRegA(rD))),
   14627                  condT);
   14628          DIP("vmov%s d%u, r%u, r%u\n", nCC(conq), dM, rD, rN);
   14629          goto decode_success_vfp;
   14630       }
   14631       /* fall through */
   14632    }
   14633 
   14634    // VMOV rD, rN, dM
   14635    if (0x0C500B10 == (insn28 & 0x0FF00FD0)) {
   14636       UInt dM = INSN(3,0) | (INSN(5,5) << 4);
   14637       UInt rD = INSN(15,12); /* lo32 */
   14638       UInt rN = INSN(19,16); /* hi32 */
   14639       if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))
   14640           || rD == rN) {
   14641          /* fall through */
   14642       } else {
   14643          IRTemp i64 = newTemp(Ity_I64);
   14644          assign(i64, unop(Iop_ReinterpF64asI64, getDReg(dM)));
   14645          IRExpr* hi32 = unop(Iop_64HIto32, mkexpr(i64));
   14646          IRExpr* lo32 = unop(Iop_64to32,   mkexpr(i64));
   14647          if (isT) {
   14648             putIRegT(rN, hi32, condT);
   14649             putIRegT(rD, lo32, condT);
   14650          } else {
   14651             putIRegA(rN, hi32, condT, Ijk_Boring);
   14652             putIRegA(rD, lo32, condT, Ijk_Boring);
   14653          }
   14654          DIP("vmov%s r%u, r%u, d%u\n", nCC(conq), rD, rN, dM);
   14655          goto decode_success_vfp;
   14656       }
   14657       /* fall through */
   14658    }
   14659 
   14660    // VMOV sD, sD+1, rN, rM
   14661    if (0x0C400A10 == (insn28 & 0x0FF00FD0)) {
   14662       UInt sD = (INSN(3,0) << 1) | INSN(5,5);
   14663       UInt rN = INSN(15,12);
   14664       UInt rM = INSN(19,16);
   14665       if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
   14666           || sD == 31) {
   14667          /* fall through */
   14668       } else {
   14669          putFReg(sD,
   14670                  unop(Iop_ReinterpI32asF32, isT ? getIRegT(rN) : getIRegA(rN)),
   14671                  condT);
   14672          putFReg(sD+1,
   14673                  unop(Iop_ReinterpI32asF32, isT ? getIRegT(rM) : getIRegA(rM)),
   14674                  condT);
   14675          DIP("vmov%s, s%u, s%u, r%u, r%u\n",
   14676               nCC(conq), sD, sD + 1, rN, rM);
   14677          goto decode_success_vfp;
   14678       }
   14679    }
   14680 
   14681    // VMOV rN, rM, sD, sD+1
   14682    if (0x0C500A10 == (insn28 & 0x0FF00FD0)) {
   14683       UInt sD = (INSN(3,0) << 1) | INSN(5,5);
   14684       UInt rN = INSN(15,12);
   14685       UInt rM = INSN(19,16);
   14686       if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
   14687           || sD == 31 || rN == rM) {
   14688          /* fall through */
   14689       } else {
   14690          IRExpr* res0 = unop(Iop_ReinterpF32asI32, getFReg(sD));
   14691          IRExpr* res1 = unop(Iop_ReinterpF32asI32, getFReg(sD+1));
   14692          if (isT) {
   14693             putIRegT(rN, res0, condT);
   14694             putIRegT(rM, res1, condT);
   14695          } else {
   14696             putIRegA(rN, res0, condT, Ijk_Boring);
   14697             putIRegA(rM, res1, condT, Ijk_Boring);
   14698          }
   14699          DIP("vmov%s, r%u, r%u, s%u, s%u\n",
   14700              nCC(conq), rN, rM, sD, sD + 1);
   14701          goto decode_success_vfp;
   14702       }
   14703    }
   14704 
   14705    // VMOV dD[x], rT  (ARM core register to scalar)
   14706    if (0x0E000B10 == (insn28 & 0x0F900F1F)) {
   14707       UInt rD  = (INSN(7,7) << 4) | INSN(19,16);
   14708       UInt rT  = INSN(15,12);
   14709       UInt opc = (INSN(22,21) << 2) | INSN(6,5);
   14710       UInt index;
   14711       if (rT == 15 || (isT && rT == 13)) {
   14712          /* fall through */
   14713       } else {
   14714          if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
   14715             index = opc & 7;
   14716             putDRegI64(rD, triop(Iop_SetElem8x8,
   14717                                  getDRegI64(rD),
   14718                                  mkU8(index),
   14719                                  unop(Iop_32to8,
   14720                                       isT ? getIRegT(rT) : getIRegA(rT))),
   14721                            condT);
   14722             DIP("vmov%s.8 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
   14723             goto decode_success_vfp;
   14724          }
   14725          else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
   14726             index = (opc >> 1) & 3;
   14727             putDRegI64(rD, triop(Iop_SetElem16x4,
   14728                                  getDRegI64(rD),
   14729                                  mkU8(index),
   14730                                  unop(Iop_32to16,
   14731                                       isT ? getIRegT(rT) : getIRegA(rT))),
   14732                            condT);
   14733             DIP("vmov%s.16 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
   14734             goto decode_success_vfp;
   14735          }
   14736          else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0)) {
   14737             index = (opc >> 2) & 1;
   14738             putDRegI64(rD, triop(Iop_SetElem32x2,
   14739                                  getDRegI64(rD),
   14740                                  mkU8(index),
   14741                                  isT ? getIRegT(rT) : getIRegA(rT)),
   14742                            condT);
   14743             DIP("vmov%s.32 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
   14744             goto decode_success_vfp;
   14745          } else {
   14746             /* fall through */
   14747          }
   14748       }
   14749    }
   14750 
   14751    // VMOV (scalar to ARM core register)
   14752    // VMOV rT, dN[x]
   14753    if (0x0E100B10 == (insn28 & 0x0F100F1F)) {
   14754       UInt rN  = (INSN(7,7) << 4) | INSN(19,16);
   14755       UInt rT  = INSN(15,12);
   14756       UInt U   = INSN(23,23);
   14757       UInt opc = (INSN(22,21) << 2) | INSN(6,5);
   14758       UInt index;
   14759       if (rT == 15 || (isT && rT == 13)) {
   14760          /* fall through */
   14761       } else {
   14762          if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
   14763             index = opc & 7;
   14764             IRExpr* e = unop(U ? Iop_8Uto32 : Iop_8Sto32,
   14765                              binop(Iop_GetElem8x8,
   14766                                    getDRegI64(rN),
   14767                                    mkU8(index)));
   14768             if (isT)
   14769                putIRegT(rT, e, condT);
   14770             else
   14771                putIRegA(rT, e, condT, Ijk_Boring);
   14772             DIP("vmov%s.%c8 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
   14773                   rT, rN, index);
   14774             goto decode_success_vfp;
   14775          }
   14776          else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
   14777             index = (opc >> 1) & 3;
   14778             IRExpr* e = unop(U ? Iop_16Uto32 : Iop_16Sto32,
   14779                              binop(Iop_GetElem16x4,
   14780                                    getDRegI64(rN),
   14781                                    mkU8(index)));
   14782             if (isT)
   14783                putIRegT(rT, e, condT);
   14784             else
   14785                putIRegA(rT, e, condT, Ijk_Boring);
   14786             DIP("vmov%s.%c16 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
   14787                   rT, rN, index);
   14788             goto decode_success_vfp;
   14789          }
   14790          else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0) && U == 0) {
   14791             index = (opc >> 2) & 1;
   14792             IRExpr* e = binop(Iop_GetElem32x2, getDRegI64(rN), mkU8(index));
   14793             if (isT)
   14794                putIRegT(rT, e, condT);
   14795             else
   14796                putIRegA(rT, e, condT, Ijk_Boring);
   14797             DIP("vmov%s.32 r%u, d%u[%u]\n", nCC(conq), rT, rN, index);
   14798             goto decode_success_vfp;
   14799          } else {
   14800             /* fall through */
   14801          }
   14802       }
   14803    }
   14804 
   14805    // VMOV.F32 sD, #imm
   14806    // FCONSTS sD, #imm
   14807    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14808        && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,0)) {
   14809       UInt rD   = (INSN(15,12) << 1) | INSN(22,22);
   14810       UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
   14811       UInt b    = (imm8 >> 6) & 1;
   14812       UInt imm;
   14813       imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,(imm8 >> 5) & 1) << 8)
   14814              | ((imm8 & 0x1f) << 3);
   14815       imm <<= 16;
   14816       putFReg(rD, unop(Iop_ReinterpI32asF32, mkU32(imm)), condT);
   14817       DIP("fconsts%s s%u #%u", nCC(conq), rD, imm8);
   14818       goto decode_success_vfp;
   14819    }
   14820 
   14821    // VMOV.F64 dD, #imm
   14822    // FCONSTD dD, #imm
   14823    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   14824        && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,1)) {
   14825       UInt rD   = INSN(15,12) | (INSN(22,22) << 4);
   14826       UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
   14827       UInt b    = (imm8 >> 6) & 1;
   14828       ULong imm;
   14829       imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,b) << 8)
   14830              | BITS8(b,b,0,0,0,0,0,0) | (imm8 & 0x3f);
   14831       imm <<= 48;
   14832       putDReg(rD, unop(Iop_ReinterpI64asF64, mkU64(imm)), condT);
   14833       DIP("fconstd%s d%u #%u", nCC(conq), rD, imm8);
   14834       goto decode_success_vfp;
   14835    }
   14836 
   14837    /* ---------------------- vdup ------------------------- */
   14838    // VDUP dD, rT
   14839    // VDUP qD, rT
   14840    if (BITS8(1,1,1,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,1))
   14841        && BITS4(1,0,1,1) == INSN(11,8) && INSN(6,6) == 0 && INSN(4,4) == 1) {
   14842       UInt rD   = (INSN(7,7) << 4) | INSN(19,16);
   14843       UInt rT   = INSN(15,12);
   14844       UInt Q    = INSN(21,21);
   14845       UInt size = (INSN(22,22) << 1) | INSN(5,5);
   14846       if (rT == 15 || (isT && rT == 13) || size == 3 || (Q && (rD & 1))) {
   14847          /* fall through */
   14848       } else {
   14849          IRExpr* e = isT ? getIRegT(rT) : getIRegA(rT);
   14850          if (Q) {
   14851             rD >>= 1;
   14852             switch (size) {
   14853                case 0:
   14854                   putQReg(rD, unop(Iop_Dup32x4, e), condT);
   14855                   break;
   14856                case 1:
   14857                   putQReg(rD, unop(Iop_Dup16x8, unop(Iop_32to16, e)),
   14858                               condT);
   14859                   break;
   14860                case 2:
   14861                   putQReg(rD, unop(Iop_Dup8x16, unop(Iop_32to8, e)),
   14862                               condT);
   14863                   break;
   14864                default:
   14865                   vassert(0);
   14866             }
   14867             DIP("vdup.%d q%u, r%u\n", 32 / (1<<size), rD, rT);
   14868          } else {
   14869             switch (size) {
   14870                case 0:
   14871                   putDRegI64(rD, unop(Iop_Dup32x2, e), condT);
   14872                   break;
   14873                case 1:
   14874                   putDRegI64(rD, unop(Iop_Dup16x4, unop(Iop_32to16, e)),
   14875                                condT);
   14876                   break;
   14877                case 2:
   14878                   putDRegI64(rD, unop(Iop_Dup8x8, unop(Iop_32to8, e)),
   14879                                condT);
   14880                   break;
   14881                default:
   14882                   vassert(0);
   14883             }
   14884             DIP("vdup.%d d%u, r%u\n", 32 / (1<<size), rD, rT);
   14885          }
   14886          goto decode_success_vfp;
   14887       }
   14888    }
   14889 
   14890    /* --------------------- f{ld,st}d --------------------- */
   14891    // FLDD, FSTD
   14892    if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
   14893        && BITS4(1,0,1,1) == INSN(11,8)) {
   14894       UInt dD     = INSN(15,12) | (INSN(22,22) << 4);
   14895       UInt rN     = INSN(19,16);
   14896       UInt offset = (insn28 & 0xFF) << 2;
   14897       UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
   14898       UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
   14899       /* make unconditional */
   14900       if (condT != IRTemp_INVALID) {
   14901          if (isT)
   14902             mk_skip_over_T32_if_cond_is_false( condT );
   14903          else
   14904             mk_skip_over_A32_if_cond_is_false( condT );
   14905          condT = IRTemp_INVALID;
   14906       }
   14907       IRTemp ea = newTemp(Ity_I32);
   14908       assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
   14909                        align4if(isT ? getIRegT(rN) : getIRegA(rN),
   14910                                 rN == 15),
   14911                        mkU32(offset)));
   14912       if (bL) {
   14913          putDReg(dD, loadLE(Ity_F64,mkexpr(ea)), IRTemp_INVALID);
   14914       } else {
   14915          storeLE(mkexpr(ea), getDReg(dD));
   14916       }
   14917       DIP("f%sd%s d%u, [r%u, %c#%u]\n",
   14918           bL ? "ld" : "st", nCC(conq), dD, rN,
   14919           bU ? '+' : '-', offset);
   14920       goto decode_success_vfp;
   14921    }
   14922 
   14923    /* --------------------- dp insns (D) --------------------- */
   14924    if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
   14925        && BITS4(1,0,1,1) == INSN(11,8)
   14926        && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
   14927       UInt    dM  = INSN(3,0)   | (INSN(5,5) << 4);       /* argR */
   14928       UInt    dD  = INSN(15,12) | (INSN(22,22) << 4);   /* dst/acc */
   14929       UInt    dN  = INSN(19,16) | (INSN(7,7) << 4);     /* argL */
   14930       UInt    bP  = (insn28 >> 23) & 1;
   14931       UInt    bQ  = (insn28 >> 21) & 1;
   14932       UInt    bR  = (insn28 >> 20) & 1;
   14933       UInt    bS  = (insn28 >> 6) & 1;
   14934       UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
   14935       IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   14936       switch (opc) {
   14937          case BITS4(0,0,0,0): /* MAC: d + n * m */
   14938             putDReg(dD, triop(Iop_AddF64, rm,
   14939                               getDReg(dD),
   14940                               triop(Iop_MulF64, rm, getDReg(dN),
   14941                                                     getDReg(dM))),
   14942                         condT);
   14943             DIP("fmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   14944             goto decode_success_vfp;
   14945          case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
   14946             putDReg(dD, triop(Iop_AddF64, rm,
   14947                               getDReg(dD),
   14948                               unop(Iop_NegF64,
   14949                                    triop(Iop_MulF64, rm, getDReg(dN),
   14950                                                          getDReg(dM)))),
   14951                         condT);
   14952             DIP("fnmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   14953             goto decode_success_vfp;
   14954          case BITS4(0,0,1,0): /* MSC: - d + n * m */
   14955             putDReg(dD, triop(Iop_AddF64, rm,
   14956                               unop(Iop_NegF64, getDReg(dD)),
   14957                               triop(Iop_MulF64, rm, getDReg(dN),
   14958                                                     getDReg(dM))),
   14959                         condT);
   14960             DIP("fmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   14961             goto decode_success_vfp;
   14962          case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
   14963             putDReg(dD, triop(Iop_AddF64, rm,
   14964                               unop(Iop_NegF64, getDReg(dD)),
   14965                               unop(Iop_NegF64,
   14966                                    triop(Iop_MulF64, rm, getDReg(dN),
   14967                                                          getDReg(dM)))),
   14968                         condT);
   14969             DIP("fnmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   14970             goto decode_success_vfp;
   14971          case BITS4(0,1,0,0): /* MUL: n * m */
   14972             putDReg(dD, triop(Iop_MulF64, rm, getDReg(dN), getDReg(dM)),
   14973                         condT);
   14974             DIP("fmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   14975             goto decode_success_vfp;
   14976          case BITS4(0,1,0,1): /* NMUL: - n * m */
   14977             putDReg(dD, unop(Iop_NegF64,
   14978                              triop(Iop_MulF64, rm, getDReg(dN),
   14979                                                    getDReg(dM))),
   14980                     condT);
   14981             DIP("fnmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   14982             goto decode_success_vfp;
   14983          case BITS4(0,1,1,0): /* ADD: n + m */
   14984             putDReg(dD, triop(Iop_AddF64, rm, getDReg(dN), getDReg(dM)),
   14985                         condT);
   14986             DIP("faddd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   14987             goto decode_success_vfp;
   14988          case BITS4(0,1,1,1): /* SUB: n - m */
   14989             putDReg(dD, triop(Iop_SubF64, rm, getDReg(dN), getDReg(dM)),
   14990                         condT);
   14991             DIP("fsubd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   14992             goto decode_success_vfp;
   14993          case BITS4(1,0,0,0): /* DIV: n / m */
   14994             putDReg(dD, triop(Iop_DivF64, rm, getDReg(dN), getDReg(dM)),
   14995                         condT);
   14996             DIP("fdivd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   14997             goto decode_success_vfp;
   14998          case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
   14999             /* XXXROUNDINGFIXME look up ARM reference for fused
   15000                multiply-add rounding */
   15001             putDReg(dD, triop(Iop_AddF64, rm,
   15002                               unop(Iop_NegF64, getDReg(dD)),
   15003                               triop(Iop_MulF64, rm,
   15004                                                 getDReg(dN),
   15005                                                 getDReg(dM))),
   15006                         condT);
   15007             DIP("vfnmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   15008             goto decode_success_vfp;
   15009          case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
   15010             /* XXXROUNDINGFIXME look up ARM reference for fused
   15011                multiply-add rounding */
   15012             putDReg(dD, triop(Iop_AddF64, rm,
   15013                               unop(Iop_NegF64, getDReg(dD)),
   15014                               triop(Iop_MulF64, rm,
   15015                                                 unop(Iop_NegF64, getDReg(dN)),
   15016                                                 getDReg(dM))),
   15017                         condT);
   15018             DIP("vfnmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   15019             goto decode_success_vfp;
   15020          case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
   15021             /* XXXROUNDINGFIXME look up ARM reference for fused
   15022                multiply-add rounding */
   15023             putDReg(dD, triop(Iop_AddF64, rm,
   15024                               getDReg(dD),
   15025                               triop(Iop_MulF64, rm, getDReg(dN),
   15026                                                     getDReg(dM))),
   15027                         condT);
   15028             DIP("vfmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   15029             goto decode_success_vfp;
   15030          case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
   15031             /* XXXROUNDINGFIXME look up ARM reference for fused
   15032                multiply-add rounding */
   15033             putDReg(dD, triop(Iop_AddF64, rm,
   15034                               getDReg(dD),
   15035                               triop(Iop_MulF64, rm,
   15036                                     unop(Iop_NegF64, getDReg(dN)),
   15037                                     getDReg(dM))),
   15038                         condT);
   15039             DIP("vfmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
   15040             goto decode_success_vfp;
   15041          default:
   15042             break;
   15043       }
   15044    }
   15045 
   15046    /* --------------------- compares (D) --------------------- */
   15047    /*          31   27   23   19   15 11   7    3
   15048                  28   24   20   16 12    8    4    0
   15049       FCMPD    cond 1110 1D11 0100 Dd 1011 0100 Dm
   15050       FCMPED   cond 1110 1D11 0100 Dd 1011 1100 Dm
   15051       FCMPZD   cond 1110 1D11 0101 Dd 1011 0100 0000
   15052       FCMPZED  cond 1110 1D11 0101 Dd 1011 1100 0000
   15053                                  Z         N
   15054 
   15055       Z=0 Compare Dd vs Dm     and set FPSCR 31:28 accordingly
   15056       Z=1 Compare Dd vs zero
   15057 
   15058       N=1 generates Invalid Operation exn if either arg is any kind of NaN
   15059       N=0 generates Invalid Operation exn if either arg is a signalling NaN
   15060       (Not that we pay any attention to N here)
   15061    */
   15062    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   15063        && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   15064        && BITS4(1,0,1,1) == INSN(11,8)
   15065        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   15066       UInt bZ = (insn28 >> 16) & 1;
   15067       UInt bN = (insn28 >> 7) & 1;
   15068       UInt dD = INSN(15,12) | (INSN(22,22) << 4);
   15069       UInt dM = INSN(3,0) | (INSN(5,5) << 4);
   15070       if (bZ && INSN(3,0) != 0) {
   15071          /* does not decode; fall through */
   15072       } else {
   15073          IRTemp argL = newTemp(Ity_F64);
   15074          IRTemp argR = newTemp(Ity_F64);
   15075          IRTemp irRes = newTemp(Ity_I32);
   15076          assign(argL, getDReg(dD));
   15077          assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0)) : getDReg(dM));
   15078          assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
   15079 
   15080          IRTemp nzcv     = IRTemp_INVALID;
   15081          IRTemp oldFPSCR = newTemp(Ity_I32);
   15082          IRTemp newFPSCR = newTemp(Ity_I32);
   15083 
   15084          /* This is where the fun starts.  We have to convert 'irRes'
   15085             from an IR-convention return result (IRCmpF64Result) to an
   15086             ARM-encoded (N,Z,C,V) group.  The final result is in the
   15087             bottom 4 bits of 'nzcv'. */
   15088          /* Map compare result from IR to ARM(nzcv) */
   15089          /*
   15090             FP cmp result | IR   | ARM(nzcv)
   15091             --------------------------------
   15092             UN              0x45   0011
   15093             LT              0x01   1000
   15094             GT              0x00   0010
   15095             EQ              0x40   0110
   15096          */
   15097          nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
   15098 
   15099          /* And update FPSCR accordingly */
   15100          assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
   15101          assign(newFPSCR,
   15102                 binop(Iop_Or32,
   15103                       binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
   15104                       binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
   15105 
   15106          putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
   15107 
   15108          if (bZ) {
   15109             DIP("fcmpz%sd%s d%u\n", bN ? "e" : "", nCC(conq), dD);
   15110          } else {
   15111             DIP("fcmp%sd%s d%u, d%u\n", bN ? "e" : "", nCC(conq), dD, dM);
   15112          }
   15113          goto decode_success_vfp;
   15114       }
   15115       /* fall through */
   15116    }
   15117 
   15118    /* --------------------- unary (D) --------------------- */
   15119    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   15120        && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   15121        && BITS4(1,0,1,1) == INSN(11,8)
   15122        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   15123       UInt dD  = INSN(15,12) | (INSN(22,22) << 4);
   15124       UInt dM  = INSN(3,0) | (INSN(5,5) << 4);
   15125       UInt b16 = (insn28 >> 16) & 1;
   15126       UInt b7  = (insn28 >> 7) & 1;
   15127       /**/ if (b16 == 0 && b7 == 0) {
   15128          // FCPYD
   15129          putDReg(dD, getDReg(dM), condT);
   15130          DIP("fcpyd%s d%u, d%u\n", nCC(conq), dD, dM);
   15131          goto decode_success_vfp;
   15132       }
   15133       else if (b16 == 0 && b7 == 1) {
   15134          // FABSD
   15135          putDReg(dD, unop(Iop_AbsF64, getDReg(dM)), condT);
   15136          DIP("fabsd%s d%u, d%u\n", nCC(conq), dD, dM);
   15137          goto decode_success_vfp;
   15138       }
   15139       else if (b16 == 1 && b7 == 0) {
   15140          // FNEGD
   15141          putDReg(dD, unop(Iop_NegF64, getDReg(dM)), condT);
   15142          DIP("fnegd%s d%u, d%u\n", nCC(conq), dD, dM);
   15143          goto decode_success_vfp;
   15144       }
   15145       else if (b16 == 1 && b7 == 1) {
   15146          // FSQRTD
   15147          IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   15148          putDReg(dD, binop(Iop_SqrtF64, rm, getDReg(dM)), condT);
   15149          DIP("fsqrtd%s d%u, d%u\n", nCC(conq), dD, dM);
   15150          goto decode_success_vfp;
   15151       }
   15152       else
   15153          vassert(0);
   15154 
   15155       /* fall through */
   15156    }
   15157 
   15158    /* ----------------- I <-> D conversions ----------------- */
   15159 
   15160    // F{S,U}ITOD dD, fM
   15161    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   15162        && BITS4(1,0,0,0) == (INSN(19,16) & BITS4(1,1,1,1))
   15163        && BITS4(1,0,1,1) == INSN(11,8)
   15164        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   15165       UInt bM    = (insn28 >> 5) & 1;
   15166       UInt fM    = (INSN(3,0) << 1) | bM;
   15167       UInt dD    = INSN(15,12) | (INSN(22,22) << 4);
   15168       UInt syned = (insn28 >> 7) & 1;
   15169       if (syned) {
   15170          // FSITOD
   15171          putDReg(dD, unop(Iop_I32StoF64,
   15172                           unop(Iop_ReinterpF32asI32, getFReg(fM))),
   15173                  condT);
   15174          DIP("fsitod%s d%u, s%u\n", nCC(conq), dD, fM);
   15175       } else {
   15176          // FUITOD
   15177          putDReg(dD, unop(Iop_I32UtoF64,
   15178                           unop(Iop_ReinterpF32asI32, getFReg(fM))),
   15179                  condT);
   15180          DIP("fuitod%s d%u, s%u\n", nCC(conq), dD, fM);
   15181       }
   15182       goto decode_success_vfp;
   15183    }
   15184 
   15185    // FTO{S,U}ID fD, dM
   15186    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   15187        && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   15188        && BITS4(1,0,1,1) == INSN(11,8)
   15189        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   15190       UInt   bD    = (insn28 >> 22) & 1;
   15191       UInt   fD    = (INSN(15,12) << 1) | bD;
   15192       UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
   15193       UInt   bZ    = (insn28 >> 7) & 1;
   15194       UInt   syned = (insn28 >> 16) & 1;
   15195       IRTemp rmode = newTemp(Ity_I32);
   15196       assign(rmode, bZ ? mkU32(Irrm_ZERO)
   15197                        : mkexpr(mk_get_IR_rounding_mode()));
   15198       if (syned) {
   15199          // FTOSID
   15200          putFReg(fD, unop(Iop_ReinterpI32asF32,
   15201                           binop(Iop_F64toI32S, mkexpr(rmode),
   15202                                 getDReg(dM))),
   15203                  condT);
   15204          DIP("ftosi%sd%s s%u, d%u\n", bZ ? "z" : "",
   15205              nCC(conq), fD, dM);
   15206       } else {
   15207          // FTOUID
   15208          putFReg(fD, unop(Iop_ReinterpI32asF32,
   15209                           binop(Iop_F64toI32U, mkexpr(rmode),
   15210                                 getDReg(dM))),
   15211                  condT);
   15212          DIP("ftoui%sd%s s%u, d%u\n", bZ ? "z" : "",
   15213              nCC(conq), fD, dM);
   15214       }
   15215       goto decode_success_vfp;
   15216    }
   15217 
   15218    /* ----------------------------------------------------------- */
   15219    /* -- VFP instructions -- single precision                  -- */
   15220    /* ----------------------------------------------------------- */
   15221 
   15222    /* --------------------- fldms, fstms --------------------- */
   15223    /*
   15224                                  31   27   23   19 15 11   7   0
   15225                                          P UDWL
   15226       C4-98, C5-26   1  FSTMS    cond 1100 1x00 Rn Fd 1010 offset
   15227       C4-98, C5-28   2  FSTMIAS  cond 1100 1x10 Rn Fd 1010 offset
   15228       C4-98, C5-30   3  FSTMDBS  cond 1101 0x10 Rn Fd 1010 offset
   15229 
   15230       C4-40, C5-26   1  FLDMS    cond 1100 1x01 Rn Fd 1010 offset
   15231       C4-40, C5-26   2  FLDMIAS  cond 1100 1x11 Rn Fd 1010 offset
   15232       C4-40, C5-26   3  FLDMDBS  cond 1101 0x11 Rn Fd 1010 offset
   15233 
   15234       Regs transferred: F(Fd:D) .. F(Fd:D + offset - 1)
   15235       offset must not imply a reg > 31
   15236       IA/DB: Rn is changed by (4 x # regs transferred)
   15237 
   15238       case coding:
   15239          1  at-Rn   (access at Rn)
   15240          2  ia-Rn   (access at Rn, then Rn += 4n)
   15241          3  db-Rn   (Rn -= 4n,     then access at Rn)
   15242    */
   15243    if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
   15244        && INSN(11,8) == BITS4(1,0,1,0)) {
   15245       UInt bP      = (insn28 >> 24) & 1;
   15246       UInt bU      = (insn28 >> 23) & 1;
   15247       UInt bW      = (insn28 >> 21) & 1;
   15248       UInt bL      = (insn28 >> 20) & 1;
   15249       UInt bD      = (insn28 >> 22) & 1;
   15250       UInt offset  = (insn28 >> 0) & 0xFF;
   15251       UInt rN      = INSN(19,16);
   15252       UInt fD      = (INSN(15,12) << 1) | bD;
   15253       UInt nRegs   = offset;
   15254       UInt summary = 0;
   15255       Int  i;
   15256 
   15257       /**/ if (bP == 0 && bU == 1 && bW == 0) {
   15258          summary = 1;
   15259       }
   15260       else if (bP == 0 && bU == 1 && bW == 1) {
   15261          summary = 2;
   15262       }
   15263       else if (bP == 1 && bU == 0 && bW == 1) {
   15264          summary = 3;
   15265       }
   15266       else goto after_vfp_fldms_fstms;
   15267 
   15268       /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
   15269       if (rN == 15 && (summary == 2 || summary == 3 || isT))
   15270          goto after_vfp_fldms_fstms;
   15271 
   15272       /* offset must specify at least one register */
   15273       if (offset < 1)
   15274          goto after_vfp_fldms_fstms;
   15275 
   15276       /* can't transfer regs after S31 */
   15277       if (fD + nRegs - 1 >= 32)
   15278          goto after_vfp_fldms_fstms;
   15279 
   15280       /* Now, we can't do a conditional load or store, since that very
   15281          likely will generate an exception.  So we have to take a side
   15282          exit at this point if the condition is false. */
   15283       if (condT != IRTemp_INVALID) {
   15284          if (isT)
   15285             mk_skip_over_T32_if_cond_is_false( condT );
   15286          else
   15287             mk_skip_over_A32_if_cond_is_false( condT );
   15288          condT = IRTemp_INVALID;
   15289       }
   15290       /* Ok, now we're unconditional.  Do the load or store. */
   15291 
   15292       /* get the old Rn value */
   15293       IRTemp rnT = newTemp(Ity_I32);
   15294       assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
   15295                            rN == 15));
   15296 
   15297       /* make a new value for Rn, post-insn */
   15298       IRTemp rnTnew = IRTemp_INVALID;
   15299       if (summary == 2 || summary == 3) {
   15300          rnTnew = newTemp(Ity_I32);
   15301          assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
   15302                               mkexpr(rnT),
   15303                               mkU32(4 * nRegs)));
   15304       }
   15305 
   15306       /* decide on the base transfer address */
   15307       IRTemp taT = newTemp(Ity_I32);
   15308       assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
   15309 
   15310       /* update Rn if necessary -- in case 3, we're moving it down, so
   15311          update before any memory reference, in order to keep Memcheck
   15312          and V's stack-extending logic (on linux) happy */
   15313       if (summary == 3) {
   15314          if (isT)
   15315             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   15316          else
   15317             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   15318       }
   15319 
   15320       /* generate the transfers */
   15321       for (i = 0; i < nRegs; i++) {
   15322          IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(4*i));
   15323          if (bL) {
   15324             putFReg(fD + i, loadLE(Ity_F32, addr), IRTemp_INVALID);
   15325          } else {
   15326             storeLE(addr, getFReg(fD + i));
   15327          }
   15328       }
   15329 
   15330       /* update Rn if necessary -- in case 2, we're moving it up, so
   15331          update after any memory reference, in order to keep Memcheck
   15332          and V's stack-extending logic (on linux) happy */
   15333       if (summary == 2) {
   15334          if (isT)
   15335             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
   15336          else
   15337             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
   15338       }
   15339 
   15340       const HChar* nm = bL==1 ? "ld" : "st";
   15341       switch (summary) {
   15342          case 1:  DIP("f%sms%s r%u, {s%u-s%u}\n",
   15343                       nm, nCC(conq), rN, fD, fD + nRegs - 1);
   15344                   break;
   15345          case 2:  DIP("f%smias%s r%u!, {s%u-s%u}\n",
   15346                       nm, nCC(conq), rN, fD, fD + nRegs - 1);
   15347                   break;
   15348          case 3:  DIP("f%smdbs%s r%u!, {s%u-s%u}\n",
   15349                       nm, nCC(conq), rN, fD, fD + nRegs - 1);
   15350                   break;
   15351          default: vassert(0);
   15352       }
   15353 
   15354       goto decode_success_vfp;
   15355       /* FIXME alignment constraints? */
   15356    }
   15357 
   15358   after_vfp_fldms_fstms:
   15359 
   15360    /* --------------------- fmsr, fmrs --------------------- */
   15361    if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
   15362        && BITS4(1,0,1,0) == INSN(11,8)
   15363        && BITS4(0,0,0,0) == INSN(3,0)
   15364        && BITS4(0,0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
   15365       UInt rD  = INSN(15,12);
   15366       UInt b7  = (insn28 >> 7) & 1;
   15367       UInt fN  = (INSN(19,16) << 1) | b7;
   15368       UInt b20 = (insn28 >> 20) & 1;
   15369       if (rD == 15) {
   15370          /* fall through */
   15371          /* Let's assume that no sane person would want to do
   15372             floating-point transfers to or from the program counter,
   15373             and simply decline to decode the instruction.  The ARM ARM
   15374             doesn't seem to explicitly disallow this case, though. */
   15375       } else {
   15376          if (b20) {
   15377             IRExpr* res = unop(Iop_ReinterpF32asI32, getFReg(fN));
   15378             if (isT)
   15379                putIRegT(rD, res, condT);
   15380             else
   15381                putIRegA(rD, res, condT, Ijk_Boring);
   15382             DIP("fmrs%s r%u, s%u\n", nCC(conq), rD, fN);
   15383          } else {
   15384             putFReg(fN, unop(Iop_ReinterpI32asF32,
   15385                              isT ? getIRegT(rD) : getIRegA(rD)),
   15386                         condT);
   15387             DIP("fmsr%s s%u, r%u\n", nCC(conq), fN, rD);
   15388          }
   15389          goto decode_success_vfp;
   15390       }
   15391       /* fall through */
   15392    }
   15393 
   15394    /* --------------------- f{ld,st}s --------------------- */
   15395    // FLDS, FSTS
   15396    if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
   15397        && BITS4(1,0,1,0) == INSN(11,8)) {
   15398       UInt bD     = (insn28 >> 22) & 1;
   15399       UInt fD     = (INSN(15,12) << 1) | bD;
   15400       UInt rN     = INSN(19,16);
   15401       UInt offset = (insn28 & 0xFF) << 2;
   15402       UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
   15403       UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
   15404       /* make unconditional */
   15405       if (condT != IRTemp_INVALID) {
   15406          if (isT)
   15407             mk_skip_over_T32_if_cond_is_false( condT );
   15408          else
   15409             mk_skip_over_A32_if_cond_is_false( condT );
   15410          condT = IRTemp_INVALID;
   15411       }
   15412       IRTemp ea = newTemp(Ity_I32);
   15413       assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
   15414                        align4if(isT ? getIRegT(rN) : getIRegA(rN),
   15415                                 rN == 15),
   15416                        mkU32(offset)));
   15417       if (bL) {
   15418          putFReg(fD, loadLE(Ity_F32,mkexpr(ea)), IRTemp_INVALID);
   15419       } else {
   15420          storeLE(mkexpr(ea), getFReg(fD));
   15421       }
   15422       DIP("f%ss%s s%u, [r%u, %c#%u]\n",
   15423           bL ? "ld" : "st", nCC(conq), fD, rN,
   15424           bU ? '+' : '-', offset);
   15425       goto decode_success_vfp;
   15426    }
   15427 
   15428    /* --------------------- dp insns (F) --------------------- */
   15429    if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
   15430        && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
   15431        && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
   15432       UInt    bM  = (insn28 >> 5) & 1;
   15433       UInt    bD  = (insn28 >> 22) & 1;
   15434       UInt    bN  = (insn28 >> 7) & 1;
   15435       UInt    fM  = (INSN(3,0) << 1) | bM;   /* argR */
   15436       UInt    fD  = (INSN(15,12) << 1) | bD; /* dst/acc */
   15437       UInt    fN  = (INSN(19,16) << 1) | bN; /* argL */
   15438       UInt    bP  = (insn28 >> 23) & 1;
   15439       UInt    bQ  = (insn28 >> 21) & 1;
   15440       UInt    bR  = (insn28 >> 20) & 1;
   15441       UInt    bS  = (insn28 >> 6) & 1;
   15442       UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
   15443       IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   15444       switch (opc) {
   15445          case BITS4(0,0,0,0): /* MAC: d + n * m */
   15446             putFReg(fD, triop(Iop_AddF32, rm,
   15447                               getFReg(fD),
   15448                               triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
   15449                         condT);
   15450             DIP("fmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   15451             goto decode_success_vfp;
   15452          case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
   15453             putFReg(fD, triop(Iop_AddF32, rm,
   15454                               getFReg(fD),
   15455                               unop(Iop_NegF32,
   15456                                    triop(Iop_MulF32, rm, getFReg(fN),
   15457                                                          getFReg(fM)))),
   15458                         condT);
   15459             DIP("fnmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   15460             goto decode_success_vfp;
   15461          case BITS4(0,0,1,0): /* MSC: - d + n * m */
   15462             putFReg(fD, triop(Iop_AddF32, rm,
   15463                               unop(Iop_NegF32, getFReg(fD)),
   15464                               triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
   15465                         condT);
   15466             DIP("fmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   15467             goto decode_success_vfp;
   15468          case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
   15469             putFReg(fD, triop(Iop_AddF32, rm,
   15470                               unop(Iop_NegF32, getFReg(fD)),
   15471                               unop(Iop_NegF32,
   15472                                    triop(Iop_MulF32, rm,
   15473                                                      getFReg(fN),
   15474                                                     getFReg(fM)))),
   15475                         condT);
   15476             DIP("fnmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   15477             goto decode_success_vfp;
   15478          case BITS4(0,1,0,0): /* MUL: n * m */
   15479             putFReg(fD, triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM)),
   15480                         condT);
   15481             DIP("fmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   15482             goto decode_success_vfp;
   15483          case BITS4(0,1,0,1): /* NMUL: - n * m */
   15484             putFReg(fD, unop(Iop_NegF32,
   15485                              triop(Iop_MulF32, rm, getFReg(fN),
   15486                                                    getFReg(fM))),
   15487                     condT);
   15488             DIP("fnmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   15489             goto decode_success_vfp;
   15490          case BITS4(0,1,1,0): /* ADD: n + m */
   15491             putFReg(fD, triop(Iop_AddF32, rm, getFReg(fN), getFReg(fM)),
   15492                         condT);
   15493             DIP("fadds%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   15494             goto decode_success_vfp;
   15495          case BITS4(0,1,1,1): /* SUB: n - m */
   15496             putFReg(fD, triop(Iop_SubF32, rm, getFReg(fN), getFReg(fM)),
   15497                         condT);
   15498             DIP("fsubs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   15499             goto decode_success_vfp;
   15500          case BITS4(1,0,0,0): /* DIV: n / m */
   15501             putFReg(fD, triop(Iop_DivF32, rm, getFReg(fN), getFReg(fM)),
   15502                         condT);
   15503             DIP("fdivs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   15504             goto decode_success_vfp;
   15505          case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
   15506             /* XXXROUNDINGFIXME look up ARM reference for fused
   15507                multiply-add rounding */
   15508             putFReg(fD, triop(Iop_AddF32, rm,
   15509                               unop(Iop_NegF32, getFReg(fD)),
   15510                               triop(Iop_MulF32, rm,
   15511                                                 getFReg(fN),
   15512                                                 getFReg(fM))),
   15513                         condT);
   15514             DIP("vfnmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   15515             goto decode_success_vfp;
   15516          case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
   15517             /* XXXROUNDINGFIXME look up ARM reference for fused
   15518                multiply-add rounding */
   15519             putFReg(fD, triop(Iop_AddF32, rm,
   15520                               unop(Iop_NegF32, getFReg(fD)),
   15521                               triop(Iop_MulF32, rm,
   15522                                                 unop(Iop_NegF32, getFReg(fN)),
   15523                                                 getFReg(fM))),
   15524                         condT);
   15525             DIP("vfnmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   15526             goto decode_success_vfp;
   15527          case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
   15528             /* XXXROUNDINGFIXME look up ARM reference for fused
   15529                multiply-add rounding */
   15530             putFReg(fD, triop(Iop_AddF32, rm,
   15531                               getFReg(fD),
   15532                               triop(Iop_MulF32, rm, getFReg(fN),
   15533                                                     getFReg(fM))),
   15534                         condT);
   15535             DIP("vfmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   15536             goto decode_success_vfp;
   15537          case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
   15538             /* XXXROUNDINGFIXME look up ARM reference for fused
   15539                multiply-add rounding */
   15540             putFReg(fD, triop(Iop_AddF32, rm,
   15541                               getFReg(fD),
   15542                               triop(Iop_MulF32, rm,
   15543                                     unop(Iop_NegF32, getFReg(fN)),
   15544                                     getFReg(fM))),
   15545                         condT);
   15546             DIP("vfmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
   15547             goto decode_success_vfp;
   15548          default:
   15549             break;
   15550       }
   15551    }
   15552 
   15553    /* --------------------- compares (S) --------------------- */
   15554    /*          31   27   23   19   15 11   7    3
   15555                  28   24   20   16 12    8    4    0
   15556       FCMPS    cond 1110 1D11 0100 Fd 1010 01M0 Fm
   15557       FCMPES   cond 1110 1D11 0100 Fd 1010 11M0 Fm
   15558       FCMPZS   cond 1110 1D11 0101 Fd 1010 0100 0000
   15559       FCMPZES  cond 1110 1D11 0101 Fd 1010 1100 0000
   15560                                  Z         N
   15561 
   15562       Z=0 Compare Fd:D vs Fm:M     and set FPSCR 31:28 accordingly
   15563       Z=1 Compare Fd:D vs zero
   15564 
   15565       N=1 generates Invalid Operation exn if either arg is any kind of NaN
   15566       N=0 generates Invalid Operation exn if either arg is a signalling NaN
   15567       (Not that we pay any attention to N here)
   15568    */
   15569    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   15570        && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   15571        && BITS4(1,0,1,0) == INSN(11,8)
   15572        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   15573       UInt bZ = (insn28 >> 16) & 1;
   15574       UInt bN = (insn28 >> 7) & 1;
   15575       UInt bD = (insn28 >> 22) & 1;
   15576       UInt bM = (insn28 >> 5) & 1;
   15577       UInt fD = (INSN(15,12) << 1) | bD;
   15578       UInt fM = (INSN(3,0) << 1) | bM;
   15579       if (bZ && (INSN(3,0) != 0 || (INSN(7,4) & 3) != 0)) {
   15580          /* does not decode; fall through */
   15581       } else {
   15582          IRTemp argL = newTemp(Ity_F64);
   15583          IRTemp argR = newTemp(Ity_F64);
   15584          IRTemp irRes = newTemp(Ity_I32);
   15585 
   15586          assign(argL, unop(Iop_F32toF64, getFReg(fD)));
   15587          assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0))
   15588                          : unop(Iop_F32toF64, getFReg(fM)));
   15589          assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
   15590 
   15591          IRTemp nzcv     = IRTemp_INVALID;
   15592          IRTemp oldFPSCR = newTemp(Ity_I32);
   15593          IRTemp newFPSCR = newTemp(Ity_I32);
   15594 
   15595          /* This is where the fun starts.  We have to convert 'irRes'
   15596             from an IR-convention return result (IRCmpF64Result) to an
   15597             ARM-encoded (N,Z,C,V) group.  The final result is in the
   15598             bottom 4 bits of 'nzcv'. */
   15599          /* Map compare result from IR to ARM(nzcv) */
   15600          /*
   15601             FP cmp result | IR   | ARM(nzcv)
   15602             --------------------------------
   15603             UN              0x45   0011
   15604             LT              0x01   1000
   15605             GT              0x00   0010
   15606             EQ              0x40   0110
   15607          */
   15608          nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
   15609 
   15610          /* And update FPSCR accordingly */
   15611          assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
   15612          assign(newFPSCR,
   15613                 binop(Iop_Or32,
   15614                       binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
   15615                       binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
   15616 
   15617          putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
   15618 
   15619          if (bZ) {
   15620             DIP("fcmpz%ss%s s%u\n", bN ? "e" : "", nCC(conq), fD);
   15621          } else {
   15622             DIP("fcmp%ss%s s%u, s%u\n", bN ? "e" : "",
   15623                 nCC(conq), fD, fM);
   15624          }
   15625          goto decode_success_vfp;
   15626       }
   15627       /* fall through */
   15628    }
   15629 
   15630    /* --------------------- unary (S) --------------------- */
   15631    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   15632        && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   15633        && BITS4(1,0,1,0) == INSN(11,8)
   15634        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   15635       UInt bD = (insn28 >> 22) & 1;
   15636       UInt bM = (insn28 >> 5) & 1;
   15637       UInt fD  = (INSN(15,12) << 1) | bD;
   15638       UInt fM  = (INSN(3,0) << 1) | bM;
   15639       UInt b16 = (insn28 >> 16) & 1;
   15640       UInt b7  = (insn28 >> 7) & 1;
   15641       /**/ if (b16 == 0 && b7 == 0) {
   15642          // FCPYS
   15643          putFReg(fD, getFReg(fM), condT);
   15644          DIP("fcpys%s s%u, s%u\n", nCC(conq), fD, fM);
   15645          goto decode_success_vfp;
   15646       }
   15647       else if (b16 == 0 && b7 == 1) {
   15648          // FABSS
   15649          putFReg(fD, unop(Iop_AbsF32, getFReg(fM)), condT);
   15650          DIP("fabss%s s%u, s%u\n", nCC(conq), fD, fM);
   15651          goto decode_success_vfp;
   15652       }
   15653       else if (b16 == 1 && b7 == 0) {
   15654          // FNEGS
   15655          putFReg(fD, unop(Iop_NegF32, getFReg(fM)), condT);
   15656          DIP("fnegs%s s%u, s%u\n", nCC(conq), fD, fM);
   15657          goto decode_success_vfp;
   15658       }
   15659       else if (b16 == 1 && b7 == 1) {
   15660          // FSQRTS
   15661          IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
   15662          putFReg(fD, binop(Iop_SqrtF32, rm, getFReg(fM)), condT);
   15663          DIP("fsqrts%s s%u, s%u\n", nCC(conq), fD, fM);
   15664          goto decode_success_vfp;
   15665       }
   15666       else
   15667          vassert(0);
   15668 
   15669       /* fall through */
   15670    }
   15671 
   15672    /* ----------------- I <-> S conversions ----------------- */
   15673 
   15674    // F{S,U}ITOS fD, fM
   15675    /* These are more complex than FSITOD/FUITOD.  In the D cases, a 32
   15676       bit int will always fit within the 53 bit mantissa, so there's
   15677       no possibility of a loss of precision, but that's obviously not
   15678       the case here.  Hence this case possibly requires rounding, and
   15679       so it drags in the current rounding mode. */
   15680    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   15681        && BITS4(1,0,0,0) == INSN(19,16)
   15682        && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
   15683        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   15684       UInt bM    = (insn28 >> 5) & 1;
   15685       UInt bD    = (insn28 >> 22) & 1;
   15686       UInt fM    = (INSN(3,0) << 1) | bM;
   15687       UInt fD    = (INSN(15,12) << 1) | bD;
   15688       UInt syned = (insn28 >> 7) & 1;
   15689       IRTemp rmode = newTemp(Ity_I32);
   15690       assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
   15691       if (syned) {
   15692          // FSITOS
   15693          putFReg(fD, binop(Iop_F64toF32,
   15694                            mkexpr(rmode),
   15695                            unop(Iop_I32StoF64,
   15696                                 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
   15697                  condT);
   15698          DIP("fsitos%s s%u, s%u\n", nCC(conq), fD, fM);
   15699       } else {
   15700          // FUITOS
   15701          putFReg(fD, binop(Iop_F64toF32,
   15702                            mkexpr(rmode),
   15703                            unop(Iop_I32UtoF64,
   15704                                 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
   15705                  condT);
   15706          DIP("fuitos%s s%u, s%u\n", nCC(conq), fD, fM);
   15707       }
   15708       goto decode_success_vfp;
   15709    }
   15710 
   15711    // FTO{S,U}IS fD, fM
   15712    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   15713        && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
   15714        && BITS4(1,0,1,0) == INSN(11,8)
   15715        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
   15716       UInt   bM    = (insn28 >> 5) & 1;
   15717       UInt   bD    = (insn28 >> 22) & 1;
   15718       UInt   fD    = (INSN(15,12) << 1) | bD;
   15719       UInt   fM    = (INSN(3,0) << 1) | bM;
   15720       UInt   bZ    = (insn28 >> 7) & 1;
   15721       UInt   syned = (insn28 >> 16) & 1;
   15722       IRTemp rmode = newTemp(Ity_I32);
   15723       assign(rmode, bZ ? mkU32(Irrm_ZERO)
   15724                        : mkexpr(mk_get_IR_rounding_mode()));
   15725       if (syned) {
   15726          // FTOSIS
   15727          putFReg(fD, unop(Iop_ReinterpI32asF32,
   15728                           binop(Iop_F64toI32S, mkexpr(rmode),
   15729                                 unop(Iop_F32toF64, getFReg(fM)))),
   15730                  condT);
   15731          DIP("ftosi%ss%s s%u, d%u\n", bZ ? "z" : "",
   15732              nCC(conq), fD, fM);
   15733          goto decode_success_vfp;
   15734       } else {
   15735          // FTOUIS
   15736          putFReg(fD, unop(Iop_ReinterpI32asF32,
   15737                           binop(Iop_F64toI32U, mkexpr(rmode),
   15738                                 unop(Iop_F32toF64, getFReg(fM)))),
   15739                  condT);
   15740          DIP("ftoui%ss%s s%u, d%u\n", bZ ? "z" : "",
   15741              nCC(conq), fD, fM);
   15742          goto decode_success_vfp;
   15743       }
   15744    }
   15745 
   15746    /* ----------------- S <-> D conversions ----------------- */
   15747 
   15748    // FCVTDS
   15749    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   15750        && BITS4(0,1,1,1) == INSN(19,16)
   15751        && BITS4(1,0,1,0) == INSN(11,8)
   15752        && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
   15753       UInt dD = INSN(15,12) | (INSN(22,22) << 4);
   15754       UInt bM = (insn28 >> 5) & 1;
   15755       UInt fM = (INSN(3,0) << 1) | bM;
   15756       putDReg(dD, unop(Iop_F32toF64, getFReg(fM)), condT);
   15757       DIP("fcvtds%s d%u, s%u\n", nCC(conq), dD, fM);
   15758       goto decode_success_vfp;
   15759    }
   15760 
   15761    // FCVTSD
   15762    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   15763        && BITS4(0,1,1,1) == INSN(19,16)
   15764        && BITS4(1,0,1,1) == INSN(11,8)
   15765        && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
   15766       UInt   bD    = (insn28 >> 22) & 1;
   15767       UInt   fD    = (INSN(15,12) << 1) | bD;
   15768       UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
   15769       IRTemp rmode = newTemp(Ity_I32);
   15770       assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
   15771       putFReg(fD, binop(Iop_F64toF32, mkexpr(rmode), getDReg(dM)),
   15772                   condT);
   15773       DIP("fcvtsd%s s%u, d%u\n", nCC(conq), fD, dM);
   15774       goto decode_success_vfp;
   15775    }
   15776 
   15777    /* --------------- VCVT fixed<->floating, VFP --------------- */
   15778    /*          31   27   23   19   15 11   7    3
   15779                  28   24   20   16 12    8    4    0
   15780 
   15781                cond 1110 1D11 1p1U Vd 101f x1i0 imm4
   15782 
   15783       VCVT<c>.<Td>.F64 <Dd>, <Dd>, #fbits
   15784       VCVT<c>.<Td>.F32 <Sd>, <Sd>, #fbits
   15785       VCVT<c>.F64.<Td> <Dd>, <Dd>, #fbits
   15786       VCVT<c>.F32.<Td> <Sd>, <Sd>, #fbits
   15787       are of this form.  We only handle a subset of the cases though.
   15788    */
   15789    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   15790        && BITS4(1,0,1,0) == (INSN(19,16) & BITS4(1,0,1,0))
   15791        && BITS3(1,0,1) == INSN(11,9)
   15792        && BITS3(1,0,0) == (INSN(6,4) & BITS3(1,0,1))) {
   15793       UInt bD        = INSN(22,22);
   15794       UInt bOP       = INSN(18,18);
   15795       UInt bU        = INSN(16,16);
   15796       UInt Vd        = INSN(15,12);
   15797       UInt bSF       = INSN(8,8);
   15798       UInt bSX       = INSN(7,7);
   15799       UInt bI        = INSN(5,5);
   15800       UInt imm4      = INSN(3,0);
   15801       Bool to_fixed  = bOP == 1;
   15802       Bool dp_op     = bSF == 1;
   15803       Bool unsyned   = bU == 1;
   15804       UInt size      = bSX == 0 ? 16 : 32;
   15805       Int  frac_bits = size - ((imm4 << 1) | bI);
   15806       UInt d         = dp_op  ? ((bD << 4) | Vd)  : ((Vd << 1) | bD);
   15807 
   15808       IRExpr* rm     = mkU32(Irrm_NEAREST);
   15809       IRTemp  scale  = newTemp(Ity_F64);
   15810       assign(scale, unop(Iop_I32UtoF64, mkU32( ((UInt)1) << (frac_bits-1) )));
   15811 
   15812       if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && !dp_op
   15813                                             && size == 32) {
   15814          /* VCVT.F32.{S,U}32 S[d], S[d], #frac_bits */
   15815          /* This generates really horrible code.  We could potentially
   15816             do much better. */
   15817          IRTemp rmode = newTemp(Ity_I32);
   15818          assign(rmode, mkU32(Irrm_NEAREST)); // per the spec
   15819          IRTemp src32 = newTemp(Ity_I32);
   15820          assign(src32,  unop(Iop_ReinterpF32asI32, getFReg(d)));
   15821          IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
   15822                                 mkexpr(src32 ) );
   15823          IRExpr* resF64 = triop(Iop_DivF64,
   15824                                 rm, as_F64,
   15825                                 triop(Iop_AddF64, rm, mkexpr(scale),
   15826                                                       mkexpr(scale)));
   15827          IRExpr* resF32 = binop(Iop_F64toF32, mkexpr(rmode), resF64);
   15828          putFReg(d, resF32, condT);
   15829          DIP("vcvt.f32.%c32, s%u, s%u, #%d\n",
   15830              unsyned ? 'u' : 's', d, d, frac_bits);
   15831          goto decode_success_vfp;
   15832       }
   15833       if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && dp_op
   15834                                             && size == 32) {
   15835          /* VCVT.F64.{S,U}32 D[d], D[d], #frac_bits */
   15836          /* This generates really horrible code.  We could potentially
   15837             do much better. */
   15838          IRTemp src32 = newTemp(Ity_I32);
   15839          assign(src32, unop(Iop_64to32, getDRegI64(d)));
   15840          IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
   15841                                 mkexpr(src32 ) );
   15842          IRExpr* resF64 = triop(Iop_DivF64,
   15843                                 rm, as_F64,
   15844                                 triop(Iop_AddF64, rm, mkexpr(scale),
   15845                                                       mkexpr(scale)));
   15846          putDReg(d, resF64, condT);
   15847          DIP("vcvt.f64.%c32, d%u, d%u, #%d\n",
   15848              unsyned ? 'u' : 's', d, d, frac_bits);
   15849          goto decode_success_vfp;
   15850       }
   15851       if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && dp_op
   15852                                             && size == 32) {
   15853          /* VCVT.{S,U}32.F64 D[d], D[d], #frac_bits */
   15854          IRTemp srcF64 = newTemp(Ity_F64);
   15855          assign(srcF64, getDReg(d));
   15856          IRTemp scaledF64 = newTemp(Ity_F64);
   15857          assign(scaledF64, triop(Iop_MulF64,
   15858                                  rm, mkexpr(srcF64),
   15859                                  triop(Iop_AddF64, rm, mkexpr(scale),
   15860                                                        mkexpr(scale))));
   15861          IRTemp rmode = newTemp(Ity_I32);
   15862          assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
   15863          IRTemp asI32 = newTemp(Ity_I32);
   15864          assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
   15865                              mkexpr(rmode), mkexpr(scaledF64)));
   15866          putDRegI64(d, unop(unsyned ? Iop_32Uto64 : Iop_32Sto64,
   15867                             mkexpr(asI32)), condT);
   15868 
   15869          DIP("vcvt.%c32.f64, d%u, d%u, #%d\n",
   15870              unsyned ? 'u' : 's', d, d, frac_bits);
   15871          goto decode_success_vfp;
   15872       }
   15873       if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && !dp_op
   15874                                             && size == 32) {
   15875          /* VCVT.{S,U}32.F32 S[d], S[d], #frac_bits */
   15876          IRTemp srcF32 = newTemp(Ity_F32);
   15877          assign(srcF32, getFReg(d));
   15878          IRTemp scaledF64 = newTemp(Ity_F64);
   15879          assign(scaledF64, triop(Iop_MulF64,
   15880                                  rm, unop(Iop_F32toF64, mkexpr(srcF32)),
   15881                                  triop(Iop_AddF64, rm, mkexpr(scale),
   15882                                                        mkexpr(scale))));
   15883          IRTemp rmode = newTemp(Ity_I32);
   15884          assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
   15885          IRTemp asI32 = newTemp(Ity_I32);
   15886          assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
   15887                              mkexpr(rmode), mkexpr(scaledF64)));
   15888          putFReg(d, unop(Iop_ReinterpI32asF32, mkexpr(asI32)), condT);
   15889          DIP("vcvt.%c32.f32, d%u, d%u, #%d\n",
   15890              unsyned ? 'u' : 's', d, d, frac_bits);
   15891          goto decode_success_vfp;
   15892       }
   15893       /* fall through */
   15894    }
   15895 
   15896    /* FAILURE */
   15897    return False;
   15898 
   15899   decode_success_vfp:
   15900    /* Check that any accepted insn really is a CP10 or CP11 insn, iow,
   15901       assert that we aren't accepting, in this fn, insns that actually
   15902       should be handled somewhere else. */
   15903    vassert(INSN(11,9) == BITS3(1,0,1)); // 11:8 = 1010 or 1011
   15904    return True;
   15905 
   15906 #  undef INSN
   15907 }
   15908 
   15909 
   15910 /*------------------------------------------------------------*/
   15911 /*--- Instructions in NV (never) space                     ---*/
   15912 /*------------------------------------------------------------*/
   15913 
   15914 /* ARM only */
   15915 /* Translate an NV space instruction (condition field 31:28 == 1111),
   15916    supplied as the 32-bit word |insn|.  If successful, returns True and
   15917    *dres may or may not be updated.  If failure, returns False and
   15918    doesn't change *dres nor create any IR.  |archinfo| is consulted here
   15919    only for its hwcaps, to decide whether to try the NEON decoder.
   15920    Note that all NEON instructions (in ARM mode) up to and including
   15921    ARMv7, but not later, are handled through here, since they are all
   15922    in NV space. */
   15923 static Bool decode_NV_instruction_ARMv7_and_below
   15924                                  ( /*MOD*/DisResult* dres,
   15925                                     const VexArchInfo* archinfo,
   15926                                     UInt insn )
   15927 {
   15928 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   15929 #  define INSN_COND          SLICE_UInt(insn, 31, 28)
   15930    /* Handed to mk_EA_reg_plusminus_shifted_reg, then printed by the pld case. */
   15931    HChar dis_buf[128];
   15932 
   15933    // Should only be called for NV instructions
   15934    vassert(BITS4(1,1,1,1) == INSN_COND);
   15935 
   15936    /* ------------------------ pld{w} ------------------------ */
   15937    if (BITS8(0,1,0,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
   15938        && BITS4(1,1,1,1) == INSN(15,12)) {
   15939       UInt rN    = INSN(19,16);
   15940       UInt imm12 = INSN(11,0);
   15941       UInt bU    = INSN(23,23); /* 1 prints as '+', 0 as '-' */
   15942       UInt bR    = INSN(22,22); /* 1 prints as "pld", 0 as "pldw" */
   15943       DIP("pld%c [r%u, #%c%u]\n", bR ? ' ' : 'w', rN, bU ? '+' : '-', imm12);
   15944       return True; /* accepted; no IR is generated for the hint */
   15945    }
   15946    /* pld{w}, register-offset form (bit 4 must be zero) */
   15947    if (BITS8(0,1,1,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
   15948        && BITS4(1,1,1,1) == INSN(15,12)
   15949        && 0 == INSN(4,4)) {
   15950       UInt rN   = INSN(19,16);
   15951       UInt rM   = INSN(3,0);
   15952       UInt imm5 = INSN(11,7);
   15953       UInt sh2  = INSN(6,5);
   15954       UInt bU   = INSN(23,23);
   15955       UInt bR   = INSN(22,22);
   15956       if (rM != 15 && (rN != 15 || bR)) {
   15957          IRExpr* eaE = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   15958                                                        sh2, imm5, dis_buf);
   15959          IRTemp eaT = newTemp(Ity_I32);
   15960          /* Bind eaE to a temp merely for debugging-vex purposes, so we
   15961             can check it's a plausible decoding.  It will get removed
   15962             by iropt a little later on. */
   15963          vassert(eaE);
   15964          assign(eaT, eaE);
   15965          DIP("pld%c %s\n", bR ? ' ' : 'w', dis_buf);
   15966          return True;
   15967       }
   15968       /* fall through */
   15969    }
   15970 
   15971    /* ------------------------ pli ------------------------ */
   15972    if (BITS8(0,1,0,0, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
   15973        && BITS4(1,1,1,1) == INSN(15,12)) {
   15974       UInt rN    = INSN(19,16);
   15975       UInt imm12 = INSN(11,0);
   15976       UInt bU    = INSN(23,23);
   15977       DIP("pli [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
   15978       return True; /* likewise accepted without generating any IR */
   15979    }
   15980 
   15981    /* --------------------- Interworking branches --------------------- */
   15982 
   15983    // BLX (1), viz, unconditional branch and link to R15+simm24
   15984    // and set CPSR.T = 1, that is, switch to Thumb mode
   15985    if (INSN(31,25) == BITS7(1,1,1,1,1,0,1)) {
   15986       UInt bitH   = INSN(24,24);
   15987       UInt uimm24 = INSN(23,0);   uimm24 <<= 8; /* bit 23 moves up to bit 31 */
   15988       Int  simm24 = (Int)uimm24;  simm24 >>= 8; /* sign-extends, assuming >> on a negative Int is arithmetic */
   15989       simm24 = (((UInt)simm24) << 2) + (bitH << 1); /* byte offset: imm*4 + H*2 */
   15990       /* Now this is a bit tricky.  Since we're decoding an ARM insn,
   15991          it is implied that CPSR.T == 0.  Hence the current insn's
   15992          address is guaranteed to be of the form X--(30)--X00.  So, no
   15993          need to mask any bits off it.  But need to set the lowest bit
   15994          to 1 to denote we're in Thumb mode after this, since
   15995          guest_R15T has CPSR.T as the lowest bit.  And we can't chase
   15996          into the call, so end the block at this point. */
   15997       UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
   15998       putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
   15999                     IRTemp_INVALID/*because AL*/, Ijk_Boring );
   16000       llPutIReg(15, mkU32(dst));
   16001       dres->jk_StopHere = Ijk_Call;
   16002       dres->whatNext    = Dis_StopHere;
   16003       DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1); /* dst - 1: print without the Thumb bit */
   16004       return True;
   16005    }
   16006 
   16007    /* ------------------- v7 barrier insns ------------------- */
   16008    switch (insn) { /* exact encodings only; ISB, DSB and DMB all become Imbe_Fence */
   16009       case 0xF57FF06F: /* ISB */
   16010          stmt( IRStmt_MBE(Imbe_Fence) );
   16011          DIP("ISB\n");
   16012          return True;
   16013       case 0xF57FF04F: /* DSB sy */
   16014       case 0xF57FF04E: /* DSB st */
   16015       case 0xF57FF04B: /* DSB ish */
   16016       case 0xF57FF04A: /* DSB ishst */
   16017       case 0xF57FF047: /* DSB nsh */
   16018       case 0xF57FF046: /* DSB nshst */
   16019       case 0xF57FF043: /* DSB osh */
   16020       case 0xF57FF042: /* DSB oshst */
   16021          stmt( IRStmt_MBE(Imbe_Fence) );
   16022          DIP("DSB\n");
   16023          return True;
   16024       case 0xF57FF05F: /* DMB sy */
   16025       case 0xF57FF05E: /* DMB st */
   16026       case 0xF57FF05B: /* DMB ish */
   16027       case 0xF57FF05A: /* DMB ishst */
   16028       case 0xF57FF057: /* DMB nsh */
   16029       case 0xF57FF056: /* DMB nshst */
   16030       case 0xF57FF053: /* DMB osh */
   16031       case 0xF57FF052: /* DMB oshst */
   16032          stmt( IRStmt_MBE(Imbe_Fence) );
   16033          DIP("DMB\n");
   16034          return True;
   16035       default:
   16036          break; /* not a recognised barrier encoding; keep trying below */
   16037    }
   16038 
   16039    /* ------------------- CLREX ------------------ */
   16040    if (insn == 0xF57FF01F) {
   16041       /* AFAICS, this simply cancels a (all?) reservations made by a
   16042          (any?) preceding LDREX(es).  Arrange to hand it through to
   16043          the back end. */
   16044       stmt( IRStmt_MBE(Imbe_CancelReservation) );
   16045       DIP("clrex\n");
   16046       return True;
   16047    }
   16048 
   16049    /* ------------------- NEON ------------------- */
   16050    if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) { /* NEON decoder is tried only when hwcaps advertise it */
   16051       Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
   16052                         dres, insn, IRTemp_INVALID/*unconditional*/,
   16053                         False/*!isT*/
   16054                      );
   16055       if (ok_neon)
   16056          return True;
   16057    }
   16058 
   16059    // unrecognised
   16060    return False;
   16061 
   16062 #  undef INSN_COND
   16063 #  undef INSN
   16064 }
   16065 
   16066 
   16067 /*------------------------------------------------------------*/
   16068 /*--- Disassemble a single ARM instruction                 ---*/
   16069 /*------------------------------------------------------------*/
   16070 
   16071 /* Disassemble a single ARM instruction into IR.  The instruction is
   16072    located in host memory at guest_instr, and has (decoded) guest IP
   16073    of guest_R15_curr_instr_notENC, which will have been set before the
   16074    call here. */
   16075 
   16076 static
   16077 DisResult disInstr_ARM_WRK (
   16078              Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
   16079              Bool         resteerCisOk,
   16080              void*        callback_opaque,
   16081              const UChar* guest_instr,
   16082              const VexArchInfo* archinfo,
   16083              const VexAbiInfo*  abiinfo,
   16084              Bool         sigill_diag
   16085           )
   16086 {
   16087    // A macro to fish bits out of 'insn'.
   16088 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   16089 #  define INSN_COND          SLICE_UInt(insn, 31, 28)
   16090 
   16091    DisResult dres;
   16092    UInt      insn;
   16093    IRTemp    condT; /* :: Ity_I32 */
   16094    UInt      summary;
   16095    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
   16096 
   16097    /* Set result defaults. */
   16098    dres.whatNext    = Dis_Continue;
   16099    dres.len         = 4;
   16100    dres.continueAt  = 0;
   16101    dres.jk_StopHere = Ijk_INVALID;
   16102    dres.hint        = Dis_HintNone;
   16103 
   16104    /* Set default actions for post-insn handling of writes to r15, if
   16105       required. */
   16106    r15written = False;
   16107    r15guard   = IRTemp_INVALID; /* unconditional */
   16108    r15kind    = Ijk_Boring;
   16109 
   16110    /* At least this is simple on ARM: insns are all 4 bytes long, and
   16111       4-aligned.  So just fish the whole thing out of memory right now
   16112       and have done. */
   16113    insn = getUIntLittleEndianly( guest_instr );
   16114 
   16115    if (0) vex_printf("insn: 0x%x\n", insn);
   16116 
   16117    DIP("\t(arm) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
   16118 
   16119    vassert(0 == (guest_R15_curr_instr_notENC & 3));
   16120 
   16121    /* ----------------------------------------------------------- */
   16122 
   16123    /* Spot "Special" instructions (see comment at top of file). */
   16124    {
   16125       const UChar* code = guest_instr;
   16126       /* Spot the 16-byte preamble:
   16127 
   16128          e1a0c1ec  mov r12, r12, ROR #3
   16129          e1a0c6ec  mov r12, r12, ROR #13
   16130          e1a0ceec  mov r12, r12, ROR #29
   16131          e1a0c9ec  mov r12, r12, ROR #19
   16132       */
   16133       UInt word1 = 0xE1A0C1EC;
   16134       UInt word2 = 0xE1A0C6EC;
   16135       UInt word3 = 0xE1A0CEEC;
   16136       UInt word4 = 0xE1A0C9EC;
   16137       if (getUIntLittleEndianly(code+ 0) == word1 &&
   16138           getUIntLittleEndianly(code+ 4) == word2 &&
   16139           getUIntLittleEndianly(code+ 8) == word3 &&
   16140           getUIntLittleEndianly(code+12) == word4) {
   16141          /* Got a "Special" instruction preamble.  Which one is it? */
   16142          if (getUIntLittleEndianly(code+16) == 0xE18AA00A
   16143                                                /* orr r10,r10,r10 */) {
   16144             /* R3 = client_request ( R4 ) */
   16145             DIP("r3 = client_request ( %%r4 )\n");
   16146             llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
   16147             dres.jk_StopHere = Ijk_ClientReq;
   16148             dres.whatNext    = Dis_StopHere;
   16149             goto decode_success;
   16150          }
   16151          else
   16152          if (getUIntLittleEndianly(code+16) == 0xE18BB00B
   16153                                                /* orr r11,r11,r11 */) {
   16154             /* R3 = guest_NRADDR */
   16155             DIP("r3 = guest_NRADDR\n");
   16156             dres.len = 20;
   16157             llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
   16158             goto decode_success;
   16159          }
   16160          else
   16161          if (getUIntLittleEndianly(code+16) == 0xE18CC00C
   16162                                                /* orr r12,r12,r12 */) {
   16163             /*  branch-and-link-to-noredir R4 */
   16164             DIP("branch-and-link-to-noredir r4\n");
   16165             llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
   16166             llPutIReg(15, llGetIReg(4));
   16167             dres.jk_StopHere = Ijk_NoRedir;
   16168             dres.whatNext    = Dis_StopHere;
   16169             goto decode_success;
   16170          }
   16171          else
   16172          if (getUIntLittleEndianly(code+16) == 0xE1899009
   16173                                                /* orr r9,r9,r9 */) {
   16174             /* IR injection */
   16175             DIP("IR injection\n");
   16176             vex_inject_ir(irsb, Iend_LE);
   16177             // Invalidate the current insn. The reason is that the IRop we're
   16178             // injecting here can change. In which case the translation has to
   16179             // be redone. For ease of handling, we simply invalidate all the
   16180             // time.
   16181             stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
   16182             stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
   16183             llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
   16184             dres.whatNext    = Dis_StopHere;
   16185             dres.jk_StopHere = Ijk_InvalICache;
   16186             goto decode_success;
   16187          }
   16188          /* We don't know what it is.  Set opc1/opc2 so decode_failure
   16189             can print the insn following the Special-insn preamble. */
   16190          insn = getUIntLittleEndianly(code+16);
   16191          goto decode_failure;
   16192          /*NOTREACHED*/
   16193       }
   16194 
   16195    }
   16196 
   16197    /* ----------------------------------------------------------- */
   16198 
   16199    /* Main ARM instruction decoder starts here. */
   16200 
   16201    /* Deal with the condition.  Strategy is to merely generate a
   16202       condition temporary at this point (or IRTemp_INVALID, meaning
   16203       unconditional).  We leave it to lower-level instruction decoders
   16204       to decide whether they can generate straight-line code, or
   16205       whether they must generate a side exit before the instruction.
   16206       condT :: Ity_I32 and is always either zero or one. */
   16207    condT = IRTemp_INVALID;
   16208    switch ( (ARMCondcode)INSN_COND ) {
   16209       case ARMCondNV: {
   16210          // Illegal instruction prior to v5 (see ARM ARM A3-5), but
   16211          // some cases are acceptable
   16212          Bool ok
   16213             = decode_NV_instruction_ARMv7_and_below(&dres, archinfo, insn);
   16214          if (ok)
   16215             goto decode_success;
   16216          else
   16217             goto after_v7_decoder;
   16218       }
   16219       case ARMCondAL: // Always executed
   16220          break;
   16221       case ARMCondEQ: case ARMCondNE: case ARMCondHS: case ARMCondLO:
   16222       case ARMCondMI: case ARMCondPL: case ARMCondVS: case ARMCondVC:
   16223       case ARMCondHI: case ARMCondLS: case ARMCondGE: case ARMCondLT:
   16224       case ARMCondGT: case ARMCondLE:
   16225          condT = newTemp(Ity_I32);
   16226          assign( condT, mk_armg_calculate_condition( INSN_COND ));
   16227          break;
   16228    }
   16229 
   16230    /* ----------------------------------------------------------- */
   16231    /* -- ARMv5 integer instructions                            -- */
   16232    /* ----------------------------------------------------------- */
   16233 
   16234    /* ---------------- Data processing ops ------------------- */
   16235 
   16236    if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0))
   16237        && !(INSN(25,25) == 0 && INSN(7,7) == 1 && INSN(4,4) == 1)) {
   16238       IRTemp  shop = IRTemp_INVALID; /* shifter operand */
   16239       IRTemp  shco = IRTemp_INVALID; /* shifter carry out */
   16240       UInt    rD   = (insn >> 12) & 0xF; /* 15:12 */
   16241       UInt    rN   = (insn >> 16) & 0xF; /* 19:16 */
   16242       UInt    bitS = (insn >> 20) & 1; /* 20:20 */
   16243       IRTemp  rNt  = IRTemp_INVALID;
   16244       IRTemp  res  = IRTemp_INVALID;
   16245       IRTemp  oldV = IRTemp_INVALID;
   16246       IRTemp  oldC = IRTemp_INVALID;
   16247       const HChar*  name = NULL;
   16248       IROp    op   = Iop_INVALID;
   16249       Bool    ok;
   16250 
   16251       switch (INSN(24,21)) {
   16252 
   16253          /* --------- ADD, SUB, AND, OR --------- */
   16254          case BITS4(0,1,0,0): /* ADD:  Rd = Rn + shifter_operand */
   16255             name = "add"; op = Iop_Add32; goto rd_eq_rn_op_SO;
   16256          case BITS4(0,0,1,0): /* SUB:  Rd = Rn - shifter_operand */
   16257             name = "sub"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
   16258          case BITS4(0,0,1,1): /* RSB:  Rd = shifter_operand - Rn */
   16259             name = "rsb"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
   16260          case BITS4(0,0,0,0): /* AND:  Rd = Rn & shifter_operand */
   16261             name = "and"; op = Iop_And32; goto rd_eq_rn_op_SO;
   16262          case BITS4(1,1,0,0): /* OR:   Rd = Rn | shifter_operand */
   16263             name = "orr"; op = Iop_Or32; goto rd_eq_rn_op_SO;
   16264          case BITS4(0,0,0,1): /* EOR:  Rd = Rn ^ shifter_operand */
   16265             name = "eor"; op = Iop_Xor32; goto rd_eq_rn_op_SO;
   16266          case BITS4(1,1,1,0): /* BIC:  Rd = Rn & ~shifter_operand */
   16267             name = "bic"; op = Iop_And32; goto rd_eq_rn_op_SO;
   16268          rd_eq_rn_op_SO: {
   16269             Bool isRSB = False;
   16270             Bool isBIC = False;
   16271             switch (INSN(24,21)) {
   16272                case BITS4(0,0,1,1):
   16273                   vassert(op == Iop_Sub32); isRSB = True; break;
   16274                case BITS4(1,1,1,0):
   16275                   vassert(op == Iop_And32); isBIC = True; break;
   16276                default:
   16277                   break;
   16278             }
   16279             rNt = newTemp(Ity_I32);
   16280             assign(rNt, getIRegA(rN));
   16281             ok = mk_shifter_operand(
   16282                     INSN(25,25), INSN(11,0),
   16283                     &shop, bitS ? &shco : NULL, dis_buf
   16284                  );
   16285             if (!ok)
   16286                break;
   16287             res = newTemp(Ity_I32);
   16288             // compute the main result
   16289             if (isRSB) {
   16290                // reverse-subtract: shifter_operand - Rn
   16291                vassert(op == Iop_Sub32);
   16292                assign(res, binop(op, mkexpr(shop), mkexpr(rNt)) );
   16293             } else if (isBIC) {
   16294                // andn: Rn & ~shifter_operand
   16295                vassert(op == Iop_And32);
   16296                assign(res, binop(op, mkexpr(rNt),
   16297                                      unop(Iop_Not32, mkexpr(shop))) );
   16298             } else {
   16299                // normal: Rn op shifter_operand
   16300                assign(res, binop(op, mkexpr(rNt), mkexpr(shop)) );
   16301             }
   16302             // but don't commit it until after we've finished
   16303             // all necessary reads from the guest state
   16304             if (bitS
   16305                 && (op == Iop_And32 || op == Iop_Or32 || op == Iop_Xor32)) {
   16306                oldV = newTemp(Ity_I32);
   16307                assign( oldV, mk_armg_calculate_flag_v() );
   16308             }
   16309             // can't safely read guest state after here
   16310             // now safe to put the main result
   16311             putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   16312             // XXXX!! not safe to read any guest state after
   16313             // this point (I think the code below doesn't do that).
   16314             if (!bitS)
   16315                vassert(shco == IRTemp_INVALID);
   16316             /* Update the flags thunk if necessary */
   16317             if (bitS) {
   16318                vassert(shco != IRTemp_INVALID);
   16319                switch (op) {
   16320                   case Iop_Add32:
   16321                      setFlags_D1_D2( ARMG_CC_OP_ADD, rNt, shop, condT );
   16322                      break;
   16323                   case Iop_Sub32:
   16324                      if (isRSB) {
   16325                         setFlags_D1_D2( ARMG_CC_OP_SUB, shop, rNt, condT );
   16326                      } else {
   16327                         setFlags_D1_D2( ARMG_CC_OP_SUB, rNt, shop, condT );
   16328                      }
   16329                      break;
   16330                   case Iop_And32: /* BIC and AND set the flags the same */
   16331                   case Iop_Or32:
   16332                   case Iop_Xor32:
   16333                      // oldV has been read just above
   16334                      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
   16335                                         res, shco, oldV, condT );
   16336                      break;
   16337                   default:
   16338                      vassert(0);
   16339                }
   16340             }
   16341             DIP("%s%s%s r%u, r%u, %s\n",
   16342                 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
   16343             goto decode_success;
   16344          }
   16345 
   16346          /* --------- MOV, MVN --------- */
   16347          case BITS4(1,1,0,1):   /* MOV: Rd = shifter_operand */
   16348          case BITS4(1,1,1,1): { /* MVN: Rd = not(shifter_operand) */
   16349             Bool isMVN = INSN(24,21) == BITS4(1,1,1,1);
   16350             IRTemp jk = Ijk_Boring;
   16351             if (rN != 0)
   16352                break; /* rN must be zero */
   16353             ok = mk_shifter_operand(
   16354                     INSN(25,25), INSN(11,0),
   16355                     &shop, bitS ? &shco : NULL, dis_buf
   16356                  );
   16357             if (!ok)
   16358                break;
   16359             res = newTemp(Ity_I32);
   16360             assign( res, isMVN ? unop(Iop_Not32, mkexpr(shop))
   16361                                : mkexpr(shop) );
   16362             if (bitS) {
   16363                vassert(shco != IRTemp_INVALID);
   16364                oldV = newTemp(Ity_I32);
   16365                assign( oldV, mk_armg_calculate_flag_v() );
   16366             } else {
   16367                vassert(shco == IRTemp_INVALID);
   16368             }
   16369             /* According to the Cortex A8 TRM Sec. 5.2.1, MOV PC, r14 is a
   16370                 return for purposes of branch prediction. */
   16371             if (!isMVN && INSN(11,0) == 14) {
   16372               jk = Ijk_Ret;
   16373             }
   16374             // can't safely read guest state after here
   16375             putIRegA( rD, mkexpr(res), condT, jk );
   16376             /* Update the flags thunk if necessary */
   16377             if (bitS) {
   16378                setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
   16379                                   res, shco, oldV, condT );
   16380             }
   16381             DIP("%s%s%s r%u, %s\n",
   16382                 isMVN ? "mvn" : "mov",
   16383                 nCC(INSN_COND), bitS ? "s" : "", rD, dis_buf );
   16384             goto decode_success;
   16385          }
   16386 
   16387          /* --------- CMP --------- */
   16388          case BITS4(1,0,1,0):   /* CMP:  (void) Rn - shifter_operand */
   16389          case BITS4(1,0,1,1): { /* CMN:  (void) Rn + shifter_operand */
   16390             Bool isCMN = INSN(24,21) == BITS4(1,0,1,1);
   16391             if (rD != 0)
   16392                break; /* rD must be zero */
   16393             if (bitS == 0)
   16394                break; /* if S (bit 20) is not set, it's not CMP/CMN */
   16395             rNt = newTemp(Ity_I32);
   16396             assign(rNt, getIRegA(rN));
   16397             ok = mk_shifter_operand(
   16398                     INSN(25,25), INSN(11,0),
   16399                     &shop, NULL, dis_buf
   16400                  );
   16401             if (!ok)
   16402                break;
   16403             // can't safely read guest state after here
   16404             /* Update the flags thunk. */
   16405             setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   16406                             rNt, shop, condT );
   16407             DIP("%s%s r%u, %s\n",
   16408                 isCMN ? "cmn" : "cmp",
   16409                 nCC(INSN_COND), rN, dis_buf );
   16410             goto decode_success;
   16411          }
   16412 
   16413          /* --------- TST --------- */
   16414          case BITS4(1,0,0,0):   /* TST:  (void) Rn & shifter_operand */
   16415          case BITS4(1,0,0,1): { /* TEQ:  (void) Rn ^ shifter_operand */
   16416             Bool isTEQ = INSN(24,21) == BITS4(1,0,0,1);
   16417             if (rD != 0)
   16418                break; /* rD must be zero */
   16419             if (bitS == 0)
   16420                break; /* if S (bit 20) is not set, it's not TST/TEQ */
   16421             rNt = newTemp(Ity_I32);
   16422             assign(rNt, getIRegA(rN));
   16423             ok = mk_shifter_operand(
   16424                     INSN(25,25), INSN(11,0),
   16425                     &shop, &shco, dis_buf
   16426                  );
   16427             if (!ok)
   16428                break;
   16429             /* Update the flags thunk. */
   16430             res = newTemp(Ity_I32);
   16431             assign( res, binop(isTEQ ? Iop_Xor32 : Iop_And32,
   16432                                mkexpr(rNt), mkexpr(shop)) );
   16433             oldV = newTemp(Ity_I32);
   16434             assign( oldV, mk_armg_calculate_flag_v() );
   16435             // can't safely read guest state after here
   16436             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
   16437                                res, shco, oldV, condT );
   16438             DIP("%s%s r%u, %s\n",
   16439                 isTEQ ? "teq" : "tst",
   16440                 nCC(INSN_COND), rN, dis_buf );
   16441             goto decode_success;
   16442          }
   16443 
   16444          /* --------- ADC, SBC, RSC --------- */
   16445          case BITS4(0,1,0,1): /* ADC:  Rd = Rn + shifter_operand + oldC */
   16446             name = "adc"; goto rd_eq_rn_op_SO_op_oldC;
   16447          case BITS4(0,1,1,0): /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
   16448             name = "sbc"; goto rd_eq_rn_op_SO_op_oldC;
   16449          case BITS4(0,1,1,1): /* RSC:  Rd = shifter_operand - Rn - (oldC ^ 1) */
   16450             name = "rsc"; goto rd_eq_rn_op_SO_op_oldC;
   16451          rd_eq_rn_op_SO_op_oldC: {
   16452             // FIXME: shco isn't used for anything.  Get rid of it.
   16453             rNt = newTemp(Ity_I32);
   16454             assign(rNt, getIRegA(rN));
   16455             ok = mk_shifter_operand(
   16456                     INSN(25,25), INSN(11,0),
   16457                     &shop, bitS ? &shco : NULL, dis_buf
   16458                  );
   16459             if (!ok)
   16460                break;
   16461             oldC = newTemp(Ity_I32);
   16462             assign( oldC, mk_armg_calculate_flag_c() );
   16463             res = newTemp(Ity_I32);
   16464             // compute the main result
   16465             switch (INSN(24,21)) {
   16466                case BITS4(0,1,0,1): /* ADC */
   16467                   assign(res,
   16468                          binop(Iop_Add32,
   16469                                binop(Iop_Add32, mkexpr(rNt), mkexpr(shop)),
   16470                                mkexpr(oldC) ));
   16471                   break;
   16472                case BITS4(0,1,1,0): /* SBC */
   16473                   assign(res,
   16474                          binop(Iop_Sub32,
   16475                                binop(Iop_Sub32, mkexpr(rNt), mkexpr(shop)),
   16476                                binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   16477                   break;
   16478                case BITS4(0,1,1,1): /* RSC */
   16479                   assign(res,
   16480                          binop(Iop_Sub32,
   16481                                binop(Iop_Sub32, mkexpr(shop), mkexpr(rNt)),
   16482                                binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   16483                   break;
   16484                default:
   16485                   vassert(0);
   16486             }
   16487             // but don't commit it until after we've finished
   16488             // all necessary reads from the guest state
   16489             // now safe to put the main result
   16490             putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   16491             // XXXX!! not safe to read any guest state after
   16492             // this point (I think the code below doesn't do that).
   16493             if (!bitS)
   16494                vassert(shco == IRTemp_INVALID);
   16495             /* Update the flags thunk if necessary */
   16496             if (bitS) {
   16497                vassert(shco != IRTemp_INVALID);
   16498                switch (INSN(24,21)) {
   16499                   case BITS4(0,1,0,1): /* ADC */
   16500                      setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
   16501                                         rNt, shop, oldC, condT );
   16502                      break;
   16503                   case BITS4(0,1,1,0): /* SBC */
   16504                      setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   16505                                         rNt, shop, oldC, condT );
   16506                      break;
   16507                   case BITS4(0,1,1,1): /* RSC */
   16508                      setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   16509                                         shop, rNt, oldC, condT );
   16510                      break;
   16511                   default:
   16512                      vassert(0);
   16513                }
   16514             }
   16515             DIP("%s%s%s r%u, r%u, %s\n",
   16516                 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
   16517             goto decode_success;
   16518          }
   16519 
   16520          default:
   16521             vassert(0);
   16522       }
   16523    } /* if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0)) */
   16524 
   16525    /* --------------------- Load/store (ubyte & word) -------- */
   16526    // LDR STR LDRB STRB
   16527    /*                 31   27   23   19 15 11    6   4 3  # highest bit
   16528                         28   24   20 16 12
   16529       A5-20   1 | 16  cond 0101 UB0L Rn Rd imm12
   16530       A5-22   1 | 32  cond 0111 UB0L Rn Rd imm5  sh2 0 Rm
   16531       A5-24   2 | 16  cond 0101 UB1L Rn Rd imm12
   16532       A5-26   2 | 32  cond 0111 UB1L Rn Rd imm5  sh2 0 Rm
   16533       A5-28   3 | 16  cond 0100 UB0L Rn Rd imm12
   16534       A5-32   3 | 32  cond 0110 UB0L Rn Rd imm5  sh2 0 Rm
   16535    */
   16536    /* case coding:
   16537              1   at-ea               (access at ea)
   16538              2   at-ea-then-upd      (access at ea, then Rn = ea)
   16539              3   at-Rn-then-upd      (access at Rn, then Rn = ea)
   16540       ea coding
   16541              16  Rn +/- imm12
   16542              32  Rn +/- Rm sh2 imm5
   16543    */
   16544    /* Quickly skip over all of this for hopefully most instructions */
   16545    if ((INSN(27,24) & BITS4(1,1,0,0)) != BITS4(0,1,0,0))
   16546       goto after_load_store_ubyte_or_word;
   16547 
   16548    summary = 0;
   16549 
   16550    /**/ if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 0) {
   16551       summary = 1 | 16;
   16552    }
   16553    else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 0
   16554                                           && INSN(4,4) == 0) {
   16555       summary = 1 | 32;
   16556    }
   16557    else if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 1) {
   16558       summary = 2 | 16;
   16559    }
   16560    else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 1
   16561                                           && INSN(4,4) == 0) {
   16562       summary = 2 | 32;
   16563    }
   16564    else if (INSN(27,24) == BITS4(0,1,0,0) && INSN(21,21) == 0) {
   16565       summary = 3 | 16;
   16566    }
   16567    else if (INSN(27,24) == BITS4(0,1,1,0) && INSN(21,21) == 0
   16568                                           && INSN(4,4) == 0) {
   16569       summary = 3 | 32;
   16570    }
   16571    else goto after_load_store_ubyte_or_word;
   16572 
   16573    { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
   16574      UInt rD = (insn >> 12) & 0xF; /* 15:12 */
   16575      UInt rM = (insn >> 0)  & 0xF; /*  3:0  */
   16576      UInt bU = (insn >> 23) & 1;      /* 23 */
   16577      UInt bB = (insn >> 22) & 1;      /* 22 */
   16578      UInt bL = (insn >> 20) & 1;      /* 20 */
   16579      UInt imm12 = (insn >> 0) & 0xFFF; /* 11:0 */
   16580      UInt imm5  = (insn >> 7) & 0x1F;  /* 11:7 */
   16581      UInt sh2   = (insn >> 5) & 3;     /* 6:5 */
   16582 
   16583      /* Skip some invalid cases, which would lead to two competing
   16584         updates to the same register, or which are otherwise
   16585         disallowed by the spec. */
   16586      switch (summary) {
   16587         case 1 | 16:
   16588            break;
   16589         case 1 | 32:
   16590            if (rM == 15) goto after_load_store_ubyte_or_word;
   16591            break;
   16592         case 2 | 16: case 3 | 16:
   16593            if (rN == 15) goto after_load_store_ubyte_or_word;
   16594            if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
   16595            break;
   16596         case 2 | 32: case 3 | 32:
   16597            if (rM == 15) goto after_load_store_ubyte_or_word;
   16598            if (rN == 15) goto after_load_store_ubyte_or_word;
   16599            if (rN == rM) goto after_load_store_ubyte_or_word;
   16600            if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
   16601            break;
   16602         default:
   16603            vassert(0);
   16604      }
   16605 
   16606      /* compute the effective address.  Bind it to a tmp since we
   16607         may need to use it twice. */
   16608      IRExpr* eaE = NULL;
   16609      switch (summary & 0xF0) {
   16610         case 16:
   16611            eaE = mk_EA_reg_plusminus_imm12( rN, bU, imm12, dis_buf );
   16612            break;
   16613         case 32:
   16614            eaE = mk_EA_reg_plusminus_shifted_reg( rN, bU, rM, sh2, imm5,
   16615                                                   dis_buf );
   16616            break;
   16617      }
   16618      vassert(eaE);
   16619      IRTemp eaT = newTemp(Ity_I32);
   16620      assign(eaT, eaE);
   16621 
   16622      /* get the old Rn value */
   16623      IRTemp rnT = newTemp(Ity_I32);
   16624      assign(rnT, getIRegA(rN));
   16625 
   16626      /* decide on the transfer address */
   16627      IRTemp taT = IRTemp_INVALID;
   16628      switch (summary & 0x0F) {
   16629         case 1: case 2: taT = eaT; break;
   16630         case 3:         taT = rnT; break;
   16631      }
   16632      vassert(taT != IRTemp_INVALID);
   16633 
   16634      if (bL == 0) {
   16635        /* Store.  If necessary, update the base register before the
   16636           store itself, so that the common idiom of "str rX, [sp,
   16637           #-4]!" (store rX at sp-4, then do new sp = sp-4, a.k.a "push
   16638           rX") doesn't cause Memcheck to complain that the access is
   16639           below the stack pointer.  Also, not updating sp before the
   16640           store confuses Valgrind's dynamic stack-extending logic.  So
   16641           do it before the store.  Hence we need to snarf the store
   16642           data before doing the basereg update. */
   16643 
   16644         /* get hold of the data to be stored */
   16645         IRTemp rDt = newTemp(Ity_I32);
   16646         assign(rDt, getIRegA(rD));
   16647 
   16648         /* Update Rn if necessary. */
   16649         switch (summary & 0x0F) {
   16650            case 2: case 3:
   16651               putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   16652               break;
   16653         }
   16654 
   16655         /* generate the transfer */
   16656         if (bB == 0) { // word store
   16657            storeGuardedLE( mkexpr(taT), mkexpr(rDt), condT );
   16658         } else { // byte store
   16659            vassert(bB == 1);
   16660            storeGuardedLE( mkexpr(taT), unop(Iop_32to8, mkexpr(rDt)), condT );
   16661         }
   16662 
   16663      } else {
   16664         /* Load */
   16665         vassert(bL == 1);
   16666 
   16667         /* generate the transfer */
   16668         if (bB == 0) { // word load
   16669            IRTemp jk = Ijk_Boring;
   16670            /* According to the Cortex A8 TRM Sec. 5.2.1, LDR(1) with r13 as the
   16671                base register and PC as the destination register is a return for
   16672                purposes of branch prediction.
   16673               The ARM ARM Sec. C9.10.1 further specifies that it must use a
   16674                post-increment by immediate addressing mode to be counted in
   16675                event 0x0E (Procedure return).*/
   16676            if (rN == 13 && summary == (3 | 16) && bB == 0) {
   16677               jk = Ijk_Ret;
   16678            }
   16679            IRTemp tD = newTemp(Ity_I32);
   16680            loadGuardedLE( tD, ILGop_Ident32,
   16681                           mkexpr(taT), llGetIReg(rD), condT );
   16682            /* "rD == 15 ? condT : IRTemp_INVALID": simply
   16683               IRTemp_INVALID would be correct in all cases here, and
   16684               for the non-r15 case it generates better code, by
   16685               avoiding two tests of the cond (since it is already
   16686               tested by loadGuardedLE).  However, the logic at the end
   16687               of this function, that deals with writes to r15, has an
   16688               optimisation which depends on seeing whether or not the
   16689               write is conditional.  Hence in this particular case we
   16690               let it "see" the guard condition. */
   16691            putIRegA( rD, mkexpr(tD),
   16692                      rD == 15 ? condT : IRTemp_INVALID, jk );
   16693         } else { // byte load
   16694            vassert(bB == 1);
   16695            IRTemp tD = newTemp(Ity_I32);
   16696            loadGuardedLE( tD, ILGop_8Uto32, mkexpr(taT), llGetIReg(rD), condT );
   16697            /* No point in similar 3rd arg complexity here, since we
   16698               can't sanely write anything to r15 like this. */
   16699            putIRegA( rD, mkexpr(tD), IRTemp_INVALID, Ijk_Boring );
   16700         }
   16701 
   16702         /* Update Rn if necessary. */
   16703         switch (summary & 0x0F) {
   16704            case 2: case 3:
   16705               // should be assured by logic above:
   16706               if (bL == 1)
   16707                  vassert(rD != rN); /* since we just wrote rD */
   16708               putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   16709               break;
   16710         }
   16711      }
   16712 
   16713      switch (summary & 0x0F) {
   16714         case 1:  DIP("%sr%s%s r%u, %s\n",
   16715                      bL == 0 ? "st" : "ld",
   16716                      bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
   16717                  break;
   16718         case 2:  DIP("%sr%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
   16719                      bL == 0 ? "st" : "ld",
   16720                      bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
   16721                  break;
   16722         case 3:  DIP("%sr%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
   16723                      bL == 0 ? "st" : "ld",
   16724                      bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
   16725                  break;
   16726         default: vassert(0);
   16727      }
   16728 
   16729      /* XXX deal with alignment constraints */
   16730 
   16731      goto decode_success;
   16732 
   16733      /* Complications:
   16734 
   16735         For all loads: if the Amode specifies base register
   16736         writeback, and the same register is specified for Rd and Rn,
   16737         the results are UNPREDICTABLE.
   16738 
   16739         For all loads and stores: if R15 is written, branch to
   16740         that address afterwards.
   16741 
   16742         STRB: straightforward
   16743         LDRB: loaded data is zero extended
   16744         STR:  lowest 2 bits of address are ignored
   16745         LDR:  if the lowest 2 bits of the address are nonzero
   16746               then the loaded value is rotated right by 8 * the lowest 2 bits
   16747      */
   16748    }
   16749 
   16750   after_load_store_ubyte_or_word:
   16751 
   16752    /* --------------------- Load/store (sbyte & hword) -------- */
   16753    // LDRH LDRSH STRH LDRSB
   16754    /*                 31   27   23   19 15 11   7    3     # highest bit
   16755                         28   24   20 16 12    8    4    0
   16756       A5-36   1 | 16  cond 0001 U10L Rn Rd im4h 1SH1 im4l
   16757       A5-38   1 | 32  cond 0001 U00L Rn Rd 0000 1SH1 Rm
   16758       A5-40   2 | 16  cond 0001 U11L Rn Rd im4h 1SH1 im4l
   16759       A5-42   2 | 32  cond 0001 U01L Rn Rd 0000 1SH1 Rm
   16760       A5-44   3 | 16  cond 0000 U10L Rn Rd im4h 1SH1 im4l
   16761       A5-46   3 | 32  cond 0000 U00L Rn Rd 0000 1SH1 Rm
   16762    */
   16763    /* case coding:
   16764              1   at-ea               (access at ea)
   16765              2   at-ea-then-upd      (access at ea, then Rn = ea)
   16766              3   at-Rn-then-upd      (access at Rn, then Rn = ea)
   16767       ea coding
   16768              16  Rn +/- imm8
   16769              32  Rn +/- Rm
   16770    */
   16771    /* Quickly skip over all of this for hopefully most instructions */
   16772    if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
   16773       goto after_load_store_sbyte_or_hword;
   16774 
   16775    /* Check the "1SH1" thing. */
   16776    if ((INSN(7,4) & BITS4(1,0,0,1)) != BITS4(1,0,0,1))
   16777       goto after_load_store_sbyte_or_hword;
   16778 
   16779    summary = 0;
   16780 
   16781    /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,0)) {
   16782       summary = 1 | 16;
   16783    }
   16784    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,0)) {
   16785       summary = 1 | 32;
   16786    }
   16787    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,1)) {
   16788       summary = 2 | 16;
   16789    }
   16790    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,1)) {
   16791       summary = 2 | 32;
   16792    }
   16793    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(1,0)) {
   16794       summary = 3 | 16;
   16795    }
   16796    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(0,0)) {
   16797       summary = 3 | 32;
   16798    }
   16799    else goto after_load_store_sbyte_or_hword;
   16800 
   16801    { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
   16802      UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
   16803      UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
   16804      UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
   16805      UInt bL   = (insn >> 20) & 1;   /* 20 L=1 load, L=0 store */
   16806      UInt bH   = (insn >> 5) & 1;    /* H=1 halfword, H=0 byte */
   16807      UInt bS   = (insn >> 6) & 1;    /* S=1 signed, S=0 unsigned */
   16808      UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
   16809 
   16810      /* Skip combinations that are either meaningless or already
   16811         handled by main word-or-unsigned-byte load-store
   16812         instructions. */
   16813      if (bS == 0 && bH == 0) /* "unsigned byte" */
   16814         goto after_load_store_sbyte_or_hword;
   16815      if (bS == 1 && bL == 0) /* "signed store" */
   16816         goto after_load_store_sbyte_or_hword;
   16817 
   16818      /* Require 11:8 == 0 for Rn +/- Rm cases */
   16819      if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
   16820         goto after_load_store_sbyte_or_hword;
   16821 
   16822      /* Skip some invalid cases, which would lead to two competing
   16823         updates to the same register, or which are otherwise
   16824         disallowed by the spec. */
   16825      switch (summary) {
   16826         case 1 | 16:
   16827            break;
   16828         case 1 | 32:
   16829            if (rM == 15) goto after_load_store_sbyte_or_hword;
   16830            break;
   16831         case 2 | 16: case 3 | 16:
   16832            if (rN == 15) goto after_load_store_sbyte_or_hword;
   16833            if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
   16834            break;
   16835         case 2 | 32: case 3 | 32:
   16836            if (rM == 15) goto after_load_store_sbyte_or_hword;
   16837            if (rN == 15) goto after_load_store_sbyte_or_hword;
   16838            if (rN == rM) goto after_load_store_sbyte_or_hword;
   16839            if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
   16840            break;
   16841         default:
   16842            vassert(0);
   16843      }
   16844 
   16845      /* If this is a branch, make it unconditional at this point.
   16846         Doing conditional branches in-line is too complex (for now).
   16847         Note that you'd have to be insane to use any of these loads to
   16848         do a branch, since they only load 16 bits at most, but we
   16849         handle it just in case. */
   16850      if (bL == 1 && rD == 15 && condT != IRTemp_INVALID) {
   16851         // go uncond
   16852         mk_skip_over_A32_if_cond_is_false( condT );
   16853         condT = IRTemp_INVALID;
   16854         // now uncond
   16855      }
   16856 
   16857      /* compute the effective address.  Bind it to a tmp since we
   16858         may need to use it twice. */
   16859      IRExpr* eaE = NULL;
   16860      switch (summary & 0xF0) {
   16861         case 16:
   16862            eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
   16863            break;
   16864         case 32:
   16865            eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
   16866            break;
   16867      }
   16868      vassert(eaE);
   16869      IRTemp eaT = newTemp(Ity_I32);
   16870      assign(eaT, eaE);
   16871 
   16872      /* get the old Rn value */
   16873      IRTemp rnT = newTemp(Ity_I32);
   16874      assign(rnT, getIRegA(rN));
   16875 
   16876      /* decide on the transfer address */
   16877      IRTemp taT = IRTemp_INVALID;
   16878      switch (summary & 0x0F) {
   16879         case 1: case 2: taT = eaT; break;
   16880         case 3:         taT = rnT; break;
   16881      }
   16882      vassert(taT != IRTemp_INVALID);
   16883 
   16884      /* ll previous value of rD, for dealing with conditional loads */
   16885      IRTemp llOldRd = newTemp(Ity_I32);
   16886      assign(llOldRd, llGetIReg(rD));
   16887 
   16888      /* halfword store  H 1  L 0  S 0
   16889         uhalf load      H 1  L 1  S 0
   16890         shalf load      H 1  L 1  S 1
   16891         sbyte load      H 0  L 1  S 1
   16892      */
   16893      const HChar* name = NULL;
   16894      /* generate the transfer */
   16895      /**/ if (bH == 1 && bL == 0 && bS == 0) { // halfword store
   16896         storeGuardedLE( mkexpr(taT),
   16897                         unop(Iop_32to16, getIRegA(rD)), condT );
   16898         name = "strh";
   16899      }
   16900      else if (bH == 1 && bL == 1 && bS == 0) { // uhalf load
   16901         IRTemp newRd = newTemp(Ity_I32);
   16902         loadGuardedLE( newRd, ILGop_16Uto32,
   16903                        mkexpr(taT), mkexpr(llOldRd), condT );
   16904         putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
   16905         name = "ldrh";
   16906      }
   16907      else if (bH == 1 && bL == 1 && bS == 1) { // shalf load
   16908         IRTemp newRd = newTemp(Ity_I32);
   16909         loadGuardedLE( newRd, ILGop_16Sto32,
   16910                        mkexpr(taT), mkexpr(llOldRd), condT );
   16911         putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
   16912         name = "ldrsh";
   16913      }
   16914      else if (bH == 0 && bL == 1 && bS == 1) { // sbyte load
   16915         IRTemp newRd = newTemp(Ity_I32);
   16916         loadGuardedLE( newRd, ILGop_8Sto32,
   16917                        mkexpr(taT), mkexpr(llOldRd), condT );
   16918         putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
   16919         name = "ldrsb";
   16920      }
   16921      else
   16922         vassert(0); // should be assured by logic above
   16923 
   16924      /* Update Rn if necessary. */
   16925      switch (summary & 0x0F) {
   16926         case 2: case 3:
   16927            // should be assured by logic above:
   16928            if (bL == 1)
   16929               vassert(rD != rN); /* since we just wrote rD */
   16930            putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   16931            break;
   16932      }
   16933 
   16934      switch (summary & 0x0F) {
   16935         case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
   16936                  break;
   16937         case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
   16938                      name, nCC(INSN_COND), rD, dis_buf);
   16939                  break;
   16940         case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
   16941                      name, nCC(INSN_COND), rD, dis_buf);
   16942                  break;
   16943         default: vassert(0);
   16944      }
   16945 
   16946      /* XXX deal with alignment constraints */
   16947 
   16948      goto decode_success;
   16949 
   16950      /* Complications:
   16951 
   16952         For all loads: if the Amode specifies base register
   16953         writeback, and the same register is specified for Rd and Rn,
   16954         the results are UNPREDICTABLE.
   16955 
   16956         For all loads and stores: if R15 is written, branch to
   16957         that address afterwards.
   16958 
   16959         Misaligned halfword stores => Unpredictable
   16960         Misaligned halfword loads  => Unpredictable
   16961      */
   16962    }
   16963 
   16964   after_load_store_sbyte_or_hword:
   16965 
   16966    /* --------------------- Load/store multiple -------------- */
   16967    // LD/STMIA LD/STMIB LD/STMDA LD/STMDB
   16968    // Remarkably complex and difficult to get right
   16969    // match 27:20 as 100XX0WL
   16970    if (BITS8(1,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,1,0,0))) {
   16971       // A5-50 LD/STMIA  cond 1000 10WL Rn RegList
   16972       // A5-51 LD/STMIB  cond 1001 10WL Rn RegList
   16973       // A5-53 LD/STMDA  cond 1000 00WL Rn RegList
   16974       // A5-53 LD/STMDB  cond 1001 00WL Rn RegList
   16975       //                   28   24   20 16       0
   16976 
   16977       UInt bINC    = (insn >> 23) & 1;
   16978       UInt bBEFORE = (insn >> 24) & 1;
   16979 
   16980       UInt bL      = (insn >> 20) & 1;  /* load=1, store=0 */
   16981       UInt bW      = (insn >> 21) & 1;  /* Rn wback=1, no wback=0 */
   16982       UInt rN      = (insn >> 16) & 0xF;
   16983       UInt regList = insn & 0xFFFF;
   16984       /* Skip some invalid cases, which would lead to two competing
   16985          updates to the same register, or which are otherwise
   16986          disallowed by the spec.  Note the test above has required
   16987          that S == 0, since that looks like a kernel-mode only thing.
   16988          Done by forcing the real pattern, viz 100XXSWL to actually be
   16989          100XX0WL. */
   16990       if (rN == 15) goto after_load_store_multiple;
   16991       // reglist can't be empty
   16992       if (regList == 0) goto after_load_store_multiple;
   16993       // if requested to writeback Rn, and this is a load instruction,
   16994       // then Rn can't appear in RegList, since we'd have two competing
   16995       // new values for Rn.  We do however accept this case for store
   16996       // instructions.
   16997       if (bW == 1 && bL == 1 && ((1 << rN) & regList) > 0)
   16998          goto after_load_store_multiple;
   16999 
   17000       /* Now, we can't do a conditional load or store, since that very
   17001          likely will generate an exception.  So we have to take a side
   17002          exit at this point if the condition is false. */
   17003       if (condT != IRTemp_INVALID) {
   17004          mk_skip_over_A32_if_cond_is_false( condT );
   17005          condT = IRTemp_INVALID;
   17006       }
   17007 
   17008       /* Ok, now we're unconditional.  Generate the IR. */
   17009       mk_ldm_stm( True/*arm*/, rN, bINC, bBEFORE, bW, bL, regList );
   17010 
   17011       DIP("%sm%c%c%s r%u%s, {0x%04x}\n",
   17012           bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
   17013           nCC(INSN_COND),
   17014           rN, bW ? "!" : "", regList);
   17015 
   17016       goto decode_success;
   17017    }
   17018 
   17019   after_load_store_multiple:
   17020 
   17021    /* --------------------- Control flow --------------------- */
   17022    // B, BL (Branch, or Branch-and-Link, to immediate offset)
   17023    //
   17024    if (BITS8(1,0,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))) {
   17025       UInt link   = (insn >> 24) & 1;
   17026       UInt uimm24 = insn & ((1<<24)-1);  uimm24 <<= 8;
   17027       Int  simm24 = (Int)uimm24;         simm24 >>= 8;
   17028       UInt dst    = guest_R15_curr_instr_notENC + 8 + (((UInt)simm24) << 2);
   17029       IRJumpKind jk = link ? Ijk_Call : Ijk_Boring;
   17030       if (link) {
   17031          putIRegA(14, mkU32(guest_R15_curr_instr_notENC + 4),
   17032                       condT, Ijk_Boring);
   17033       }
   17034       if (condT == IRTemp_INVALID) {
   17035          /* unconditional transfer to 'dst'.  See if we can simply
   17036             continue tracing at the destination. */
   17037          if (resteerOkFn( callback_opaque, dst )) {
   17038             /* yes */
   17039             dres.whatNext   = Dis_ResteerU;
   17040             dres.continueAt = dst;
   17041          } else {
   17042             /* no; terminate the SB at this point. */
   17043             llPutIReg(15, mkU32(dst));
   17044             dres.jk_StopHere = jk;
   17045             dres.whatNext    = Dis_StopHere;
   17046          }
   17047          DIP("b%s 0x%x\n", link ? "l" : "", dst);
   17048       } else {
   17049          /* conditional transfer to 'dst' */
   17050          const HChar* comment = "";
   17051 
   17052          /* First see if we can do some speculative chasing into one
   17053             arm or the other.  Be conservative and only chase if
   17054             !link, that is, this is a normal conditional branch to a
   17055             known destination. */
   17056          if (!link
   17057              && resteerCisOk
   17058              && vex_control.guest_chase_cond
   17059              && dst < guest_R15_curr_instr_notENC
   17060              && resteerOkFn( callback_opaque, dst) ) {
   17061             /* Speculation: assume this backward branch is taken.  So
   17062                we need to emit a side-exit to the insn following this
   17063                one, on the negation of the condition, and continue at
   17064                the branch target address (dst). */
   17065             stmt( IRStmt_Exit( unop(Iop_Not1,
   17066                                     unop(Iop_32to1, mkexpr(condT))),
   17067                                Ijk_Boring,
   17068                                IRConst_U32(guest_R15_curr_instr_notENC+4),
   17069                                OFFB_R15T ));
   17070             dres.whatNext   = Dis_ResteerC;
   17071             dres.continueAt = (Addr32)dst;
   17072             comment = "(assumed taken)";
   17073          }
   17074          else
   17075          if (!link
   17076              && resteerCisOk
   17077              && vex_control.guest_chase_cond
   17078              && dst >= guest_R15_curr_instr_notENC
   17079              && resteerOkFn( callback_opaque,
   17080                              guest_R15_curr_instr_notENC+4) ) {
   17081             /* Speculation: assume this forward branch is not taken.
   17082                So we need to emit a side-exit to dst (the dest) and
   17083                continue disassembling at the insn immediately
   17084                following this one. */
   17085             stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
   17086                                Ijk_Boring,
   17087                                IRConst_U32(dst),
   17088                                OFFB_R15T ));
   17089             dres.whatNext   = Dis_ResteerC;
   17090             dres.continueAt = guest_R15_curr_instr_notENC+4;
   17091             comment = "(assumed not taken)";
   17092          }
   17093          else {
   17094             /* Conservative default translation - end the block at
   17095                this point. */
   17096             stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
   17097                                jk, IRConst_U32(dst), OFFB_R15T ));
   17098             llPutIReg(15, mkU32(guest_R15_curr_instr_notENC + 4));
   17099             dres.jk_StopHere = Ijk_Boring;
   17100             dres.whatNext    = Dis_StopHere;
   17101          }
   17102          DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND),
   17103              dst, comment);
   17104       }
   17105       goto decode_success;
   17106    }
   17107 
   17108    // BX, BLX (Branch and Exchange, optionally with Link, to a register)
   17109    // NB: interworking branch
   17110    if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
   17111        && INSN(19,12) == BITS8(1,1,1,1,1,1,1,1)
   17112        && (INSN(11,4) == BITS8(1,1,1,1,0,0,1,1)
   17113            || INSN(11,4) == BITS8(1,1,1,1,0,0,0,1))) {
   17114       IRTemp  dst = newTemp(Ity_I32);
   17115       UInt    link = (INSN(11,4) >> 1) & 1;
   17116       UInt    rM   = INSN(3,0);
   17117       // we don't decode the case (link && rM == 15), as that's
   17118       // Unpredictable.
   17119       if (!(link && rM == 15)) {
   17120          if (condT != IRTemp_INVALID) {
   17121             mk_skip_over_A32_if_cond_is_false( condT );
   17122          }
   17123          // rM contains an interworking address exactly as we require
   17124          // (with continuation CPSR.T in bit 0), so we can use it
   17125          // as-is, with no masking.
   17126          assign( dst, getIRegA(rM) );
   17127          if (link) {
   17128             putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
   17129                       IRTemp_INVALID/*because AL*/, Ijk_Boring );
   17130          }
   17131          llPutIReg(15, mkexpr(dst));
   17132          dres.jk_StopHere = link ? Ijk_Call
   17133                                  : (rM == 14 ? Ijk_Ret : Ijk_Boring);
   17134          dres.whatNext    = Dis_StopHere;
   17135          if (condT == IRTemp_INVALID) {
   17136             DIP("b%sx r%u\n", link ? "l" : "", rM);
   17137          } else {
   17138             DIP("b%sx%s r%u\n", link ? "l" : "", nCC(INSN_COND), rM);
   17139          }
   17140          goto decode_success;
   17141       }
   17142       /* else: (link && rM == 15): just fall through */
   17143    }
   17144 
   17145    /* --- NB: the immediate-form ARM interworking branch (BLX <label>)
   17146       is in NV space, hence is handled elsewhere by
   17147       decode_NV_instruction_ARMv7_and_below.  ---
   17148    */
   17149 
   17150    /* --------------------- Clz --------------------- */
   17151    // CLZ
   17152    if (INSN(27,20) == BITS8(0,0,0,1,0,1,1,0)
   17153        && INSN(19,16) == BITS4(1,1,1,1)
   17154        && INSN(11,4) == BITS8(1,1,1,1,0,0,0,1)) {
   17155       UInt rD = INSN(15,12);
   17156       UInt rM = INSN(3,0);
   17157       IRTemp arg = newTemp(Ity_I32);
   17158       IRTemp res = newTemp(Ity_I32);
   17159       assign(arg, getIRegA(rM));
   17160       assign(res, IRExpr_ITE(
   17161                      binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
   17162                      mkU32(32),
   17163                      unop(Iop_Clz32, mkexpr(arg))
   17164             ));
   17165       putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   17166       DIP("clz%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
   17167       goto decode_success;
   17168    }
   17169 
   17170    /* --------------------- Mul etc --------------------- */
   17171    // MUL
   17172    if (BITS8(0,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
   17173        && INSN(15,12) == BITS4(0,0,0,0)
   17174        && INSN(7,4) == BITS4(1,0,0,1)) {
   17175       UInt bitS = (insn >> 20) & 1; /* 20:20 */
   17176       UInt rD = INSN(19,16);
   17177       UInt rS = INSN(11,8);
   17178       UInt rM = INSN(3,0);
   17179       if (rD == 15 || rM == 15 || rS == 15) {
   17180          /* Unpredictable; don't decode; fall through */
   17181       } else {
   17182          IRTemp argL = newTemp(Ity_I32);
   17183          IRTemp argR = newTemp(Ity_I32);
   17184          IRTemp res  = newTemp(Ity_I32);
   17185          IRTemp oldC = IRTemp_INVALID;
   17186          IRTemp oldV = IRTemp_INVALID;
   17187          assign( argL, getIRegA(rM));
   17188          assign( argR, getIRegA(rS));
   17189          assign( res, binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) );
   17190          if (bitS) {
   17191             oldC = newTemp(Ity_I32);
   17192             assign(oldC, mk_armg_calculate_flag_c());
   17193             oldV = newTemp(Ity_I32);
   17194             assign(oldV, mk_armg_calculate_flag_v());
   17195          }
   17196          // now update guest state
   17197          putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   17198          if (bitS) {
   17199             IRTemp pair = newTemp(Ity_I32);
   17200             assign( pair, binop(Iop_Or32,
   17201                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   17202                                 mkexpr(oldV)) );
   17203             setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
   17204          }
   17205          DIP("mul%c%s r%u, r%u, r%u\n",
   17206              bitS ? 's' : ' ', nCC(INSN_COND), rD, rM, rS);
   17207          goto decode_success;
   17208       }
   17209       /* fall through */
   17210    }
   17211 
   17212    /* --------------------- Integer Divides --------------------- */
   17213    // SDIV
   17214    if (BITS8(0,1,1,1,0,0,0,1) == INSN(27,20)
   17215        && INSN(15,12) == BITS4(1,1,1,1)
   17216        && INSN(7,4) == BITS4(0,0,0,1)) {
   17217       UInt rD = INSN(19,16);
   17218       UInt rM = INSN(11,8);
   17219       UInt rN = INSN(3,0);
   17220       if (rD == 15 || rM == 15 || rN == 15) {
   17221          /* Unpredictable; don't decode; fall through */
   17222       } else {
   17223          IRTemp res  = newTemp(Ity_I32);
   17224          IRTemp argL = newTemp(Ity_I32);
   17225          IRTemp argR = newTemp(Ity_I32);
   17226          assign(argL, getIRegA(rN));
   17227          assign(argR, getIRegA(rM));
   17228          assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
   17229          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   17230          DIP("sdiv r%u, r%u, r%u\n", rD, rN, rM);
   17231          goto decode_success;
   17232       }
   17233     }
   17234 
   17235    // UDIV
   17236    if (BITS8(0,1,1,1,0,0,1,1) == INSN(27,20)
   17237        && INSN(15,12) == BITS4(1,1,1,1)
   17238        && INSN(7,4) == BITS4(0,0,0,1)) {
   17239       UInt rD = INSN(19,16);
   17240       UInt rM = INSN(11,8);
   17241       UInt rN = INSN(3,0);
   17242       if (rD == 15 || rM == 15 || rN == 15) {
   17243          /* Unpredictable; don't decode; fall through */
   17244       } else {
   17245          IRTemp res  = newTemp(Ity_I32);
   17246          IRTemp argL = newTemp(Ity_I32);
   17247          IRTemp argR = newTemp(Ity_I32);
   17248          assign(argL, getIRegA(rN));
   17249          assign(argR, getIRegA(rM));
   17250          assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
   17251          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   17252          DIP("udiv r%u, r%u, r%u\n", rD, rN, rM);
   17253          goto decode_success;
   17254       }
   17255    }
   17256 
   17257    // MLA, MLS
   17258    if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   17259        && INSN(7,4) == BITS4(1,0,0,1)) {
   17260       UInt bitS  = (insn >> 20) & 1; /* 20:20 */
   17261       UInt isMLS = (insn >> 22) & 1; /* 22:22 */
   17262       UInt rD = INSN(19,16);
   17263       UInt rN = INSN(15,12);
   17264       UInt rS = INSN(11,8);
   17265       UInt rM = INSN(3,0);
   17266       if (bitS == 1 && isMLS == 1) {
   17267          /* This isn't allowed (MLS that sets flags).  don't decode;
   17268             fall through */
   17269       }
   17270       else
   17271       if (rD == 15 || rM == 15 || rS == 15 || rN == 15) {
   17272          /* Unpredictable; don't decode; fall through */
   17273       } else {
   17274          IRTemp argL = newTemp(Ity_I32);
   17275          IRTemp argR = newTemp(Ity_I32);
   17276          IRTemp argP = newTemp(Ity_I32);
   17277          IRTemp res  = newTemp(Ity_I32);
   17278          IRTemp oldC = IRTemp_INVALID;
   17279          IRTemp oldV = IRTemp_INVALID;
   17280          assign( argL, getIRegA(rM));
   17281          assign( argR, getIRegA(rS));
   17282          assign( argP, getIRegA(rN));
   17283          assign( res, binop(isMLS ? Iop_Sub32 : Iop_Add32,
   17284                             mkexpr(argP),
   17285                             binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) ));
   17286          if (bitS) {
   17287             vassert(!isMLS); // guaranteed above
   17288             oldC = newTemp(Ity_I32);
   17289             assign(oldC, mk_armg_calculate_flag_c());
   17290             oldV = newTemp(Ity_I32);
   17291             assign(oldV, mk_armg_calculate_flag_v());
   17292          }
   17293          // now update guest state
   17294          putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
   17295          if (bitS) {
   17296             IRTemp pair = newTemp(Ity_I32);
   17297             assign( pair, binop(Iop_Or32,
   17298                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   17299                                 mkexpr(oldV)) );
   17300             setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
   17301          }
   17302          DIP("ml%c%c%s r%u, r%u, r%u, r%u\n",
   17303              isMLS ? 's' : 'a', bitS ? 's' : ' ',
   17304              nCC(INSN_COND), rD, rM, rS, rN);
   17305          goto decode_success;
   17306       }
   17307       /* fall through */
   17308    }
   17309 
   17310    // SMULL, UMULL
   17311    if (BITS8(0,0,0,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   17312        && INSN(7,4) == BITS4(1,0,0,1)) {
   17313       UInt bitS = (insn >> 20) & 1; /* 20:20 */
   17314       UInt rDhi = INSN(19,16);
   17315       UInt rDlo = INSN(15,12);
   17316       UInt rS   = INSN(11,8);
   17317       UInt rM   = INSN(3,0);
   17318       UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
   17319       if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
   17320          /* Unpredictable; don't decode; fall through */
   17321       } else {
   17322          IRTemp argL  = newTemp(Ity_I32);
   17323          IRTemp argR  = newTemp(Ity_I32);
   17324          IRTemp res   = newTemp(Ity_I64);
   17325          IRTemp resHi = newTemp(Ity_I32);
   17326          IRTemp resLo = newTemp(Ity_I32);
   17327          IRTemp oldC  = IRTemp_INVALID;
   17328          IRTemp oldV  = IRTemp_INVALID;
   17329          IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
   17330          assign( argL, getIRegA(rM));
   17331          assign( argR, getIRegA(rS));
   17332          assign( res, binop(mulOp, mkexpr(argL), mkexpr(argR)) );
   17333          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   17334          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   17335          if (bitS) {
   17336             oldC = newTemp(Ity_I32);
   17337             assign(oldC, mk_armg_calculate_flag_c());
   17338             oldV = newTemp(Ity_I32);
   17339             assign(oldV, mk_armg_calculate_flag_v());
   17340          }
   17341          // now update guest state
   17342          putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   17343          putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   17344          if (bitS) {
   17345             IRTemp pair = newTemp(Ity_I32);
   17346             assign( pair, binop(Iop_Or32,
   17347                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   17348                                 mkexpr(oldV)) );
   17349             setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
   17350          }
   17351          DIP("%cmull%c%s r%u, r%u, r%u, r%u\n",
   17352              isS ? 's' : 'u', bitS ? 's' : ' ',
   17353              nCC(INSN_COND), rDlo, rDhi, rM, rS);
   17354          goto decode_success;
   17355       }
   17356       /* fall through */
   17357    }
   17358 
   17359    // SMLAL, UMLAL
   17360    if (BITS8(0,0,0,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   17361        && INSN(7,4) == BITS4(1,0,0,1)) {
   17362       UInt bitS = (insn >> 20) & 1; /* 20:20 */
   17363       UInt rDhi = INSN(19,16);
   17364       UInt rDlo = INSN(15,12);
   17365       UInt rS   = INSN(11,8);
   17366       UInt rM   = INSN(3,0);
   17367       UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
   17368       if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
   17369          /* Unpredictable; don't decode; fall through */
   17370       } else {
   17371          IRTemp argL  = newTemp(Ity_I32);
   17372          IRTemp argR  = newTemp(Ity_I32);
   17373          IRTemp old   = newTemp(Ity_I64);
   17374          IRTemp res   = newTemp(Ity_I64);
   17375          IRTemp resHi = newTemp(Ity_I32);
   17376          IRTemp resLo = newTemp(Ity_I32);
   17377          IRTemp oldC  = IRTemp_INVALID;
   17378          IRTemp oldV  = IRTemp_INVALID;
   17379          IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
   17380          assign( argL, getIRegA(rM));
   17381          assign( argR, getIRegA(rS));
   17382          assign( old, binop(Iop_32HLto64, getIRegA(rDhi), getIRegA(rDlo)) );
   17383          assign( res, binop(Iop_Add64,
   17384                             mkexpr(old),
   17385                             binop(mulOp, mkexpr(argL), mkexpr(argR))) );
   17386          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   17387          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   17388          if (bitS) {
   17389             oldC = newTemp(Ity_I32);
   17390             assign(oldC, mk_armg_calculate_flag_c());
   17391             oldV = newTemp(Ity_I32);
   17392             assign(oldV, mk_armg_calculate_flag_v());
   17393          }
   17394          // now update guest state
   17395          putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   17396          putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   17397          if (bitS) {
   17398             IRTemp pair = newTemp(Ity_I32);
   17399             assign( pair, binop(Iop_Or32,
   17400                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
   17401                                 mkexpr(oldV)) );
   17402             setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
   17403          }
   17404          DIP("%cmlal%c%s r%u, r%u, r%u, r%u\n",
   17405              isS ? 's' : 'u', bitS ? 's' : ' ', nCC(INSN_COND),
   17406              rDlo, rDhi, rM, rS);
   17407          goto decode_success;
   17408       }
   17409       /* fall through */
   17410    }
   17411 
   17412    // UMAAL
   17413    if (BITS8(0,0,0,0,0,1,0,0) == INSN(27,20) && INSN(7,4) == BITS4(1,0,0,1)) {
   17414       UInt rDhi = INSN(19,16);
   17415       UInt rDlo = INSN(15,12);
   17416       UInt rM   = INSN(11,8);
   17417       UInt rN   = INSN(3,0);
   17418       if (rDlo == 15 || rDhi == 15 || rN == 15 || rM == 15 || rDhi == rDlo)  {
   17419          /* Unpredictable; don't decode; fall through */
   17420       } else {
   17421          IRTemp argN   = newTemp(Ity_I32);
   17422          IRTemp argM   = newTemp(Ity_I32);
   17423          IRTemp argDhi = newTemp(Ity_I32);
   17424          IRTemp argDlo = newTemp(Ity_I32);
   17425          IRTemp res    = newTemp(Ity_I64);
   17426          IRTemp resHi  = newTemp(Ity_I32);
   17427          IRTemp resLo  = newTemp(Ity_I32);
   17428          assign( argN,   getIRegA(rN) );
   17429          assign( argM,   getIRegA(rM) );
   17430          assign( argDhi, getIRegA(rDhi) );
   17431          assign( argDlo, getIRegA(rDlo) );
   17432          assign( res,
   17433                  binop(Iop_Add64,
   17434                        binop(Iop_Add64,
   17435                              binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
   17436                              unop(Iop_32Uto64, mkexpr(argDhi))),
   17437                        unop(Iop_32Uto64, mkexpr(argDlo))) );
   17438          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   17439          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   17440          // now update guest state
   17441          putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
   17442          putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
   17443          DIP("umaal %s r%u, r%u, r%u, r%u\n",
   17444              nCC(INSN_COND), rDlo, rDhi, rN, rM);
   17445          goto decode_success;
   17446       }
   17447       /* fall through */
   17448    }
   17449 
   17450    /* --------------------- Msr etc --------------------- */
   17451 
   17452    // MSR apsr, #imm
   17453    if (INSN(27,20) == BITS8(0,0,1,1,0,0,1,0)
   17454        && INSN(17,12) == BITS6(0,0,1,1,1,1)) {
   17455       UInt write_ge    = INSN(18,18);
   17456       UInt write_nzcvq = INSN(19,19);
   17457       if (write_nzcvq || write_ge) {
   17458          UInt   imm = (INSN(11,0) >> 0) & 0xFF;
   17459          UInt   rot = 2 * ((INSN(11,0) >> 8) & 0xF);
   17460          IRTemp immT = newTemp(Ity_I32);
   17461          vassert(rot <= 30);
   17462          imm = ROR32(imm, rot);
   17463          assign(immT, mkU32(imm));
   17464          desynthesise_APSR( write_nzcvq, write_ge, immT, condT );
   17465          DIP("msr%s cpsr%s%sf, #0x%08x\n", nCC(INSN_COND),
   17466              write_nzcvq ? "f" : "", write_ge ? "g" : "", imm);
   17467          goto decode_success;
   17468       }
   17469       /* fall through */
   17470    }
   17471 
   17472    // MSR apsr, reg
   17473    if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
   17474        && INSN(17,12) == BITS6(0,0,1,1,1,1)
   17475        && INSN(11,4) == BITS8(0,0,0,0,0,0,0,0)) {
   17476       UInt rN          = INSN(3,0);
   17477       UInt write_ge    = INSN(18,18);
   17478       UInt write_nzcvq = INSN(19,19);
   17479       if (rN != 15 && (write_nzcvq || write_ge)) {
   17480          IRTemp rNt = newTemp(Ity_I32);
   17481          assign(rNt, getIRegA(rN));
   17482          desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
   17483          DIP("msr%s cpsr_%s%s, r%u\n", nCC(INSN_COND),
   17484              write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
   17485          goto decode_success;
   17486       }
   17487       /* fall through */
   17488    }
   17489 
   17490    // MRS rD, cpsr
   17491    if ((insn & 0x0FFF0FFF) == 0x010F0000) {
   17492       UInt rD   = INSN(15,12);
   17493       if (rD != 15) {
   17494          IRTemp apsr = synthesise_APSR();
   17495          putIRegA( rD, mkexpr(apsr), condT, Ijk_Boring );
   17496          DIP("mrs%s r%u, cpsr\n", nCC(INSN_COND), rD);
   17497          goto decode_success;
   17498       }
   17499       /* fall through */
   17500    }
   17501 
   17502    /* --------------------- Svc --------------------- */
   17503    if (BITS8(1,1,1,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))) {
   17504       UInt imm24 = (insn >> 0) & 0xFFFFFF;
   17505       if (imm24 == 0) {
   17506          /* A syscall.  We can't do this conditionally, hence: */
   17507          if (condT != IRTemp_INVALID) {
   17508             mk_skip_over_A32_if_cond_is_false( condT );
   17509          }
   17510          // AL after here
   17511          llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 4 ));
   17512          dres.jk_StopHere = Ijk_Sys_syscall;
   17513          dres.whatNext    = Dis_StopHere;
   17514          DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
   17515          goto decode_success;
   17516       }
   17517       /* fall through */
   17518    }
   17519 
   17520    /* ------------------------ swp ------------------------ */
   17521 
   17522    // SWP, SWPB
   17523    if (BITS8(0,0,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   17524        && BITS4(0,0,0,0) == INSN(11,8)
   17525        && BITS4(1,0,0,1) == INSN(7,4)) {
   17526       UInt   rN   = INSN(19,16);
   17527       UInt   rD   = INSN(15,12);
   17528       UInt   rM   = INSN(3,0);
   17529       IRTemp tRn  = newTemp(Ity_I32);
   17530       IRTemp tNew = newTemp(Ity_I32);
   17531       IRTemp tOld = IRTemp_INVALID;
   17532       IRTemp tSC1 = newTemp(Ity_I1);
   17533       UInt   isB  = (insn >> 22) & 1;
   17534 
   17535       if (rD == 15 || rN == 15 || rM == 15 || rN == rM || rN == rD) {
   17536          /* undecodable; fall through */
   17537       } else {
   17538          /* make unconditional */
   17539          if (condT != IRTemp_INVALID) {
   17540             mk_skip_over_A32_if_cond_is_false( condT );
   17541             condT = IRTemp_INVALID;
   17542          }
   17543          /* Ok, now we're unconditional.  Generate a LL-SC loop. */
   17544          assign(tRn, getIRegA(rN));
   17545          assign(tNew, getIRegA(rM));
   17546          if (isB) {
   17547             /* swpb */
   17548             tOld = newTemp(Ity_I8);
   17549             stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
   17550                               NULL/*=>isLL*/) );
   17551             stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
   17552                               unop(Iop_32to8, mkexpr(tNew))) );
   17553          } else {
   17554             /* swp */
   17555             tOld = newTemp(Ity_I32);
   17556             stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
   17557                               NULL/*=>isLL*/) );
   17558             stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
   17559                               mkexpr(tNew)) );
   17560          }
   17561          stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
   17562                            /*Ijk_NoRedir*/Ijk_Boring,
   17563                            IRConst_U32(guest_R15_curr_instr_notENC),
   17564                            OFFB_R15T ));
   17565          putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
   17566                       IRTemp_INVALID, Ijk_Boring);
   17567          DIP("swp%s%s r%u, r%u, [r%u]\n",
   17568              isB ? "b" : "", nCC(INSN_COND), rD, rM, rN);
   17569          goto decode_success;
   17570       }
   17571       /* fall through */
   17572    }
   17573 
   17574    /* ----------------------------------------------------------- */
   17575    /* -- ARMv6 instructions                                    -- */
   17576    /* ----------------------------------------------------------- */
   17577 
   17578    /* ------------------- {ldr,str}ex{,b,h,d} ------------------- */
   17579 
   17580    // LDREXD, LDREX, LDREXH, LDREXB
   17581    if (0x01900F9F == (insn & 0x0F900FFF)) {
   17582       UInt   rT    = INSN(15,12);
   17583       UInt   rN    = INSN(19,16);
   17584       IRType ty    = Ity_INVALID;
   17585       IROp   widen = Iop_INVALID;
   17586       const HChar* nm = NULL;
   17587       Bool   valid = True;
   17588       switch (INSN(22,21)) {
   17589          case 0: nm = "";  ty = Ity_I32; break;
   17590          case 1: nm = "d"; ty = Ity_I64; break;
   17591          case 2: nm = "b"; ty = Ity_I8;  widen = Iop_8Uto32; break;
   17592          case 3: nm = "h"; ty = Ity_I16; widen = Iop_16Uto32; break;
   17593          default: vassert(0);
   17594       }
   17595       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
   17596          if (rT == 15 || rN == 15)
   17597             valid = False;
   17598       } else {
   17599          vassert(ty == Ity_I64);
   17600          if ((rT & 1) == 1 || rT == 14 || rN == 15)
   17601             valid = False;
   17602       }
   17603       if (valid) {
   17604          IRTemp res;
   17605          /* make unconditional */
   17606          if (condT != IRTemp_INVALID) {
   17607            mk_skip_over_A32_if_cond_is_false( condT );
   17608            condT = IRTemp_INVALID;
   17609          }
   17610          /* Ok, now we're unconditional.  Do the load. */
   17611          res = newTemp(ty);
   17612          // FIXME: assumes little-endian guest
   17613          stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
   17614                            NULL/*this is a load*/) );
   17615          if (ty == Ity_I64) {
   17616             // FIXME: assumes little-endian guest
   17617             putIRegA(rT+0, unop(Iop_64to32, mkexpr(res)),
   17618                            IRTemp_INVALID, Ijk_Boring);
   17619             putIRegA(rT+1, unop(Iop_64HIto32, mkexpr(res)),
   17620                            IRTemp_INVALID, Ijk_Boring);
   17621             DIP("ldrex%s%s r%u, r%u, [r%u]\n",
   17622                 nm, nCC(INSN_COND), rT+0, rT+1, rN);
   17623          } else {
   17624             putIRegA(rT, widen == Iop_INVALID
   17625                             ? mkexpr(res) : unop(widen, mkexpr(res)),
   17626                      IRTemp_INVALID, Ijk_Boring);
   17627             DIP("ldrex%s%s r%u, [r%u]\n", nm, nCC(INSN_COND), rT, rN);
   17628          }
   17629          goto decode_success;
   17630       }
   17631       /* undecodable; fall through */
   17632    }
   17633 
   17634    // STREXD, STREX, STREXH, STREXB
   17635    if (0x01800F90 == (insn & 0x0F900FF0)) {
   17636       UInt   rT     = INSN(3,0);
   17637       UInt   rN     = INSN(19,16);
   17638       UInt   rD     = INSN(15,12);
   17639       IRType ty     = Ity_INVALID;
   17640       IROp   narrow = Iop_INVALID;
   17641       const HChar* nm = NULL;
   17642       Bool   valid  = True;
   17643       switch (INSN(22,21)) {
   17644          case 0: nm = "";  ty = Ity_I32; break;
   17645          case 1: nm = "d"; ty = Ity_I64; break;
   17646          case 2: nm = "b"; ty = Ity_I8;  narrow = Iop_32to8; break;
   17647          case 3: nm = "h"; ty = Ity_I16; narrow = Iop_32to16; break;
   17648          default: vassert(0);
   17649       }
   17650       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
   17651          if (rD == 15 || rN == 15 || rT == 15
   17652              || rD == rN || rD == rT)
   17653             valid = False;
   17654       } else {
   17655          vassert(ty == Ity_I64);
   17656          if (rD == 15 || (rT & 1) == 1 || rT == 14 || rN == 15
   17657              || rD == rN || rD == rT || rD == rT+1)
   17658             valid = False;
   17659       }
   17660       if (valid) {
   17661          IRTemp resSC1, resSC32, data;
   17662          /* make unconditional */
   17663          if (condT != IRTemp_INVALID) {
   17664             mk_skip_over_A32_if_cond_is_false( condT );
   17665             condT = IRTemp_INVALID;
   17666          }
   17667          /* Ok, now we're unconditional.  Do the store. */
   17668          data = newTemp(ty);
   17669          assign(data,
   17670                 ty == Ity_I64
   17671                    // FIXME: assumes little-endian guest
   17672                    ? binop(Iop_32HLto64, getIRegA(rT+1), getIRegA(rT+0))
   17673                    : narrow == Iop_INVALID
   17674                       ? getIRegA(rT)
   17675                       : unop(narrow, getIRegA(rT)));
   17676          resSC1 = newTemp(Ity_I1);
   17677          // FIXME: assumes little-endian guest
   17678          stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
   17679 
   17680          /* Set rD to 1 on failure, 0 on success.  Currently we have
   17681             resSC1 == 0 on failure, 1 on success. */
   17682          resSC32 = newTemp(Ity_I32);
   17683          assign(resSC32,
   17684                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   17685 
   17686          putIRegA(rD, mkexpr(resSC32),
   17687                       IRTemp_INVALID, Ijk_Boring);
   17688          if (ty == Ity_I64) {
   17689             DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
   17690                 nm, nCC(INSN_COND), rD, rT, rT+1, rN);
   17691          } else {
   17692             DIP("strex%s%s r%u, r%u, [r%u]\n",
   17693                 nm, nCC(INSN_COND), rD, rT, rN);
   17694          }
   17695          goto decode_success;
   17696       }
   17697       /* fall through */
   17698    }
   17699 
   17700    /* --------------------- movw, movt --------------------- */
   17701    if (0x03000000 == (insn & 0x0FF00000)
   17702        || 0x03400000 == (insn & 0x0FF00000)) /* pray for CSE */ {
   17703       UInt rD    = INSN(15,12);
   17704       UInt imm16 = (insn & 0xFFF) | ((insn >> 4) & 0x0000F000);
   17705       UInt isT   = (insn >> 22) & 1;
   17706       if (rD == 15) {
   17707          /* forget it */
   17708       } else {
   17709          if (isT) {
   17710             putIRegA(rD,
   17711                      binop(Iop_Or32,
   17712                            binop(Iop_And32, getIRegA(rD), mkU32(0xFFFF)),
   17713                            mkU32(imm16 << 16)),
   17714                      condT, Ijk_Boring);
   17715             DIP("movt%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
   17716             goto decode_success;
   17717          } else {
   17718             putIRegA(rD, mkU32(imm16), condT, Ijk_Boring);
   17719             DIP("movw%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
   17720             goto decode_success;
   17721          }
   17722       }
   17723       /* fall through */
   17724    }
   17725 
   17726    /* ----------- uxtb, sxtb, uxth, sxth, uxtb16, sxtb16 ----------- */
   17727    /* FIXME: this is an exact duplicate of the Thumb version.  They
   17728       should be commoned up. */
   17729    if (BITS8(0,1,1,0,1, 0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,0))
   17730        && BITS4(1,1,1,1) == INSN(19,16)
   17731        && BITS4(0,1,1,1) == INSN(7,4)
   17732        && BITS4(0,0, 0,0) == (INSN(11,8) & BITS4(0,0,1,1))) {
   17733       UInt subopc = INSN(27,20) & BITS8(0,0,0,0,0, 1,1,1);
   17734       if (subopc != BITS4(0,0,0,1) && subopc != BITS4(0,1,0,1)) {
   17735          Int    rot  = (INSN(11,8) >> 2) & 3;
   17736          UInt   rM   = INSN(3,0);
   17737          UInt   rD   = INSN(15,12);
   17738          IRTemp srcT = newTemp(Ity_I32);
   17739          IRTemp rotT = newTemp(Ity_I32);
   17740          IRTemp dstT = newTemp(Ity_I32);
   17741          const HChar* nm = "???";
   17742          assign(srcT, getIRegA(rM));
   17743          assign(rotT, genROR32(srcT, 8 * rot)); /* 0, 8, 16 or 24 only */
   17744          switch (subopc) {
   17745             case BITS4(0,1,1,0): // UXTB
   17746                assign(dstT, unop(Iop_8Uto32, unop(Iop_32to8, mkexpr(rotT))));
   17747                nm = "uxtb";
   17748                break;
   17749             case BITS4(0,0,1,0): // SXTB
   17750                assign(dstT, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rotT))));
   17751                nm = "sxtb";
   17752                break;
   17753             case BITS4(0,1,1,1): // UXTH
   17754                assign(dstT, unop(Iop_16Uto32, unop(Iop_32to16, mkexpr(rotT))));
   17755                nm = "uxth";
   17756                break;
   17757             case BITS4(0,0,1,1): // SXTH
   17758                assign(dstT, unop(Iop_16Sto32, unop(Iop_32to16, mkexpr(rotT))));
   17759                nm = "sxth";
   17760                break;
   17761             case BITS4(0,1,0,0): // UXTB16
   17762                assign(dstT, binop(Iop_And32, mkexpr(rotT), mkU32(0x00FF00FF)));
   17763                nm = "uxtb16";
   17764                break;
   17765             case BITS4(0,0,0,0): { // SXTB16
   17766                IRTemp lo32 = newTemp(Ity_I32);
   17767                IRTemp hi32 = newTemp(Ity_I32);
   17768                assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
   17769                assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
   17770                assign(
   17771                   dstT,
   17772                   binop(Iop_Or32,
   17773                         binop(Iop_And32,
   17774                               unop(Iop_8Sto32,
   17775                                    unop(Iop_32to8, mkexpr(lo32))),
   17776                               mkU32(0xFFFF)),
   17777                         binop(Iop_Shl32,
   17778                               unop(Iop_8Sto32,
   17779                                    unop(Iop_32to8, mkexpr(hi32))),
   17780                               mkU8(16))
   17781                ));
   17782                nm = "sxtb16";
   17783                break;
   17784             }
   17785             default:
   17786                vassert(0); // guarded by "if" above
   17787          }
   17788          putIRegA(rD, mkexpr(dstT), condT, Ijk_Boring);
   17789          DIP("%s%s r%u, r%u, ROR #%d\n", nm, nCC(INSN_COND), rD, rM, rot);
   17790          goto decode_success;
   17791       }
   17792       /* fall through */
   17793    }
   17794 
   17795    /* ------------------- bfi, bfc ------------------- */
   17796    if (BITS8(0,1,1,1,1,1,0, 0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
   17797        && BITS4(0, 0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
   17798       UInt rD  = INSN(15,12);
   17799       UInt rN  = INSN(3,0);            /* 15 selects the bfc form */
   17800       UInt msb = (insn >> 16) & 0x1F; /* 20:16 */
   17801       UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
   17802       if (rD == 15 || msb < lsb) {
   17803          /* undecodable; fall through */
   17804       } else {
   17805          IRTemp src    = newTemp(Ity_I32);
   17806          IRTemp olddst = newTemp(Ity_I32);
   17807          IRTemp newdst = newTemp(Ity_I32);
   17808          UInt   mask   = ((UInt)1) << (msb - lsb); /* 2^(width-1) */
   17809          mask = (mask - 1) + mask; /* 2^width - 1, without shifting by 32 */
   17810          vassert(mask != 0); // mask >= 1 here, since msb >= lsb on this path
   17811          mask <<= lsb; /* move the field mask up to start at bit lsb */
   17812          /* bfc (rN == 15) inserts zeroes instead of a register value. */
   17813          assign(src, rN == 15 ? mkU32(0) : getIRegA(rN));
   17814          assign(olddst, getIRegA(rD));
   17815          assign(newdst, /* (src << lsb) inside the field, old rD outside it */
   17816                 binop(Iop_Or32,
   17817                    binop(Iop_And32,
   17818                          binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
   17819                          mkU32(mask)),
   17820                    binop(Iop_And32,
   17821                          mkexpr(olddst),
   17822                          mkU32(~mask)))
   17823                );
   17824 
   17825          putIRegA(rD, mkexpr(newdst), condT, Ijk_Boring);
   17826          /* Printed width is msb-lsb+1, i.e. the number of bits replaced. */
   17827          if (rN == 15) {
   17828             DIP("bfc%s r%u, #%u, #%u\n",
   17829                 nCC(INSN_COND), rD, lsb, msb-lsb+1);
   17830          } else {
   17831             DIP("bfi%s r%u, r%u, #%u, #%u\n",
   17832                 nCC(INSN_COND), rD, rN, lsb, msb-lsb+1);
   17833          }
   17834          goto decode_success;
   17835       }
   17836       /* fall through */
   17837    }
   17838 
   17839    /* ------------------- {u,s}bfx ------------------- */
   17840    if (BITS8(0,1,1,1,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
   17841        && BITS4(0,1,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
   17842       UInt rD  = INSN(15,12);
   17843       UInt rN  = INSN(3,0);
   17844       UInt wm1 = (insn >> 16) & 0x1F; /* 20:16, field width minus 1 */
   17845       UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
   17846       UInt msb = lsb + wm1;
   17847       UInt isU = (insn >> 22) & 1;    /* 22:22, 1 = ubfx, 0 = sbfx */
   17848       if (rD == 15 || rN == 15 || msb >= 32) {
   17849          /* undecodable; fall through */
   17850       } else {
   17851          IRTemp src  = newTemp(Ity_I32);
   17852          IRTemp tmp  = newTemp(Ity_I32);
   17853          IRTemp res  = newTemp(Ity_I32);
   17854          UInt   mask = ((((UInt)1) << wm1) - 1) + (((UInt)1) << wm1);
   17855          vassert(msb <= 31); /* msb is unsigned; only the upper bound matters */
   17856          vassert(mask != 0); // mask has the low wm1+1 bits set, so bit 0 at least
   17857          /* mask is built in UInt: wm1 may be 31, and 1 << 31 on an int is UB. */
   17858          assign(src, getIRegA(rN));
   17859          assign(tmp, binop(Iop_And32,
   17860                            binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
   17861                            mkU32(mask)));
   17862          assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
   17863                            binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
   17864                            mkU8(31-wm1)));
   17865          /* The shift pair above zero- or sign-extends the field from bit wm1. */
   17866          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   17867 
   17868          DIP("%s%s r%u, r%u, #%u, #%u\n",
   17869              isU ? "ubfx" : "sbfx",
   17870              nCC(INSN_COND), rD, rN, lsb, wm1 + 1);
   17871          goto decode_success;
   17872       }
   17873       /* fall through */
   17874    }
   17875 
   17876    /* --------------------- Load/store doubleword ------------- */
   17877    // LDRD STRD
   17878    /*                 31   27   23   19 15 11   7    3     # highest bit
   17879                         28   24   20 16 12    8    4    0
   17880       A5-36   1 | 16  cond 0001 U100 Rn Rd im4h 11S1 im4l
   17881       A5-38   1 | 32  cond 0001 U000 Rn Rd 0000 11S1 Rm
   17882       A5-40   2 | 16  cond 0001 U110 Rn Rd im4h 11S1 im4l
   17883       A5-42   2 | 32  cond 0001 U010 Rn Rd 0000 11S1 Rm
   17884       A5-44   3 | 16  cond 0000 U100 Rn Rd im4h 11S1 im4l
   17885       A5-46   3 | 32  cond 0000 U000 Rn Rd 0000 11S1 Rm
   17886    */
   17887    /* case coding:
   17888              1   at-ea               (access at ea)
   17889              2   at-ea-then-upd      (access at ea, then Rn = ea)
   17890              3   at-Rn-then-upd      (access at Rn, then Rn = ea)
   17891       ea coding
   17892              16  Rn +/- imm8
   17893              32  Rn +/- Rm
   17894    */
   17895    /* Quickly skip over all of this for hopefully most instructions */
   17896    if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
   17897       goto after_load_store_doubleword;
   17898 
   17899    /* Check the "11S1" thing. */
   17900    if ((INSN(7,4) & BITS4(1,1,0,1)) != BITS4(1,1,0,1))
   17901       goto after_load_store_doubleword;
   17902    /* summary = (case coding) | (ea coding), per the two tables above. */
   17903    summary = 0;
   17904 
   17905    /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,0,0)) {
   17906       summary = 1 | 16;
   17907    }
   17908    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,0,0)) {
   17909       summary = 1 | 32;
   17910    }
   17911    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,1,0)) {
   17912       summary = 2 | 16;
   17913    }
   17914    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,1,0)) {
   17915       summary = 2 | 32;
   17916    }
   17917    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(1,0,0)) {
   17918       summary = 3 | 16;
   17919    }
   17920    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(0,0,0)) {
   17921       summary = 3 | 32;
   17922    }
   17923    else goto after_load_store_doubleword;
   17924 
   17925    { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
   17926      UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
   17927      UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
   17928      UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
   17929      UInt bS   = (insn >> 5) & 1;    /* S=1 store, S=0 load */
   17930      UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
   17931 
   17932      /* Require rD to be an even numbered register */
   17933      if ((rD & 1) != 0)
   17934         goto after_load_store_doubleword;
   17935 
   17936      /* Require 11:8 == 0 for Rn +/- Rm cases */
   17937      if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
   17938         goto after_load_store_doubleword;
   17939 
   17940      /* Skip some invalid cases, which would lead to two competing
   17941         updates to the same register, or which are otherwise
   17942         disallowed by the spec. */
   17943      switch (summary) {
   17944         case 1 | 16:
   17945            break;
   17946         case 1 | 32:
   17947            if (rM == 15) goto after_load_store_doubleword;
   17948            break;
   17949         case 2 | 16: case 3 | 16:
   17950            if (rN == 15) goto after_load_store_doubleword;
   17951            if (bS == 0 && (rN == rD || rN == rD+1))
   17952               goto after_load_store_doubleword;
   17953            break;
   17954         case 2 | 32: case 3 | 32:
   17955            if (rM == 15) goto after_load_store_doubleword;
   17956            if (rN == 15) goto after_load_store_doubleword;
   17957            if (rN == rM) goto after_load_store_doubleword;
   17958            if (bS == 0 && (rN == rD || rN == rD+1))
   17959               goto after_load_store_doubleword;
   17960            break;
   17961         default:
   17962            vassert(0);
   17963      }
   17964 
   17965      /* If this is a branch, make it unconditional at this point.
   17966         Doing conditional branches in-line is too complex (for
   17967         now). */
   17968      vassert((rD & 1) == 0); /* from tests above */
   17969      if (bS == 0 && rD+1 == 15 && condT != IRTemp_INVALID) {
   17970         // go uncond
   17971         mk_skip_over_A32_if_cond_is_false( condT );
   17972         condT = IRTemp_INVALID;
   17973         // now uncond
   17974      }
   17975 
   17976      /* compute the effective address.  Bind it to a tmp since we
   17977         may need to use it twice. */
   17978      IRExpr* eaE = NULL;
   17979      switch (summary & 0xF0) { /* ea coding */
   17980         case 16:
   17981            eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
   17982            break;
   17983         case 32:
   17984            eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
   17985            break;
   17986      }
   17987      vassert(eaE);
   17988      IRTemp eaT = newTemp(Ity_I32);
   17989      assign(eaT, eaE);
   17990 
   17991      /* get the old Rn value */
   17992      IRTemp rnT = newTemp(Ity_I32);
   17993      assign(rnT, getIRegA(rN));
   17994 
   17995      /* decide on the transfer address */
   17996      IRTemp taT = IRTemp_INVALID;
   17997      switch (summary & 0x0F) { /* case coding */
   17998         case 1: case 2: taT = eaT; break;
   17999         case 3:         taT = rnT; break;
   18000      }
   18001      vassert(taT != IRTemp_INVALID);
   18002 
   18003      /* XXX deal with alignment constraints */
   18004      /* XXX: but the A8 doesn't seem to trap for misaligned loads, so,
   18005         ignore alignment issues for the time being. */
   18006 
   18007      /* For almost all cases, we do the writeback after the transfers.
   18008         However, that leaves the stack "uncovered" in cases like:
   18009            strd    rD, [sp, #-8]
   18010            strd    rD, [sp, #-16]
   18011         In which case, do the writeback to SP now, instead of later.
   18012         This is bad in that it makes the insn non-restartable if the
   18013         accesses fault, but at least keeps Memcheck happy. */
   18014      Bool writeback_already_done = False;
   18015      if (bS == 1 /*store*/ && summary == (2 | 16)
   18016          && rN == 13 && rN != rD && rN != rD+1
   18017          && bU == 0/*minus*/
   18018          && (imm8 == 8 || imm8 == 16)) {
   18019         putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   18020         writeback_already_done = True;
   18021      }
   18022 
   18023      /* doubleword store  S 1
   18024         doubleword load   S 0
   18025      */
   18026      const HChar* name = NULL;
   18027      /* generate the transfers */
   18028      if (bS == 1) { // doubleword store
   18029         storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(0)),
   18030                         getIRegA(rD+0), condT );
   18031         storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(4)),
   18032                         getIRegA(rD+1), condT );
   18033         name = "strd";
   18034      } else { // doubleword load
   18035         IRTemp oldRd0 = newTemp(Ity_I32);
   18036         IRTemp oldRd1 = newTemp(Ity_I32);
   18037         assign(oldRd0, llGetIReg(rD+0)); /* passed to the guarded load below */
   18038         assign(oldRd1, llGetIReg(rD+1)); /* NOTE(review): presumably the result if condT is false - confirm */
   18039         IRTemp newRd0 = newTemp(Ity_I32);
   18040         IRTemp newRd1 = newTemp(Ity_I32);
   18041         loadGuardedLE( newRd0, ILGop_Ident32,
   18042                        binop(Iop_Add32, mkexpr(taT), mkU32(0)),
   18043                        mkexpr(oldRd0), condT );
   18044         putIRegA( rD+0, mkexpr(newRd0), IRTemp_INVALID, Ijk_Boring );
   18045         loadGuardedLE( newRd1, ILGop_Ident32,
   18046                        binop(Iop_Add32, mkexpr(taT), mkU32(4)),
   18047                        mkexpr(oldRd1), condT );
   18048         putIRegA( rD+1, mkexpr(newRd1), IRTemp_INVALID, Ijk_Boring );
   18049         name = "ldrd";
   18050      }
   18051 
   18052      /* Update Rn if necessary. */
   18053      switch (summary & 0x0F) {
   18054         case 2: case 3:
   18055            // should be assured by logic above:
   18056            vassert(rN != 15); /* from checks above */
   18057            if (bS == 0) {
   18058               vassert(rD+0 != rN); /* since we just wrote rD+0 */
   18059               vassert(rD+1 != rN); /* since we just wrote rD+1 */
   18060            }
   18061            if (!writeback_already_done)
   18062               putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
   18063            break;
   18064      }
   18065 
   18066      switch (summary & 0x0F) {
   18067         case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
   18068                  break;
   18069         case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
   18070                      name, nCC(INSN_COND), rD, dis_buf);
   18071                  break;
   18072         case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
   18073                      name, nCC(INSN_COND), rD, dis_buf);
   18074                  break;
   18075         default: vassert(0);
   18076      }
   18077 
   18078      goto decode_success;
   18079    }
   18080 
   18081   after_load_store_doubleword:
   18082 
   18083    /* ------------------- {s,u}xtab ------------- */
   18084    if (BITS8(0,1,1,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   18085        && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
   18086        && BITS4(0,1,1,1) == INSN(7,4)) {
   18087       UInt rN  = INSN(19,16);
   18088       UInt rD  = INSN(15,12);
   18089       UInt rM  = INSN(3,0);
   18090       UInt rot = (insn >> 10) & 3; /* 11:10, rotation in units of 8 bits */
   18091       UInt isU = INSN(22,22);      /* 1 = zero-extend, 0 = sign-extend */
   18092       if (rN == 15/*it's {S,U}XTB*/ || rD == 15 || rM == 15) {
   18093          /* undecodable; fall through */
   18094       } else {
   18095          IRTemp srcL = newTemp(Ity_I32);
   18096          IRTemp srcR = newTemp(Ity_I32);
   18097          IRTemp res  = newTemp(Ity_I32);
   18098          assign(srcR, getIRegA(rM));
   18099          assign(srcL, getIRegA(rN));
   18100          assign(res,  binop(Iop_Add32, /* rN + extend8(ROR(rM, 8*rot)) */
   18101                             mkexpr(srcL),
   18102                             unop(isU ? Iop_8Uto32 : Iop_8Sto32,
   18103                                  unop(Iop_32to8,
   18104                                       genROR32(srcR, 8 * rot)))));
   18105          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   18106          DIP("%cxtab%s r%u, r%u, r%u, ror #%u\n", /* print the bit count applied */
   18107              isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, 8 * rot);
   18108          goto decode_success;
   18109       }
   18110       /* fall through */
   18111    }
   18112 
   18113    /* ------------------- {s,u}xtah ------------- */
   18114    if (BITS8(0,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
   18115        && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
   18116        && BITS4(0,1,1,1) == INSN(7,4)) {
   18117       UInt rN  = INSN(19,16);
   18118       UInt rD  = INSN(15,12);
   18119       UInt rM  = INSN(3,0);
   18120       UInt rot = (insn >> 10) & 3; /* 11:10, rotation in units of 8 bits */
   18121       UInt isU = INSN(22,22);      /* 1 = zero-extend, 0 = sign-extend */
   18122       if (rN == 15/*it's {S,U}XTH*/ || rD == 15 || rM == 15) {
   18123          /* undecodable; fall through */
   18124       } else {
   18125          IRTemp srcL = newTemp(Ity_I32);
   18126          IRTemp srcR = newTemp(Ity_I32);
   18127          IRTemp res  = newTemp(Ity_I32);
   18128          assign(srcR, getIRegA(rM));
   18129          assign(srcL, getIRegA(rN));
   18130          assign(res,  binop(Iop_Add32, /* rN + extend16(ROR(rM, 8*rot)) */
   18131                             mkexpr(srcL),
   18132                             unop(isU ? Iop_16Uto32 : Iop_16Sto32,
   18133                                  unop(Iop_32to16,
   18134                                       genROR32(srcR, 8 * rot)))));
   18135          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   18136          /* Print the rotation as the bit count actually applied (8 * rot). */
   18137          DIP("%cxtah%s r%u, r%u, r%u, ror #%u\n",
   18138              isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, 8 * rot);
   18139          goto decode_success;
   18140       }
   18141       /* fall through */
   18142    }
   18143 
   18144    /* ------------------- rev16, rev ------------------ */
   18145    if (INSN(27,16) == 0x6BF
   18146        && (INSN(11,4) == 0xFB/*rev16*/ || INSN(11,4) == 0xF3/*rev*/)) {
   18147       Bool isREV = INSN(11,4) == 0xF3; /* otherwise this is rev16 */
   18148       UInt rM    = INSN(3,0);
   18149       UInt rD    = INSN(15,12);
   18150       if (rM != 15 && rD != 15) { /* r15 operands are left undecoded here */
   18151          IRTemp rMt = newTemp(Ity_I32);
   18152          assign(rMt, getIRegA(rM));
   18153          IRTemp res = isREV ? gen_REV(rMt) : gen_REV16(rMt); /* helpers defined elsewhere */
   18154          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   18155          DIP("rev%s%s r%u, r%u\n", isREV ? "" : "16",
   18156              nCC(INSN_COND), rD, rM);
   18157          goto decode_success;
   18158       }
   18159    }
   18160 
   18161    /* ------------------- revsh ----------------------- */
   18162    if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xFB) {
   18163       UInt rM = INSN(3,0);
   18164       UInt rD = INSN(15,12);
   18165       if (rM != 15 && rD != 15) { /* r15 operands are left undecoded here */
   18166          IRTemp irt_rM  = newTemp(Ity_I32);
   18167          IRTemp irt_hi  = newTemp(Ity_I32);
   18168          IRTemp irt_low = newTemp(Ity_I32);
   18169          IRTemp irt_res = newTemp(Ity_I32);
   18170          assign(irt_rM, getIRegA(rM));
   18171          assign(irt_hi, /* rM[7:0] moved to bits 15:8, sign-extended upwards */
   18172                 binop(Iop_Sar32,
   18173                       binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
   18174                       mkU8(16)
   18175                 )
   18176          );
   18177          assign(irt_low, /* rM[15:8] moved down to bits 7:0 */
   18178                 binop(Iop_And32,
   18179                       binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
   18180                       mkU32(0xFF)
   18181                 )
   18182          );
   18183          assign(irt_res, /* the two parts occupy disjoint bits, so Or merges them */
   18184                 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
   18185          );
   18186          putIRegA(rD, mkexpr(irt_res), condT, Ijk_Boring);
   18187          DIP("revsh%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
   18188          goto decode_success;
   18189       }
   18190    }
   18191 
   18192    /* ------------------- rbit ------------------ */
   18193    if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xF3) {
   18194       UInt rD = INSN(15,12);
   18195       UInt rM = INSN(3,0);
   18196       if (rD != 15 && rM != 15) { /* r15 operands are left undecoded here */
   18197          IRTemp arg = newTemp(Ity_I32);
   18198          assign(arg, getIRegA(rM));
   18199          IRTemp res = gen_BITREV(arg); /* helper defined elsewhere in this file */
   18200          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
   18201          DIP("rbit%s r%u, r%u\n", nCC(INSN_COND), rD, rM); /* show the condition too */
   18202          goto decode_success;
   18203       }
   18204    }
   18205 
   18206    /* ------------------- smmul ------------------ */
   18207    if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
   18208        && INSN(15,12) == BITS4(1,1,1,1)
   18209        && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
   18210       UInt bitR = INSN(5,5); /* 1 = rounding form: add 0x80000000 first */
   18211       UInt rD = INSN(19,16);
   18212       UInt rM = INSN(11,8);
   18213       UInt rN = INSN(3,0);
   18214       if (rD != 15 && rM != 15 && rN != 15) {
   18215          IRExpr* res /* high 32 bits of the signed 64-bit product (+ rounding) */
   18216          = unop(Iop_64HIto32,
   18217                 binop(Iop_Add64,
   18218                       binop(Iop_MullS32, getIRegA(rN), getIRegA(rM)),
   18219                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
   18220          putIRegA(rD, res, condT, Ijk_Boring);
   18221          DIP("smmul%s%s r%u, r%u, r%u\n", /* "r" suffix precedes the condition */
   18222              bitR ? "r" : "", nCC(INSN_COND), rD, rN, rM);
   18223          goto decode_success;
   18224       }
   18225    }
   18226 
   18227    /* ------------------- smmla ------------------ */
   18228    if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
   18229        && INSN(15,12) != BITS4(1,1,1,1)
   18230        && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
   18231       UInt bitR = INSN(5,5); /* 1 = rounding form: add 0x80000000 first */
   18232       UInt rD = INSN(19,16);
   18233       UInt rA = INSN(15,12); /* accumulator; 15 is excluded by the test above */
   18234       UInt rM = INSN(11,8);
   18235       UInt rN = INSN(3,0);
   18236       if (rD != 15 && rM != 15 && rN != 15) {
   18237          IRExpr* res /* high 32 bits of (rA:0) + rN*rM (+ rounding) */
   18238          = unop(Iop_64HIto32,
   18239                 binop(Iop_Add64,
   18240                       binop(Iop_Add64,
   18241                             binop(Iop_32HLto64, getIRegA(rA), mkU32(0)),
   18242                             binop(Iop_MullS32, getIRegA(rN), getIRegA(rM))),
   18243                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
   18244          putIRegA(rD, res, condT, Ijk_Boring);
   18245          DIP("smmla%s%s r%u, r%u, r%u, r%u\n", /* "r" suffix precedes the condition */
   18246              bitR ? "r" : "", nCC(INSN_COND), rD, rN, rM, rA);
   18247          goto decode_success;
   18248       }
   18249    }
   18250 
   18251    /* -------------- (A1) LDRT reg+/-#imm12 -------------- */
   18252    /* Load Register Unprivileged:
   18253       ldrt<c> Rt, [Rn] {, #+/-imm12}
   18254       Post-indexed: the load address is Rn; Rn +/- imm12 is then written back. */
   18255    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,1) ) {
   18256       UInt rT     = INSN(15,12);
   18257       UInt rN     = INSN(19,16);
   18258       UInt imm12  = INSN(11,0);
   18259       UInt bU     = INSN(23,23); /* 1 = add the offset, 0 = subtract it */
   18260       Bool valid  = True;
   18261       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   18262       if (valid) { /* emitted as an ordinary guarded 32-bit load */
   18263          IRTemp newRt = newTemp(Ity_I32);
   18264          loadGuardedLE( newRt,
   18265                         ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
   18266          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   18267          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   18268                              getIRegA(rN), mkU32(imm12));
   18269          putIRegA(rN, erN, condT, Ijk_Boring); /* base register writeback */
   18270          DIP("ldrt%s r%u, [r%u], #%c%u\n",
   18271              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   18272          goto decode_success;
   18273       }
   18274    }
   18275 
   18276    /* -------------- (A2) LDRT reg+/-reg with shift -------------- */
   18277    /* Load Register Unprivileged:
   18278       ldrt<c> Rt, [Rn], +/-Rm{, shift}
   18279       Post-indexed: the load address is Rn; Rn is then updated by the shifted Rm. */
   18280    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,1)
   18281         && INSN(4,4) == 0 ) {
   18282       UInt rT     = INSN(15,12);
   18283       UInt rN     = INSN(19,16);
   18284       UInt rM     = INSN(3,0);
   18285       UInt imm5   = INSN(11,7);
   18286       UInt bU     = INSN(23,23);
   18287       UInt type   = INSN(6,5);
   18288       Bool valid  = True;
   18289       if (rT == 15 || rN == 15 || rN == rT || rM == 15
   18290           /* || (ArchVersion() < 6 && rM == rN) */)
   18291          valid = False;
   18292       if (valid) { /* emitted as an ordinary guarded 32-bit load */
   18293          IRTemp newRt = newTemp(Ity_I32);
   18294          loadGuardedLE( newRt,
   18295                         ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
   18296          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   18297          // dis_buf generated is slightly bogus, in fact.
   18298          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   18299                                                        type, imm5, dis_buf);
   18300          putIRegA(rN, erN, condT, Ijk_Boring); /* base register writeback */
   18301          DIP("ldrt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   18302          goto decode_success;
   18303       }
   18304    }
   18305 
   18306    /* -------------- (A1) LDRBT reg+/-#imm12 -------------- */
   18307    /* Load Register Byte Unprivileged:
   18308       ldrbt<c> Rt, [Rn], #+/-imm12
   18309       Post-indexed: the load address is Rn; Rn +/- imm12 is then written back. */
   18310    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,1) ) {
   18311       UInt rT     = INSN(15,12);
   18312       UInt rN     = INSN(19,16);
   18313       UInt imm12  = INSN(11,0);
   18314       UInt bU     = INSN(23,23); /* 1 = add the offset, 0 = subtract it */
   18315       Bool valid  = True;
   18316       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   18317       if (valid) { /* guarded byte load, zero-extended to 32 bits */
   18318          IRTemp newRt = newTemp(Ity_I32);
   18319          loadGuardedLE( newRt,
   18320                         ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
   18321          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   18322          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   18323                              getIRegA(rN), mkU32(imm12));
   18324          putIRegA(rN, erN, condT, Ijk_Boring); /* base register writeback */
   18325          DIP("ldrbt%s r%u, [r%u], #%c%u\n",
   18326              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   18327          goto decode_success;
   18328       }
   18329    }
   18330 
   18331    /* -------------- (A2) LDRBT reg+/-reg with shift -------------- */
   18332    /* Load Register Byte Unprivileged:
   18333       ldrbt<c> Rt, [Rn], +/-Rm{, shift}
   18334       Post-indexed: the load address is Rn; Rn is then updated by the shifted Rm. */
   18335    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,1)
   18336         && INSN(4,4) == 0 ) {
   18337       UInt rT     = INSN(15,12);
   18338       UInt rN     = INSN(19,16);
   18339       UInt rM     = INSN(3,0);
   18340       UInt imm5   = INSN(11,7);
   18341       UInt bU     = INSN(23,23);
   18342       UInt type   = INSN(6,5);
   18343       Bool valid  = True;
   18344       if (rT == 15 || rN == 15 || rN == rT || rM == 15
   18345           /* || (ArchVersion() < 6 && rM == rN) */)
   18346          valid = False;
   18347       if (valid) { /* guarded byte load, zero-extended to 32 bits */
   18348          IRTemp newRt = newTemp(Ity_I32);
   18349          loadGuardedLE( newRt,
   18350                         ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
   18351          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   18352          // dis_buf generated is slightly bogus, in fact.
   18353          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   18354                                                        type, imm5, dis_buf);
   18355          putIRegA(rN, erN, condT, Ijk_Boring); /* base register writeback */
   18356          DIP("ldrbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   18357          goto decode_success;
   18358       }
   18359    }
   18360 
   18361    /* -------------- (A1) LDRHT reg+#imm8 -------------- */
   18362    /* Load Register Halfword Unprivileged:
   18363       ldrht<c> Rt, [Rn] {, #+/-imm8}
   18364       Post-indexed: the load address is Rn; Rn +/- imm8 is then written back. */
   18365    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
   18366        && INSN(7,4) == BITS4(1,0,1,1) ) {
   18367       UInt rT    = INSN(15,12);
   18368       UInt rN    = INSN(19,16);
   18369       UInt bU    = INSN(23,23); /* 1 = add the offset, 0 = subtract it */
   18370       UInt imm4H = INSN(11,8);
   18371       UInt imm4L = INSN(3,0);
   18372       UInt imm8  = (imm4H << 4) | imm4L; /* offset is split across 11:8 and 3:0 */
   18373       Bool valid = True;
   18374       if (rT == 15 || rN == 15 || rN == rT)
   18375          valid = False;
   18376       if (valid) { /* guarded halfword load, zero-extended to 32 bits */
   18377          IRTemp newRt = newTemp(Ity_I32);
   18378          loadGuardedLE( newRt,
   18379                         ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
   18380          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   18381          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   18382                              getIRegA(rN), mkU32(imm8));
   18383          putIRegA(rN, erN, condT, Ijk_Boring); /* base register writeback */
   18384          DIP("ldrht%s r%u, [r%u], #%c%u\n",
   18385              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   18386          goto decode_success;
   18387       }
   18388    }
   18389 
   18390    /* -------------- (A2) LDRHT reg+/-reg -------------- */
   18391    /* Load Register Halfword Unprivileged:
   18392       ldrht<c> Rt, [Rn], +/-Rm
   18393       Post-indexed: the load address is Rn; Rn +/- Rm is then written back. */
   18394    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
   18395        && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
   18396       UInt rT    = INSN(15,12);
   18397       UInt rN    = INSN(19,16);
   18398       UInt rM    = INSN(3,0);
   18399       UInt bU    = INSN(23,23); /* 1 = add Rm, 0 = subtract it */
   18400       Bool valid = True;
   18401       if (rT == 15 || rN == 15 || rN == rT || rM == 15)
   18402          valid = False;
   18403       if (valid) { /* guarded halfword load, zero-extended to 32 bits */
   18404          IRTemp newRt = newTemp(Ity_I32);
   18405          loadGuardedLE( newRt,
   18406                         ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
   18407          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   18408          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   18409                              getIRegA(rN), getIRegA(rM));
   18410          putIRegA(rN, erN, condT, Ijk_Boring); /* base register writeback */
   18411          DIP("ldrht%s r%u, [r%u], %cr%u\n",
   18412              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   18413          goto decode_success;
   18414       }
   18415    }
   18416 
   18417    /* -------------- (A1) LDRSHT reg+#imm8 -------------- */
   18418    /* Load Register Signed Halfword Unprivileged:
   18419       ldrsht<c> Rt, [Rn] {, #+/-imm8}
   18420       Post-indexed: the load address is Rn; Rn +/- imm8 is then written back. */
   18421    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
   18422        && INSN(7,4) == BITS4(1,1,1,1)) {
   18423       UInt rT    = INSN(15,12);
   18424       UInt rN    = INSN(19,16);
   18425       UInt bU    = INSN(23,23); /* 1 = add the offset, 0 = subtract it */
   18426       UInt imm4H = INSN(11,8);
   18427       UInt imm4L = INSN(3,0);
   18428       UInt imm8  = (imm4H << 4) | imm4L; /* offset is split across 11:8 and 3:0 */
   18429       Bool valid = True;
   18430       if (rN == 15 || rT == 15 || rN == rT)
   18431          valid = False;
   18432       if (valid) { /* guarded halfword load, sign-extended to 32 bits */
   18433          IRTemp newRt = newTemp(Ity_I32);
   18434          loadGuardedLE( newRt,
   18435                         ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
   18436          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   18437          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   18438                              getIRegA(rN), mkU32(imm8));
   18439          putIRegA(rN, erN, condT, Ijk_Boring); /* base register writeback */
   18440          DIP("ldrsht%s r%u, [r%u], #%c%u\n",
   18441              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   18442          goto decode_success;
   18443       }
   18444    }
   18445 
   18446    /* -------------- (A2) LDRSHT reg+/-reg -------------- */
   18447    /* Load Register Signed Halfword Unprivileged:
   18448       ldrsht<c> Rt, [Rn], +/-Rm
   18449       Post-indexed: the load address is Rn; Rn +/- Rm is then written back. */
   18450    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
   18451        && INSN(11,4) == BITS8(0,0,0,0,1,1,1,1)) {
   18452       UInt rT    = INSN(15,12);
   18453       UInt rN    = INSN(19,16);
   18454       UInt rM    = INSN(3,0);
   18455       UInt bU    = INSN(23,23); /* 1 = add Rm, 0 = subtract it */
   18456       Bool valid = True;
   18457       if (rN == 15 || rT == 15 || rN == rT || rM == 15)
   18458          valid = False;
   18459       if (valid) { /* guarded halfword load, sign-extended to 32 bits */
   18460          IRTemp newRt = newTemp(Ity_I32);
   18461          loadGuardedLE( newRt,
   18462                         ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
   18463          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   18464          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   18465                              getIRegA(rN), getIRegA(rM));
   18466          putIRegA(rN, erN, condT, Ijk_Boring); /* base register writeback */
   18467          DIP("ldrsht%s r%u, [r%u], %cr%u\n",
   18468              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   18469          goto decode_success;
   18470       }
   18471    }
   18472 
   18473    /* -------------- (A1) LDRSBT reg+#imm8 -------------- */
   18474    /* Load Register Signed Byte Unprivileged:
   18475       ldrsbt<c> Rt, [Rn] {, #+/-imm8}
   18476    */
   18477    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
   18478        && INSN(7,4) == BITS4(1,1,0,1)) {
   18479       UInt rT    = INSN(15,12);
   18480       UInt rN    = INSN(19,16);
   18481       UInt bU    = INSN(23,23);
   18482       UInt imm4H = INSN(11,8);
   18483       UInt imm4L = INSN(3,0);
   18484       UInt imm8  = (imm4H << 4) | imm4L;
   18485       Bool valid = True;
   18486       if (rT == 15 || rN == 15 || rN == rT)
   18487          valid = False;
   18488       if (valid) {
   18489          IRTemp newRt = newTemp(Ity_I32);
   18490          loadGuardedLE( newRt,
   18491                         ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
   18492          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   18493          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   18494                              getIRegA(rN), mkU32(imm8));
   18495          putIRegA(rN, erN, condT, Ijk_Boring);
   18496          DIP("ldrsbt%s r%u, [r%u], #%c%u\n",
   18497              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   18498          goto decode_success;
   18499       }
   18500    }
   18501 
   18502    /* -------------- (A2) LDRSBT reg+/-reg -------------- */
   18503    /* Load Register Signed Byte Unprivileged:
   18504       ldrsbt<c> Rt, [Rn], +/-Rm
   18505    */
   18506    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
   18507        && INSN(11,4) == BITS8(0,0,0,0,1,1,0,1)) {
   18508       UInt rT    = INSN(15,12);
   18509       UInt rN    = INSN(19,16);
   18510       UInt bU    = INSN(23,23);
   18511       UInt rM    = INSN(3,0);
   18512       Bool valid = True;
   18513       if (rT == 15 || rN == 15 || rN == rT || rM == 15)
   18514          valid = False;
   18515       if (valid) {
   18516          IRTemp newRt = newTemp(Ity_I32);
   18517          loadGuardedLE( newRt,
   18518                         ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
   18519          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
   18520          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
   18521                              getIRegA(rN), getIRegA(rM));
   18522          putIRegA(rN, erN, condT, Ijk_Boring);
   18523          DIP("ldrsbt%s r%u, [r%u], %cr%u\n",
   18524              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   18525          goto decode_success;
   18526       }
   18527    }
   18528 
   18529    /* -------------- (A1) STRBT reg+#imm12 -------------- */
   18530    /* Store Register Byte Unprivileged:
   18531       strbt<c> Rt, [Rn], #+/-imm12
   18532    */
   18533    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,0) ) {
   18534       UInt rT     = INSN(15,12);
   18535       UInt rN     = INSN(19,16);
   18536       UInt imm12  = INSN(11,0);
   18537       UInt bU     = INSN(23,23);
   18538       Bool valid = True;
   18539       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   18540       if (valid) {
   18541          IRExpr* address = getIRegA(rN);
   18542          IRExpr* data = unop(Iop_32to8, getIRegA(rT));
   18543          storeGuardedLE( address, data, condT);
   18544          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   18545                                getIRegA(rN), mkU32(imm12));
   18546          putIRegA(rN, newRn, condT, Ijk_Boring);
   18547          DIP("strbt%s r%u, [r%u], #%c%u\n",
   18548              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   18549          goto decode_success;
   18550       }
   18551    }
   18552 
   18553    /* -------------- (A2) STRBT reg+/-reg -------------- */
   18554    /* Store Register Byte Unprivileged:
   18555       strbt<c> Rt, [Rn], +/-Rm{, shift}
   18556    */
   18557    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,0)
   18558        && INSN(4,4) == 0) {
   18559       UInt rT     = INSN(15,12);
   18560       UInt rN     = INSN(19,16);
   18561       UInt imm5   = INSN(11,7);
   18562       UInt type   = INSN(6,5);
   18563       UInt rM     = INSN(3,0);
   18564       UInt bU     = INSN(23,23);
   18565       Bool valid  = True;
   18566       if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
   18567       if (valid) {
   18568          IRExpr* address = getIRegA(rN);
   18569          IRExpr* data = unop(Iop_32to8, getIRegA(rT));
   18570          storeGuardedLE( address, data, condT);
   18571          // dis_buf generated is slightly bogus, in fact.
   18572          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   18573                                                        type, imm5, dis_buf);
   18574          putIRegA(rN, erN, condT, Ijk_Boring);
   18575          DIP("strbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   18576          goto decode_success;
   18577       }
   18578    }
   18579 
   18580    /* -------------- (A1) STRHT reg+#imm8 -------------- */
   18581    /* Store Register Halfword Unprivileged:
   18582       strht<c> Rt, [Rn], #+/-imm8
   18583    */
   18584    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,0)
   18585        && INSN(7,4) == BITS4(1,0,1,1) ) {
   18586       UInt rT    = INSN(15,12);
   18587       UInt rN    = INSN(19,16);
   18588       UInt imm4H = INSN(11,8);
   18589       UInt imm4L = INSN(3,0);
   18590       UInt imm8  = (imm4H << 4) | imm4L;
   18591       UInt bU    = INSN(23,23);
   18592       Bool valid = True;
   18593       if (rT == 15 || rN == 15 || rN == rT) valid = False;
   18594       if (valid) {
   18595          IRExpr* address = getIRegA(rN);
   18596          IRExpr* data = unop(Iop_32to16, getIRegA(rT));
   18597          storeGuardedLE( address, data, condT);
   18598          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   18599                                getIRegA(rN), mkU32(imm8));
   18600          putIRegA(rN, newRn, condT, Ijk_Boring);
   18601          DIP("strht%s r%u, [r%u], #%c%u\n",
   18602              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
   18603          goto decode_success;
   18604       }
   18605    }
   18606 
   18607    /* -------------- (A2) STRHT reg+reg -------------- */
   18608    /* Store Register Halfword Unprivileged:
   18609       strht<c> Rt, [Rn], +/-Rm
   18610    */
   18611    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,0)
   18612        && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
   18613       UInt rT    = INSN(15,12);
   18614       UInt rN    = INSN(19,16);
   18615       UInt rM    = INSN(3,0);
   18616       UInt bU    = INSN(23,23);
   18617       Bool valid = True;
   18618       if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
   18619       if (valid) {
   18620          IRExpr* address = getIRegA(rN);
   18621          IRExpr* data = unop(Iop_32to16, getIRegA(rT));
   18622          storeGuardedLE( address, data, condT);
   18623          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   18624                                getIRegA(rN), getIRegA(rM));
   18625          putIRegA(rN, newRn, condT, Ijk_Boring);
   18626          DIP("strht%s r%u, [r%u], %cr%u\n",
   18627              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
   18628          goto decode_success;
   18629       }
   18630    }
   18631 
   18632    /* -------------- (A1) STRT reg+imm12 -------------- */
   18633    /* Store Register Unprivileged:
   18634       strt<c> Rt, [Rn], #+/-imm12
   18635    */
   18636    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,0) ) {
   18637       UInt rT    = INSN(15,12);
   18638       UInt rN    = INSN(19,16);
   18639       UInt imm12 = INSN(11,0);
   18640       UInt bU    = INSN(23,23);
   18641       Bool valid = True;
   18642       if (rN == 15 || rN == rT) valid = False;
   18643       if (valid) {
   18644          IRExpr* address = getIRegA(rN);
   18645          storeGuardedLE( address, getIRegA(rT), condT);
   18646          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
   18647                                getIRegA(rN), mkU32(imm12));
   18648          putIRegA(rN, newRn, condT, Ijk_Boring);
   18649          DIP("strt%s r%u, [r%u], %c%u\n",
   18650              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
   18651          goto decode_success;
   18652       }
   18653    }
   18654 
   18655    /* -------------- (A2) STRT reg+reg -------------- */
   18656    /* Store Register Unprivileged:
   18657       strt<c> Rt, [Rn], +/-Rm{, shift}
   18658    */
   18659    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,0)
   18660        && INSN(4,4) == 0 ) {
   18661       UInt rT    = INSN(15,12);
   18662       UInt rN    = INSN(19,16);
   18663       UInt rM    = INSN(3,0);
   18664       UInt type  = INSN(6,5);
   18665       UInt imm5  = INSN(11,7);
   18666       UInt bU    = INSN(23,23);
   18667       Bool valid = True;
   18668       if (rN == 15 || rN == rT || rM == 15) valid = False;
   18669       /* FIXME We didn't do:
   18670          if ArchVersion() < 6 && rM == rN then UNPREDICTABLE */
   18671       if (valid) {
   18672          storeGuardedLE( getIRegA(rN), getIRegA(rT), condT);
   18673          // dis_buf generated is slightly bogus, in fact.
   18674          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
   18675                                                        type, imm5, dis_buf);
   18676          putIRegA(rN, erN, condT, Ijk_Boring);
   18677          DIP("strt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
   18678          goto decode_success;
   18679       }
   18680    }
   18681 
   18682    /* ----------------------------------------------------------- */
   18683    /* -- ARMv7 instructions                                    -- */
   18684    /* ----------------------------------------------------------- */
   18685 
   18686    /* -------------- read CP15 TPIDRURO register ------------- */
   18687    /* mrc     p15, 0, r0,  c13, c0, 3  up to
   18688       mrc     p15, 0, r14, c13, c0, 3
   18689    */
   18690    /* I don't know whether this is really v7-only.  But anyway, we
   18691       have to support it since arm-linux uses TPIDRURO as a thread
   18692       state register. */
   18693    if (0x0E1D0F70 == (insn & 0x0FFF0FFF)) {
   18694       UInt rD = INSN(15,12);
   18695       if (rD <= 14) {
   18696          /* skip r15, that's too stupid to handle */
   18697          putIRegA(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32),
   18698                       condT, Ijk_Boring);
   18699          DIP("mrc%s p15,0, r%u, c13, c0, 3\n", nCC(INSN_COND), rD);
   18700          goto decode_success;
   18701       }
   18702       /* fall through */
   18703    }
   18704 
   18705    /* -------------- read CP15 PMUSRENR register ------------- */
   18706    /* mrc     p15, 0, r0,  c9, c14, 0  up to
   18707       mrc     p15, 0, r14, c9, c14, 0
   18708    */
   18709    /* A program reading this register is really asking "which
   18710       performance monitoring registers are available in user space?"
   18711       The simple answer here is to return zero, meaning "none".  See
   18712       #345984. */
   18713    if (0x0E190F1E == (insn & 0x0FFF0FFF)) {
   18714       UInt rD = INSN(15,12);
   18715       if (rD <= 14) {
   18716          /* skip r15, that's too stupid to handle */
   18717          putIRegA(rD, mkU32(0), condT, Ijk_Boring);
   18718          DIP("mrc%s p15,0, r%u, c9, c14, 0\n", nCC(INSN_COND), rD);
   18719          goto decode_success;
   18720       }
   18721       /* fall through */
   18722    }
   18723 
   18724    /* Handle various kinds of barriers.  This is rather indiscriminate
   18725       in the sense that they are all turned into an IR Fence, which
   18726       means we don't know which they are, so the back end has to
   18727       re-emit them all when it comes across an IR Fence.
   18728    */
   18729    /* v6 */ /* mcr 15, 0, rT, c7, c10, 5 */
   18730    if (0xEE070FBA == (insn & 0xFFFF0FFF)) {
   18731       UInt rT = INSN(15,12);
   18732       if (rT <= 14) {
   18733          /* mcr 15, 0, rT, c7, c10, 5 (v6) equiv to DMB (v7).  Data
   18734             Memory Barrier -- ensures ordering of memory accesses. */
   18735          stmt( IRStmt_MBE(Imbe_Fence) );
   18736          DIP("mcr 15, 0, r%u, c7, c10, 5 (data memory barrier)\n", rT);
   18737          goto decode_success;
   18738       }
   18739       /* fall through */
   18740    }
   18741    /* other flavours of barrier */
   18742    switch (insn) {
   18743       case 0xEE070F9A: /* v6 */
   18744          /* mcr 15, 0, r0, c7, c10, 4 (v6) equiv to DSB (v7).  Data
   18745             Synch Barrier -- ensures completion of memory accesses. */
   18746          stmt( IRStmt_MBE(Imbe_Fence) );
   18747          DIP("mcr 15, 0, r0, c7, c10, 4 (data synch barrier)\n");
   18748          goto decode_success;
   18749       case 0xEE070F95: /* v6 */
   18750          /* mcr 15, 0, r0, c7, c5, 4 (v6) equiv to ISB (v7).
   18751             Instruction Synchronisation Barrier (or Flush Prefetch
   18752             Buffer) -- a pipe flush, I think.  I suspect we could
   18753             ignore those, but to be on the safe side emit a fence
   18754             anyway. */
   18755          stmt( IRStmt_MBE(Imbe_Fence) );
   18756          DIP("mcr 15, 0, r0, c7, c5, 4 (insn synch barrier)\n");
   18757          goto decode_success;
   18758       default:
   18759          break;
   18760    }
   18761 
   18762    /* ----------------------------------------------------------- */
   18763    /* -- Hints                                                 -- */
   18764    /* ----------------------------------------------------------- */
   18765 
   18766    switch (insn & 0x0FFFFFFF) {
   18767       /* ------------------- NOP ------------------ */
   18768       case 0x0320F000:
   18769          DIP("nop%s\n", nCC(INSN_COND));
   18770          goto decode_success;
   18771       /* ------------------- YIELD ------------------ */
   18772       case 0x0320F001:
   18773          /* Continue after conditionally yielding. */
   18774          DIP("yield%s\n", nCC(INSN_COND));
   18775          stmt( IRStmt_Exit( unop(Iop_32to1,
   18776                                  condT == IRTemp_INVALID
   18777                                     ? mkU32(1) : mkexpr(condT)),
   18778                             Ijk_Yield,
   18779                             IRConst_U32(guest_R15_curr_instr_notENC + 4),
   18780                             OFFB_R15T ));
   18781          goto decode_success;
   18782       default:
   18783          break;
   18784    }
   18785 
   18786    /* ----------------------------------------------------------- */
   18787    /* -- VFP (CP 10, CP 11) instructions (in ARM mode)         -- */
   18788    /* ----------------------------------------------------------- */
   18789 
   18790    if (INSN_COND != ARMCondNV) {
   18791       Bool ok_vfp = decode_CP10_CP11_instruction (
   18792                        &dres, INSN(27,0), condT, INSN_COND,
   18793                        False/*!isT*/
   18794                     );
   18795       if (ok_vfp)
   18796          goto decode_success;
   18797    }
   18798 
   18799    /* ----------------------------------------------------------- */
   18800    /* -- NEON instructions (in ARM mode)                       -- */
   18801    /* ----------------------------------------------------------- */
   18802 
   18803    /* These are all in NV space, and so are taken care of (far) above,
   18804       by a call from this function to
   18805       decode_NV_instruction_ARMv7_and_below(). */
   18806 
   18807    /* ----------------------------------------------------------- */
   18808    /* -- v6 media instructions (in ARM mode)                   -- */
   18809    /* ----------------------------------------------------------- */
   18810 
   18811    { Bool ok_v6m = decode_V6MEDIA_instruction(
   18812                        &dres, INSN(27,0), condT, INSN_COND,
   18813                        False/*!isT*/
   18814                    );
   18815      if (ok_v6m)
   18816         goto decode_success;
   18817    }
   18818 
   18819    /* ----------------------------------------------------------- */
   18820    /* -- v8 instructions (in ARM mode)                         -- */
   18821    /* ----------------------------------------------------------- */
   18822 
   18823   after_v7_decoder:
   18824 
   18825    /* If we get here, it means that all attempts to decode the
   18826       instruction as ARMv7 or earlier have failed.  So, if we're doing
   18827       ARMv8 or later, here is the point to try for it. */
   18828 
   18829    if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
   18830       Bool ok_v8
   18831          = decode_V8_instruction( &dres, insn, condT, False/*!isT*/,
   18832                                   IRTemp_INVALID, IRTemp_INVALID );
   18833       if (ok_v8)
   18834          goto decode_success;
   18835    }
   18836 
   18837    /* ----------------------------------------------------------- */
   18838    /* -- Undecodable                                           -- */
   18839    /* ----------------------------------------------------------- */
   18840 
   18841    goto decode_failure;
   18842    /*NOTREACHED*/
   18843 
   18844   decode_failure:
   18845    /* All decode failures end up here. */
   18846    if (sigill_diag) {
   18847       vex_printf("disInstr(arm): unhandled instruction: "
   18848                  "0x%x\n", insn);
   18849       vex_printf("                 cond=%d(0x%x) 27:20=%d(0x%02x) "
   18850                                    "4:4=%d "
   18851                                    "3:0=%d(0x%x)\n",
   18852                  (Int)INSN_COND, (UInt)INSN_COND,
   18853                  (Int)INSN(27,20), (UInt)INSN(27,20),
   18854                  (Int)INSN(4,4),
   18855                  (Int)INSN(3,0), (UInt)INSN(3,0) );
   18856    }
   18857 
   18858    /* Tell the dispatcher that this insn cannot be decoded, and so has
   18859       not been executed, and (is currently) the next to be executed.
   18860       R15 should be up-to-date since it is made so at the start of each
   18861       insn, but nevertheless be paranoid and update it again right
   18862       now. */
   18863    vassert(0 == (guest_R15_curr_instr_notENC & 3));
   18864    llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
   18865    dres.len         = 0;
   18866    dres.whatNext    = Dis_StopHere;
   18867    dres.jk_StopHere = Ijk_NoDecode;
   18868    dres.continueAt  = 0;
   18869    return dres;
   18870 
   18871   decode_success:
   18872    /* All decode successes end up here. */
   18873    DIP("\n");
   18874 
   18875    vassert(dres.len == 4 || dres.len == 20);
   18876 
   18877    /* Now then.  Do we have an implicit jump to r15 to deal with? */
   18878    if (r15written) {
   18879       /* If we get a jump to deal with, we assume that there's been no
   18880          other competing branch stuff previously generated for this
   18881          insn.  That's reasonable, in the sense that the ARM insn set
   18882          appears to declare as "Unpredictable" any instruction which
   18883          generates more than one possible new value for r15.  Hence
   18884          just assert.  The decoders themselves should check against
   18885          all such instructions which are thusly Unpredictable, and
   18886          decline to decode them.  Hence we should never get here if we
   18887          have competing new values for r15, and hence it is safe to
   18888          assert here. */
   18889       vassert(dres.whatNext == Dis_Continue);
   18890       vassert(irsb->next == NULL);
   18891       vassert(irsb->jumpkind == Ijk_Boring);
   18892       /* If r15 is unconditionally written, terminate the block by
   18893          jumping to it.  If it's conditionally written, still
   18894          terminate the block (a shame, but we can't do side exits to
   18895          arbitrary destinations), but first jump to the next
   18896          instruction if the condition doesn't hold. */
   18897       /* We can't use getIReg(15) to get the destination, since that
   18898          will produce r15+8, which isn't what we want.  Must use
   18899          llGetIReg(15) instead. */
   18900       if (r15guard == IRTemp_INVALID) {
   18901          /* unconditional */
   18902       } else {
   18903          /* conditional */
   18904          stmt( IRStmt_Exit(
   18905                   unop(Iop_32to1,
   18906                        binop(Iop_Xor32,
   18907                              mkexpr(r15guard), mkU32(1))),
   18908                   r15kind,
   18909                   IRConst_U32(guest_R15_curr_instr_notENC + 4),
   18910                   OFFB_R15T
   18911          ));
   18912       }
   18913       /* This seems crazy, but we're required to finish the insn with
   18914          a write to the guest PC.  As usual we rely on ir_opt to tidy
   18915          up later. */
   18916       llPutIReg(15, llGetIReg(15));
   18917       dres.whatNext    = Dis_StopHere;
   18918       dres.jk_StopHere = r15kind;
   18919    } else {
   18920       /* Set up the end-state in the normal way. */
   18921       switch (dres.whatNext) {
   18922          case Dis_Continue:
   18923             llPutIReg(15, mkU32(dres.len + guest_R15_curr_instr_notENC));
   18924             break;
   18925          case Dis_ResteerU:
   18926          case Dis_ResteerC:
   18927             llPutIReg(15, mkU32(dres.continueAt));
   18928             break;
   18929          case Dis_StopHere:
   18930             break;
   18931          default:
   18932             vassert(0);
   18933       }
   18934    }
   18935 
   18936    return dres;
   18937 
   18938 #  undef INSN_COND
   18939 #  undef INSN
   18940 }
   18941 
   18942 
   18943 /*------------------------------------------------------------*/
   18944 /*--- Disassemble a single Thumb2 instruction              ---*/
   18945 /*------------------------------------------------------------*/
   18946 
   18947 static const UChar it_length_table[256]; /* forward declaration only; presumably the IT-block length lookup used by the ITxxx analysis in the Thumb decoder below -- TODO confirm against the table's definition */
   18948 
   18949 /* NB: in Thumb mode we do fetches of regs with getIRegT, which
   18950    automagically adds 4 to fetches of r15.  However, writes to regs
   18951    are done with putIRegT, which disallows writes to r15.  Hence any
   18952    r15 writes and associated jumps have to be done "by hand". */
   18953 
   18954 /* Disassemble a single Thumb instruction into IR.  The instruction is
   18955    located in host memory at guest_instr, and has (decoded) guest IP
   18956    of guest_R15_curr_instr_notENC, which will have been set before the
   18957    call here. */
   18958 
   18959 static
   18960 DisResult disInstr_THUMB_WRK (
   18961              Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
   18962              Bool         resteerCisOk,
   18963              void*        callback_opaque,
   18964              const UChar* guest_instr,
   18965              const VexArchInfo* archinfo,
   18966              const VexAbiInfo*  abiinfo,
   18967              Bool         sigill_diag
   18968           )
   18969 {
   18970    /* A macro to fish bits out of insn0.  There's also INSN1, to fish
   18971       bits out of insn1, but that's defined only after the end of the
   18972       16-bit insn decoder, so as to stop it mistakenly being used
   18973       therein. */
   18974 #  define INSN0(_bMax,_bMin)  SLICE_UInt(((UInt)insn0), (_bMax), (_bMin))
   18975 
   18976    DisResult dres;
   18977    UShort    insn0; /*  first 16 bits of the insn */
   18978    UShort    insn1; /* second 16 bits of the insn */
   18979    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
   18980 
   18981    /* Summary result of the ITxxx backwards analysis: False == safe
   18982       but suboptimal. */
   18983    Bool guaranteedUnconditional = False;
   18984 
   18985    /* Set result defaults. */
   18986    dres.whatNext    = Dis_Continue;
   18987    dres.len         = 2;
   18988    dres.continueAt  = 0;
   18989    dres.jk_StopHere = Ijk_INVALID;
   18990    dres.hint        = Dis_HintNone;
   18991 
   18992    /* Set default actions for post-insn handling of writes to r15, if
   18993       required. */
   18994    r15written = False;
   18995    r15guard   = IRTemp_INVALID; /* unconditional */
   18996    r15kind    = Ijk_Boring;
   18997 
   18998    /* Insns could be 2 or 4 bytes long.  Just get the first 16 bits at
   18999       this point.  If we need the second 16, get them later.  We can't
   19000       get them both out immediately because it risks a fault (very
   19001       unlikely, but ..) if the second 16 bits aren't actually
   19002       necessary. */
   19003    insn0 = getUShortLittleEndianly( guest_instr );
   19004    insn1 = 0; /* We'll get it later, once we know we need it. */
   19005 
   19006    /* Similarly, will set this later. */
   19007    IRTemp old_itstate = IRTemp_INVALID;
   19008 
   19009    if (0) vex_printf("insn: 0x%x\n", insn0);
   19010 
   19011    DIP("\t(thumb) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
   19012 
   19013    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   19014 
   19015    /* ----------------------------------------------------------- */
   19016    /* Spot "Special" instructions (see comment at top of file). */
   19017    {
   19018       const UChar* code = guest_instr;
   19019       /* Spot the 16-byte preamble:
   19020 
   19021          ea4f 0cfc  mov.w   ip, ip, ror #3
   19022          ea4f 3c7c  mov.w   ip, ip, ror #13
   19023          ea4f 7c7c  mov.w   ip, ip, ror #29
   19024          ea4f 4cfc  mov.w   ip, ip, ror #19
   19025       */
   19026       UInt word1 = 0x0CFCEA4F;
   19027       UInt word2 = 0x3C7CEA4F;
   19028       UInt word3 = 0x7C7CEA4F;
   19029       UInt word4 = 0x4CFCEA4F;
   19030       if (getUIntLittleEndianly(code+ 0) == word1 &&
   19031           getUIntLittleEndianly(code+ 4) == word2 &&
   19032           getUIntLittleEndianly(code+ 8) == word3 &&
   19033           getUIntLittleEndianly(code+12) == word4) {
   19034          /* Got a "Special" instruction preamble.  Which one is it? */
   19035          // 0x 0A 0A EA 4A
   19036          if (getUIntLittleEndianly(code+16) == 0x0A0AEA4A
   19037                                                /* orr.w r10,r10,r10 */) {
   19038             /* R3 = client_request ( R4 ) */
   19039             DIP("r3 = client_request ( %%r4 )\n");
   19040             llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
   19041             dres.jk_StopHere = Ijk_ClientReq;
   19042             dres.whatNext    = Dis_StopHere;
   19043             goto decode_success;
   19044          }
   19045          else
   19046          // 0x 0B 0B EA 4B
   19047          if (getUIntLittleEndianly(code+16) == 0x0B0BEA4B
   19048                                                /* orr r11,r11,r11 */) {
   19049             /* R3 = guest_NRADDR */
   19050             DIP("r3 = guest_NRADDR\n");
   19051             dres.len = 20;
   19052             llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
   19053             goto decode_success;
   19054          }
   19055          else
   19056          // 0x 0C 0C EA 4C
   19057          if (getUIntLittleEndianly(code+16) == 0x0C0CEA4C
   19058                                                /* orr r12,r12,r12 */) {
   19059             /*  branch-and-link-to-noredir R4 */
   19060             DIP("branch-and-link-to-noredir r4\n");
   19061             llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
   19062             llPutIReg(15, getIRegT(4));
   19063             dres.jk_StopHere = Ijk_NoRedir;
   19064             dres.whatNext    = Dis_StopHere;
   19065             goto decode_success;
   19066          }
   19067          else
   19068          // 0x 09 09 EA 49
   19069          if (getUIntLittleEndianly(code+16) == 0x0909EA49
   19070                                                /* orr r9,r9,r9 */) {
   19071             /* IR injection */
   19072             DIP("IR injection\n");
   19073             vex_inject_ir(irsb, Iend_LE);
   19074             // Invalidate the current insn. The reason is that the IRop we're
   19075             // injecting here can change. In which case the translation has to
   19076             // be redone. For ease of handling, we simply invalidate all the
   19077             // time.
   19078             stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
   19079             stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
   19080             llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
   19081             dres.whatNext    = Dis_StopHere;
   19082             dres.jk_StopHere = Ijk_InvalICache;
   19083             goto decode_success;
   19084          }
   19085          /* We don't know what it is.  Set insn0 so decode_failure
   19086             can print the insn following the Special-insn preamble. */
   19087          insn0 = getUShortLittleEndianly(code+16);
   19088          goto decode_failure;
   19089          /*NOTREACHED*/
   19090       }
   19091 
   19092    }
   19093 
   19094    /* ----------------------------------------------------------- */
   19095 
   19096    /* Main Thumb instruction decoder starts here.  It's a series of
   19097       switches which examine ever longer bit sequences at the MSB of
   19098       the instruction word, first for 16-bit insns, then for 32-bit
   19099       insns. */
   19100 
   19101    /* --- BEGIN ITxxx optimisation analysis --- */
   19102    /* This is a crucial optimisation for the ITState boilerplate that
   19103       follows.  Examine the 9 halfwords preceding this instruction,
   19104       and if we are absolutely sure that none of them constitute an
   19105       'it' instruction, then we can be sure that this instruction is
   19106       not under the control of any 'it' instruction, and so
   19107       guest_ITSTATE must be zero.  So write zero into ITSTATE right
   19108       now, so that iropt can fold out almost all of the resulting
   19109       junk.
   19110 
   19111       If we aren't sure, we can always safely skip this step.  So be a
   19112       bit conservative about it: only poke around in the same page as
   19113       this instruction, lest we get a fault from the previous page
   19114       that would not otherwise have happened.  The saving grace is
   19115       that such skipping is pretty rare -- it only happens,
   19116       statistically, 18/4096ths of the time, so is judged unlikely to
   19117       be a performance problem.
   19118 
   19119       FIXME: do better.  Take into account the number of insns covered
   19120       by any IT insns we find, to rule out cases where an IT clearly
   19121       cannot cover this instruction.  This would improve behaviour for
   19122       branch targets immediately following an IT-guarded group that is
   19123       not of full length.  Eg, (and completely ignoring issues of 16-
   19124       vs 32-bit insn length):
   19125 
   19126              ite cond
   19127              insn1
   19128              insn2
   19129       label: insn3
   19130              insn4
   19131 
   19132       The 'it' only conditionalises insn1 and insn2.  However, the
   19133       current analysis is conservative and considers insn3 and insn4
   19134       also possibly guarded.  Hence if 'label:' is the start of a hot
   19135       loop we will get a big performance hit.
   19136    */
   19137    {
   19138       /* Summary result of this analysis: False == safe but
   19139          suboptimal. */
   19140       vassert(guaranteedUnconditional == False);
   19141 
   19142       UInt pc = guest_R15_curr_instr_notENC;
   19143       vassert(0 == (pc & 1));
   19144 
   19145       UInt pageoff = pc & 0xFFF;
   19146       if (pageoff >= 18) {
   19147          /* It's safe to poke about in the 9 halfwords preceding this
   19148             insn.  So, have a look at them. */
   19149          guaranteedUnconditional = True; /* assume no 'it' insn found,
   19150                                             till we do */
   19151          UShort* hwp = (UShort*)(HWord)pc;
   19152          Int i;
   19153          for (i = -1; i >= -9; i--) {
   19154             /* We're in the same page.  (True, but commented out due
   19155                to expense.) */
   19156             /*
   19157             vassert( ( ((UInt)(&hwp[i])) & 0xFFFFF000 )
   19158                       == ( pc & 0xFFFFF000 ) );
   19159             */
   19160             /* All valid IT instructions must have the form 0xBFxy,
   19161                where x can be anything, but y must be nonzero.  Find
   19162                the number of insns covered by it (1 .. 4) and check to
   19163                see if it can possibly reach up to the instruction in
   19164                question.  Some (x,y) combinations mean UNPREDICTABLE,
   19165                and the table is constructed to be conservative by
   19166                returning 4 for those cases, so the analysis is safe
   19167                even if the code uses unpredictable IT instructions (in
   19168                which case its authors are nuts, but hey.)  */
   19169             UShort hwp_i = hwp[i];
   19170             if (UNLIKELY((hwp_i & 0xFF00) == 0xBF00 && (hwp_i & 0xF) != 0)) {
   19171                /* might be an 'it' insn. */
   19172                /* # guarded insns */
   19173                Int n_guarded = (Int)it_length_table[hwp_i & 0xFF];
   19174                vassert(n_guarded >= 1 && n_guarded <= 4);
   19175                if (n_guarded * 2 /* # guarded HWs, worst case */
   19176                    > (-(i+1)))   /* -(i+1): # remaining HWs after the IT */
   19177                    /* -(i+0) also seems to work, even though I think
   19178                       it's wrong.  I don't understand that. */
   19179                   guaranteedUnconditional = False;
   19180                break;
   19181             }
   19182          }
   19183       }
   19184    }
   19185    /* --- END ITxxx optimisation analysis --- */
   19186 
   19187    /* Generate the guarding condition for this insn, by examining
   19188       ITSTATE.  Assign it to condT.  Also, generate new
   19189       values for ITSTATE ready for stuffing back into the
   19190       guest state, but don't actually do the Put yet, since it will
   19191       need to be stuffed back in only after the instruction gets to a
   19192       point where it is sure to complete.  Mostly we let the code at
   19193       decode_success handle this, but in cases where the insn contains
   19194       a side exit, we have to update them before the exit. */
   19195 
   19196    /* If the ITxxx optimisation analysis above could not prove that
   19197       this instruction is guaranteed unconditional, we insert a
   19198       lengthy IR preamble to compute the guarding condition at
   19199       runtime.  If it can prove it (which obviously we hope is the
   19200       normal case) then we insert a minimal preamble, which is
   19201       equivalent to setting guest_ITSTATE to zero and then folding
   19202       that through the full preamble (which completely disappears). */
   19203 
   19204    IRTemp condT              = IRTemp_INVALID;
   19205    IRTemp cond_AND_notInIT_T = IRTemp_INVALID;
   19206 
   19207    IRTemp new_itstate        = IRTemp_INVALID;
   19208    vassert(old_itstate == IRTemp_INVALID);
   19209 
   19210    if (guaranteedUnconditional) {
   19211       /* BEGIN "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
   19212 
   19213       // ITSTATE = 0 :: I32
   19214       IRTemp z32 = newTemp(Ity_I32);
   19215       assign(z32, mkU32(0));
   19216       put_ITSTATE(z32);
   19217 
   19218       // old_itstate = 0 :: I32
   19219       //
   19220       // old_itstate = get_ITSTATE();
   19221       old_itstate = z32; /* 0 :: I32 */
   19222 
   19223       // new_itstate = old_itstate >> 8
   19224       //             = 0 >> 8
   19225       //             = 0 :: I32
   19226       //
   19227       // new_itstate = newTemp(Ity_I32);
   19228       // assign(new_itstate,
   19229       //        binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   19230       new_itstate = z32;
   19231 
   19232       // ITSTATE = 0 :: I32(again)
   19233       //
   19234       // put_ITSTATE(new_itstate);
   19235 
   19236       // condT1 = calc_cond_dyn( xor(and(old_itstate,0xF0), 0xE0) )
   19237       //        = calc_cond_dyn( xor(0,0xE0) )
   19238       //        = calc_cond_dyn ( 0xE0 )
   19239       //        = 1 :: I32
   19240       // Not that this matters, since the computed value is not used:
   19241       // see condT folding below
   19242       //
   19243       // IRTemp condT1 = newTemp(Ity_I32);
   19244       // assign(condT1,
   19245       //        mk_armg_calculate_condition_dyn(
   19246       //           binop(Iop_Xor32,
   19247       //                 binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
   19248       //                 mkU32(0xE0))
   19249       //       )
   19250       // );
   19251 
   19252       // condT = 32to8(and32(old_itstate,0xF0)) == 0  ? 1  : condT1
   19253       //       = 32to8(and32(0,0xF0)) == 0  ? 1  : condT1
   19254       //       = 32to8(0) == 0  ? 1  : condT1
   19255       //       = 0 == 0  ? 1  : condT1
   19256       //       = 1
   19257       //
   19258       // condT = newTemp(Ity_I32);
   19259       // assign(condT, IRExpr_ITE(
   19260       //                  unop(Iop_32to8, binop(Iop_And32,
   19261       //                                        mkexpr(old_itstate),
   19262       //                                        mkU32(0xF0))),
   19263       //                  mkexpr(condT1),
   19264       //                  mkU32(1))
   19265       //       ));
   19266       condT = newTemp(Ity_I32);
   19267       assign(condT, mkU32(1));
   19268 
   19269       // notInITt = xor32(and32(old_itstate, 1), 1)
   19270       //          = xor32(and32(0, 1), 1)
   19271       //          = xor32(0, 1)
   19272       //          = 1 :: I32
   19273       //
   19274       // IRTemp notInITt = newTemp(Ity_I32);
   19275       // assign(notInITt,
   19276       //        binop(Iop_Xor32,
   19277       //              binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
   19278       //              mkU32(1)));
   19279 
   19280       // cond_AND_notInIT_T = and32(notInITt, condT)
   19281       //                    = and32(1, 1)
   19282       //                    = 1
   19283       //
   19284       // cond_AND_notInIT_T = newTemp(Ity_I32);
   19285       // assign(cond_AND_notInIT_T,
   19286       //        binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
   19287       cond_AND_notInIT_T = condT; /* 1 :: I32 */
   19288 
   19289       /* END "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
   19290    } else {
   19291       /* BEGIN { STANDARD PREAMBLE; } */
   19292 
   19293       old_itstate = get_ITSTATE();
   19294 
   19295       new_itstate = newTemp(Ity_I32);
   19296       assign(new_itstate,
   19297              binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   19298 
   19299       put_ITSTATE(new_itstate);
   19300 
   19301       /* Same strategy as for ARM insns: generate a condition
   19302          temporary at this point (or IRTemp_INVALID, meaning
   19303          unconditional).  We leave it to lower-level instruction
   19304          decoders to decide whether they can generate straight-line
   19305          code, or whether they must generate a side exit before the
   19306          instruction.  condT :: Ity_I32 and is always either zero or
   19307          one. */
   19308       IRTemp condT1 = newTemp(Ity_I32);
   19309       assign(condT1,
   19310              mk_armg_calculate_condition_dyn(
   19311                 binop(Iop_Xor32,
   19312                       binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
   19313                       mkU32(0xE0))
   19314             )
   19315       );
   19316 
   19317       /* This is a bit complex, but needed to make Memcheck understand
   19318          that, if the condition in old_itstate[7:4] denotes AL (that
   19319          is, if this instruction is to be executed unconditionally),
   19320          then condT does not depend on the results of calling the
   19321          helper.
   19322 
   19323          We test explicitly for old_itstate[7:4] == AL ^ 0xE, and in
   19324          that case set condT directly to 1.  Else we use the results
   19325          of the helper.  Since old_itstate is always defined and
   19326          because Memcheck does lazy V-bit propagation through ITE,
   19327          this will cause condT to always be a defined 1 if the
   19328          condition is 'AL'.  From an execution semantics point of view
   19329          this is irrelevant since we're merely duplicating part of the
   19330          behaviour of the helper.  But it makes it clear to Memcheck,
   19331          in this case, that condT does not in fact depend on the
   19332          contents of the condition code thunk.  Without it, we get
   19333          quite a lot of false errors.
   19334 
   19335          So, just to clarify: from a straight semantics point of view,
   19336          we can simply do "assign(condT, mkexpr(condT1))", and the
   19337          simulator still runs fine.  It's just that we get loads of
   19338          false errors from Memcheck. */
   19339       condT = newTemp(Ity_I32);
   19340       assign(condT, IRExpr_ITE(
   19341                        binop(Iop_CmpNE32, binop(Iop_And32,
   19342                                                 mkexpr(old_itstate),
   19343                                                 mkU32(0xF0)),
   19344                                           mkU32(0)),
   19345                        mkexpr(condT1),
   19346                        mkU32(1)
   19347             ));
   19348 
   19349       /* Something we don't have in ARM: generate a 0 or 1 value
   19350          indicating whether or not we are in an IT block (NB: 0 = in
   19351          IT block, 1 = not in IT block).  This is used to gate
   19352          condition code updates in 16-bit Thumb instructions. */
   19353       IRTemp notInITt = newTemp(Ity_I32);
   19354       assign(notInITt,
   19355              binop(Iop_Xor32,
   19356                    binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
   19357                    mkU32(1)));
   19358 
   19359       /* Compute 'condT && notInITt' -- that is, the instruction is
   19360          going to execute, and we're not in an IT block.  This is the
   19361          gating condition for updating condition codes in 16-bit Thumb
   19362          instructions, except for CMP, CMN and TST. */
   19363       cond_AND_notInIT_T = newTemp(Ity_I32);
   19364       assign(cond_AND_notInIT_T,
   19365              binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
   19366       /* END { STANDARD PREAMBLE; } */
   19367    }
   19368 
   19369 
   19370    /* At this point:
   19371       * ITSTATE has been updated
   19372       * condT holds the guarding condition for this instruction (0 or 1),
   19373       * notInITt is 1 if we're in "normal" code, 0 if in an IT block
   19374       * cond_AND_notInIT_T is the AND of the above two.
   19375 
   19376       If the instruction proper can't trap, then there's nothing else
   19377       to do w.r.t. ITSTATE -- just go and generate IR for the
   19378       insn, taking into account the guarding condition.
   19379 
   19380       If, however, the instruction might trap, then we must back up
   19381       ITSTATE to the old value, and re-update it after the potentially
   19382       trapping IR section.  A trap can happen either via a memory
   19383       reference or because we need to throw SIGILL.
   19384 
   19385       If an instruction has a side exit, we need to be sure that any
   19386       ITSTATE backup is re-updated before the side exit.
   19387    */
   19388 
   19389    /* ----------------------------------------------------------- */
   19390    /* --                                                       -- */
   19391    /* -- Thumb 16-bit integer instructions                     -- */
   19392    /* --                                                       -- */
   19393    /* -- IMPORTANT: references to insn1 or INSN1 are           -- */
   19394    /* --            not allowed in this section                -- */
   19395    /* --                                                       -- */
   19396    /* ----------------------------------------------------------- */
   19397 
   19398    /* 16-bit instructions inside an IT block, apart from CMP, CMN and
   19399       TST, do not set the condition codes.  Hence we must dynamically
   19400       test for this case for every condition code update. */
   19401 
   19402    IROp   anOp   = Iop_INVALID;
   19403    const HChar* anOpNm = NULL;
   19404 
   19405    /* ================ 16-bit 15:6 cases ================ */
   19406 
   19407    switch (INSN0(15,6)) {
   19408 
   19409    case 0x10a:   // CMP
   19410    case 0x10b: { // CMN
   19411       /* ---------------- CMP Rn, Rm ---------------- */
   19412       Bool   isCMN = INSN0(15,6) == 0x10b;
   19413       UInt   rN    = INSN0(2,0);
   19414       UInt   rM    = INSN0(5,3);
   19415       IRTemp argL  = newTemp(Ity_I32);
   19416       IRTemp argR  = newTemp(Ity_I32);
   19417       assign( argL, getIRegT(rN) );
   19418       assign( argR, getIRegT(rM) );
   19419       /* Update flags regardless of whether in an IT block or not. */
   19420       setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   19421                       argL, argR, condT );
   19422       DIP("%s r%u, r%u\n", isCMN ? "cmn" : "cmp", rN, rM);
   19423       goto decode_success;
   19424    }
   19425 
   19426    case 0x108: {
   19427       /* ---------------- TST Rn, Rm ---------------- */
   19428       UInt   rN   = INSN0(2,0);
   19429       UInt   rM   = INSN0(5,3);
   19430       IRTemp oldC = newTemp(Ity_I32);
   19431       IRTemp oldV = newTemp(Ity_I32);
   19432       IRTemp res  = newTemp(Ity_I32);
   19433       assign( oldC, mk_armg_calculate_flag_c() );
   19434       assign( oldV, mk_armg_calculate_flag_v() );
   19435       assign( res,  binop(Iop_And32, getIRegT(rN), getIRegT(rM)) );
   19436       /* Update flags regardless of whether in an IT block or not. */
   19437       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
   19438       DIP("tst r%u, r%u\n", rN, rM);
   19439       goto decode_success;
   19440    }
   19441 
   19442    case 0x109: {
   19443       /* ---------------- NEGS Rd, Rm ---------------- */
   19444       /* Rd = -Rm */
   19445       UInt   rM   = INSN0(5,3);
   19446       UInt   rD   = INSN0(2,0);
   19447       IRTemp arg  = newTemp(Ity_I32);
   19448       IRTemp zero = newTemp(Ity_I32);
   19449       assign(arg, getIRegT(rM));
   19450       assign(zero, mkU32(0));
   19451       // rD can never be r15
   19452       putIRegT(rD, binop(Iop_Sub32, mkexpr(zero), mkexpr(arg)), condT);
   19453       setFlags_D1_D2( ARMG_CC_OP_SUB, zero, arg, cond_AND_notInIT_T);
   19454       DIP("negs r%u, r%u\n", rD, rM);
   19455       goto decode_success;
   19456    }
   19457 
   19458    case 0x10F: {
   19459       /* ---------------- MVNS Rd, Rm ---------------- */
   19460       /* Rd = ~Rm */
   19461       UInt   rM   = INSN0(5,3);
   19462       UInt   rD   = INSN0(2,0);
   19463       IRTemp oldV = newTemp(Ity_I32);
   19464       IRTemp oldC = newTemp(Ity_I32);
   19465       IRTemp res  = newTemp(Ity_I32);
   19466       assign( oldV, mk_armg_calculate_flag_v() );
   19467       assign( oldC, mk_armg_calculate_flag_c() );
   19468       assign(res, unop(Iop_Not32, getIRegT(rM)));
   19469       // rD can never be r15
   19470       putIRegT(rD, mkexpr(res), condT);
   19471       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19472                          cond_AND_notInIT_T );
   19473       DIP("mvns r%u, r%u\n", rD, rM);
   19474       goto decode_success;
   19475    }
   19476 
   19477    case 0x10C:
   19478       /* ---------------- ORRS Rd, Rm ---------------- */
   19479       anOp = Iop_Or32; anOpNm = "orr"; goto and_orr_eor_mul;
   19480    case 0x100:
   19481       /* ---------------- ANDS Rd, Rm ---------------- */
   19482       anOp = Iop_And32; anOpNm = "and"; goto and_orr_eor_mul;
   19483    case 0x101:
   19484       /* ---------------- EORS Rd, Rm ---------------- */
   19485       anOp = Iop_Xor32; anOpNm = "eor"; goto and_orr_eor_mul;
   19486    case 0x10d:
   19487       /* ---------------- MULS Rd, Rm ---------------- */
   19488       anOp = Iop_Mul32; anOpNm = "mul"; goto and_orr_eor_mul;
   19489    and_orr_eor_mul: {
   19490       /* Rd = Rd `op` Rm */
   19491       UInt   rM   = INSN0(5,3);
   19492       UInt   rD   = INSN0(2,0);
   19493       IRTemp res  = newTemp(Ity_I32);
   19494       IRTemp oldV = newTemp(Ity_I32);
   19495       IRTemp oldC = newTemp(Ity_I32);
   19496       assign( oldV, mk_armg_calculate_flag_v() );
   19497       assign( oldC, mk_armg_calculate_flag_c() );
   19498       assign( res, binop(anOp, getIRegT(rD), getIRegT(rM) ));
   19499       // not safe to read guest state after here
   19500       // rD can never be r15
   19501       putIRegT(rD, mkexpr(res), condT);
   19502       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19503                          cond_AND_notInIT_T );
   19504       DIP("%s r%u, r%u\n", anOpNm, rD, rM);
   19505       goto decode_success;
   19506    }
   19507 
   19508    case 0x10E: {
   19509       /* ---------------- BICS Rd, Rm ---------------- */
   19510       /* Rd = Rd & ~Rm */
   19511       UInt   rM   = INSN0(5,3);
   19512       UInt   rD   = INSN0(2,0);
   19513       IRTemp res  = newTemp(Ity_I32);
   19514       IRTemp oldV = newTemp(Ity_I32);
   19515       IRTemp oldC = newTemp(Ity_I32);
   19516       assign( oldV, mk_armg_calculate_flag_v() );
   19517       assign( oldC, mk_armg_calculate_flag_c() );
   19518       assign( res, binop(Iop_And32, getIRegT(rD),
   19519                                     unop(Iop_Not32, getIRegT(rM) )));
   19520       // not safe to read guest state after here
   19521       // rD can never be r15
   19522       putIRegT(rD, mkexpr(res), condT);
   19523       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   19524                          cond_AND_notInIT_T );
   19525       DIP("bics r%u, r%u\n", rD, rM);
   19526       goto decode_success;
   19527    }
   19528 
   19529    case 0x105: {
   19530       /* ---------------- ADCS Rd, Rm ---------------- */
   19531       /* Rd = Rd + Rm + oldC */
   19532       UInt   rM   = INSN0(5,3);
   19533       UInt   rD   = INSN0(2,0);
   19534       IRTemp argL = newTemp(Ity_I32);
   19535       IRTemp argR = newTemp(Ity_I32);
   19536       IRTemp oldC = newTemp(Ity_I32);
   19537       IRTemp res  = newTemp(Ity_I32);
   19538       assign(argL, getIRegT(rD));
   19539       assign(argR, getIRegT(rM));
   19540       assign(oldC, mk_armg_calculate_flag_c());
   19541       assign(res, binop(Iop_Add32,
   19542                         binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
   19543                         mkexpr(oldC)));
   19544       // rD can never be r15
   19545       putIRegT(rD, mkexpr(res), condT);
   19546       setFlags_D1_D2_ND( ARMG_CC_OP_ADC, argL, argR, oldC,
   19547                          cond_AND_notInIT_T );
   19548       DIP("adcs r%u, r%u\n", rD, rM);
   19549       goto decode_success;
   19550    }
   19551 
   19552    case 0x106: {
   19553       /* ---------------- SBCS Rd, Rm ---------------- */
   19554       /* Rd = Rd - Rm - (oldC ^ 1) */
   19555       UInt   rM   = INSN0(5,3);
   19556       UInt   rD   = INSN0(2,0);
   19557       IRTemp argL = newTemp(Ity_I32);
   19558       IRTemp argR = newTemp(Ity_I32);
   19559       IRTemp oldC = newTemp(Ity_I32);
   19560       IRTemp res  = newTemp(Ity_I32);
   19561       assign(argL, getIRegT(rD));
   19562       assign(argR, getIRegT(rM));
   19563       assign(oldC, mk_armg_calculate_flag_c());
   19564       assign(res, binop(Iop_Sub32,
   19565                         binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
   19566                         binop(Iop_Xor32, mkexpr(oldC), mkU32(1))));
   19567       // rD can never be r15
   19568       putIRegT(rD, mkexpr(res), condT);
   19569       setFlags_D1_D2_ND( ARMG_CC_OP_SBB, argL, argR, oldC,
   19570                          cond_AND_notInIT_T );
   19571       DIP("sbcs r%u, r%u\n", rD, rM);
   19572       goto decode_success;
   19573    }
   19574 
   19575    case 0x2CB: {
   19576       /* ---------------- UXTB Rd, Rm ---------------- */
   19577       /* Rd = 8Uto32(Rm) */
   19578       UInt rM = INSN0(5,3);
   19579       UInt rD = INSN0(2,0);
   19580       putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFF)),
   19581                    condT);
   19582       DIP("uxtb r%u, r%u\n", rD, rM);
   19583       goto decode_success;
   19584    }
   19585 
   19586    case 0x2C9: {
   19587       /* ---------------- SXTB Rd, Rm ---------------- */
   19588       /* Rd = 8Sto32(Rm) */
   19589       UInt rM = INSN0(5,3);
   19590       UInt rD = INSN0(2,0);
   19591       putIRegT(rD, binop(Iop_Sar32,
   19592                          binop(Iop_Shl32, getIRegT(rM), mkU8(24)),
   19593                          mkU8(24)),
   19594                    condT);
   19595       DIP("sxtb r%u, r%u\n", rD, rM);
   19596       goto decode_success;
   19597    }
   19598 
   19599    case 0x2CA: {
   19600       /* ---------------- UXTH Rd, Rm ---------------- */
   19601       /* Rd = 16Uto32(Rm) */
   19602       UInt rM = INSN0(5,3);
   19603       UInt rD = INSN0(2,0);
   19604       putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFFFF)),
   19605                    condT);
   19606       DIP("uxth r%u, r%u\n", rD, rM);
   19607       goto decode_success;
   19608    }
   19609 
   19610    case 0x2C8: {
   19611       /* ---------------- SXTH Rd, Rm ---------------- */
   19612       /* Rd = 16Sto32(Rm) */
   19613       UInt rM = INSN0(5,3);
   19614       UInt rD = INSN0(2,0);
   19615       putIRegT(rD, binop(Iop_Sar32,
   19616                          binop(Iop_Shl32, getIRegT(rM), mkU8(16)),
   19617                          mkU8(16)),
   19618                    condT);
   19619       DIP("sxth r%u, r%u\n", rD, rM);
   19620       goto decode_success;
   19621    }
   19622 
   19623    case 0x102:   // LSLS
   19624    case 0x103:   // LSRS
   19625    case 0x104:   // ASRS
   19626    case 0x107: { // RORS
   19627       /* ---------------- LSLS Rs, Rd ---------------- */
   19628       /* ---------------- LSRS Rs, Rd ---------------- */
   19629       /* ---------------- ASRS Rs, Rd ---------------- */
   19630       /* ---------------- RORS Rs, Rd ---------------- */
   19631       /* Rd = Rd `op` Rs, and set flags */
   19632       UInt   rS   = INSN0(5,3);
   19633       UInt   rD   = INSN0(2,0);
   19634       IRTemp oldV = newTemp(Ity_I32);
   19635       IRTemp rDt  = newTemp(Ity_I32);
   19636       IRTemp rSt  = newTemp(Ity_I32);
   19637       IRTemp res  = newTemp(Ity_I32);
   19638       IRTemp resC = newTemp(Ity_I32);
   19639       const HChar* wot  = "???";
   19640       assign(rSt, getIRegT(rS));
   19641       assign(rDt, getIRegT(rD));
   19642       assign(oldV, mk_armg_calculate_flag_v());
   19643       /* Does not appear to be the standard 'how' encoding. */
   19644       switch (INSN0(15,6)) {
   19645          case 0x102:
   19646             compute_result_and_C_after_LSL_by_reg(
   19647                dis_buf, &res, &resC, rDt, rSt, rD, rS
   19648             );
   19649             wot = "lsl";
   19650             break;
   19651          case 0x103:
   19652             compute_result_and_C_after_LSR_by_reg(
   19653                dis_buf, &res, &resC, rDt, rSt, rD, rS
   19654             );
   19655             wot = "lsr";
   19656             break;
   19657          case 0x104:
   19658             compute_result_and_C_after_ASR_by_reg(
   19659                dis_buf, &res, &resC, rDt, rSt, rD, rS
   19660             );
   19661             wot = "asr";
   19662             break;
   19663          case 0x107:
   19664             compute_result_and_C_after_ROR_by_reg(
   19665                dis_buf, &res, &resC, rDt, rSt, rD, rS
   19666             );
   19667             wot = "ror";
   19668             break;
   19669          default:
   19670             /*NOTREACHED*/vassert(0);
   19671       }
   19672       // not safe to read guest state after this point
   19673       putIRegT(rD, mkexpr(res), condT);
   19674       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
   19675                          cond_AND_notInIT_T );
   19676       DIP("%ss r%u, r%u\n", wot, rS, rD);
   19677       goto decode_success;
   19678    }
   19679 
   19680    case 0x2E8:   // REV
   19681    case 0x2E9: { // REV16
   19682       /* ---------------- REV   Rd, Rm ---------------- */
   19683       /* ---------------- REV16 Rd, Rm ---------------- */
   19684       UInt rM = INSN0(5,3);
   19685       UInt rD = INSN0(2,0);
   19686       Bool isREV = INSN0(15,6) == 0x2E8;
   19687       IRTemp arg = newTemp(Ity_I32);
   19688       assign(arg, getIRegT(rM));
   19689       IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
   19690       putIRegT(rD, mkexpr(res), condT);
   19691       DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM);
   19692       goto decode_success;
   19693    }
   19694 
   19695    case 0x2EB: { // REVSH
   19696       /* ---------------- REVSH Rd, Rn ---------------- */
   19697       UInt rM = INSN0(5,3);
   19698       UInt rD = INSN0(2,0);
   19699       IRTemp irt_rM  = newTemp(Ity_I32);
   19700       IRTemp irt_hi  = newTemp(Ity_I32);
   19701       IRTemp irt_low = newTemp(Ity_I32);
   19702       IRTemp irt_res = newTemp(Ity_I32);
   19703       assign(irt_rM, getIRegT(rM));
   19704       assign(irt_hi,
   19705              binop(Iop_Sar32,
   19706                    binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
   19707                    mkU8(16)
   19708              )
   19709       );
   19710       assign(irt_low,
   19711              binop(Iop_And32,
   19712                    binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
   19713                    mkU32(0xFF)
   19714              )
   19715       );
   19716       assign(irt_res,
   19717              binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
   19718       );
   19719       putIRegT(rD, mkexpr(irt_res), condT);
   19720       DIP("revsh r%u, r%u\n", rD, rM);
   19721       goto decode_success;
   19722    }
   19723 
   19724    default:
   19725       break; /* examine the next shortest prefix */
   19726 
   19727    }
   19728 
   19729 
   19730    /* ================ 16-bit 15:7 cases ================ */
   19731 
   19732    switch (INSN0(15,7)) {
   19733 
   19734    case BITS9(1,0,1,1,0,0,0,0,0): {
   19735       /* ------------ ADD SP, #imm7 * 4 ------------ */
   19736       UInt uimm7 = INSN0(6,0);
   19737       putIRegT(13, binop(Iop_Add32, getIRegT(13), mkU32(uimm7 * 4)),
   19738                    condT);
   19739       DIP("add sp, #%u\n", uimm7 * 4);
   19740       goto decode_success;
   19741    }
   19742 
   19743    case BITS9(1,0,1,1,0,0,0,0,1): {
   19744       /* ------------ SUB SP, #imm7 * 4 ------------ */
   19745       UInt uimm7 = INSN0(6,0);
   19746       putIRegT(13, binop(Iop_Sub32, getIRegT(13), mkU32(uimm7 * 4)),
   19747                    condT);
   19748       DIP("sub sp, #%u\n", uimm7 * 4);
   19749       goto decode_success;
   19750    }
   19751 
   19752    case BITS9(0,1,0,0,0,1,1,1,0): {
   19753       /* ---------------- BX rM ---------------- */
   19754       /* Branch to reg, and optionally switch modes.  Reg contains a
   19755          suitably encoded address therefore (w CPSR.T at the bottom).
   19756          Have to special-case r15, as usual. */
   19757       UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
   19758       if (BITS3(0,0,0) == INSN0(2,0)) {
   19759          IRTemp dst = newTemp(Ity_I32);
   19760          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   19761          mk_skip_over_T16_if_cond_is_false(condT);
   19762          condT = IRTemp_INVALID;
   19763          // now uncond
   19764          if (rM <= 14) {
   19765             assign( dst, getIRegT(rM) );
   19766          } else {
   19767             vassert(rM == 15);
   19768             assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
   19769          }
   19770          llPutIReg(15, mkexpr(dst));
   19771          dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
   19772          dres.whatNext    = Dis_StopHere;
   19773          DIP("bx r%u (possibly switch to ARM mode)\n", rM);
   19774          goto decode_success;
   19775       }
   19776       break;
   19777    }
   19778 
   19779    /* ---------------- BLX rM ---------------- */
   19780    /* Branch and link to interworking address in rM. */
   19781    case BITS9(0,1,0,0,0,1,1,1,1): {
   19782       if (BITS3(0,0,0) == INSN0(2,0)) {
   19783          UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
   19784          IRTemp dst = newTemp(Ity_I32);
   19785          if (rM <= 14) {
   19786             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   19787             mk_skip_over_T16_if_cond_is_false(condT);
   19788             condT = IRTemp_INVALID;
   19789             // now uncond
   19790             /* We're returning to Thumb code, hence "| 1" */
   19791             assign( dst, getIRegT(rM) );
   19792             putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
   19793                           IRTemp_INVALID );
   19794             llPutIReg(15, mkexpr(dst));
   19795             dres.jk_StopHere = Ijk_Call;
   19796             dres.whatNext    = Dis_StopHere;
   19797             DIP("blx r%u (possibly switch to ARM mode)\n", rM);
   19798             goto decode_success;
   19799          }
   19800          /* else unpredictable, fall through */
   19801       }
   19802       break;
   19803    }
   19804 
   19805    default:
   19806       break; /* examine the next shortest prefix */
   19807 
   19808    }
   19809 
   19810 
   19811    /* ================ 16-bit 15:8 cases ================ */
   19812 
   19813    switch (INSN0(15,8)) {
   19814 
   19815    case BITS8(1,1,0,1,1,1,1,1): {
   19816       /* ---------------- SVC ---------------- */
   19817       UInt imm8 = INSN0(7,0);
   19818       if (imm8 == 0) {
   19819          /* A syscall.  We can't do this conditionally, hence: */
   19820          mk_skip_over_T16_if_cond_is_false( condT );
   19821          // FIXME: what if we have to back up and restart this insn?
   19822          // then ITSTATE will be wrong (we'll have it as "used")
   19823          // when it isn't.  Correct is to save ITSTATE in a
   19824          // stash pseudo-reg, and back up from that if we have to
   19825          // restart.
   19826          // uncond after here
   19827          llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ));
   19828          dres.jk_StopHere = Ijk_Sys_syscall;
   19829          dres.whatNext    = Dis_StopHere;
   19830          DIP("svc #0x%08x\n", imm8);
   19831          goto decode_success;
   19832       }
   19833       /* else fall through */
   19834       break;
   19835    }
   19836 
   19837    case BITS8(0,1,0,0,0,1,0,0): {
   19838       /* ---------------- ADD(HI) Rd, Rm ---------------- */
   19839       UInt h1 = INSN0(7,7);
   19840       UInt h2 = INSN0(6,6);
   19841       UInt rM = (h2 << 3) | INSN0(5,3);
   19842       UInt rD = (h1 << 3) | INSN0(2,0);
   19843       //if (h1 == 0 && h2 == 0) { // Original T1 was more restrictive
   19844       if (rD == 15 && rM == 15) {
   19845          // then it's invalid
   19846       } else {
   19847          IRTemp res = newTemp(Ity_I32);
   19848          assign( res, binop(Iop_Add32, getIRegT(rD), getIRegT(rM) ));
   19849          if (rD != 15) {
   19850             putIRegT( rD, mkexpr(res), condT );
   19851          } else {
   19852             /* Only allowed outside or last-in IT block; SIGILL if not so. */
   19853             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   19854             /* jump over insn if not selected */
   19855             mk_skip_over_T16_if_cond_is_false(condT);
   19856             condT = IRTemp_INVALID;
   19857             // now uncond
   19858             /* non-interworking branch */
   19859             llPutIReg(15, binop(Iop_Or32, mkexpr(res), mkU32(1)));
   19860             dres.jk_StopHere = Ijk_Boring;
   19861             dres.whatNext    = Dis_StopHere;
   19862          }
   19863          DIP("add(hi) r%u, r%u\n", rD, rM);
   19864          goto decode_success;
   19865       }
   19866       break;
   19867    }
   19868 
   19869    case BITS8(0,1,0,0,0,1,0,1): {
   19870       /* ---------------- CMP(HI) Rd, Rm ---------------- */
   19871       UInt h1 = INSN0(7,7);
   19872       UInt h2 = INSN0(6,6);
   19873       UInt rM = (h2 << 3) | INSN0(5,3);
   19874       UInt rN = (h1 << 3) | INSN0(2,0);
   19875       if (h1 != 0 || h2 != 0) {
   19876          IRTemp argL  = newTemp(Ity_I32);
   19877          IRTemp argR  = newTemp(Ity_I32);
   19878          assign( argL, getIRegT(rN) );
   19879          assign( argR, getIRegT(rM) );
   19880          /* Update flags regardless of whether in an IT block or not. */
   19881          setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   19882          DIP("cmphi r%u, r%u\n", rN, rM);
   19883          goto decode_success;
   19884       }
   19885       break;
   19886    }
   19887 
   19888    case BITS8(0,1,0,0,0,1,1,0): {
   19889       /* ---------------- MOV(HI) Rd, Rm ---------------- */
   19890       UInt h1 = INSN0(7,7);
   19891       UInt h2 = INSN0(6,6);
   19892       UInt rM = (h2 << 3) | INSN0(5,3);
   19893       UInt rD = (h1 << 3) | INSN0(2,0);
   19894       /* The old ARM ARM seems to disallow the case where both Rd and
   19895          Rm are "low" registers, but newer versions allow it. */
   19896       if (1 /*h1 != 0 || h2 != 0*/) {
   19897          IRTemp val = newTemp(Ity_I32);
   19898          assign( val, getIRegT(rM) );
   19899          if (rD != 15) {
   19900             putIRegT( rD, mkexpr(val), condT );
   19901          } else {
   19902             /* Only allowed outside or last-in IT block; SIGILL if not so. */
   19903             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   19904             /* jump over insn if not selected */
   19905             mk_skip_over_T16_if_cond_is_false(condT);
   19906             condT = IRTemp_INVALID;
   19907             // now uncond
   19908             /* non-interworking branch */
   19909             llPutIReg(15, binop(Iop_Or32, mkexpr(val), mkU32(1)));
   19910             dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
   19911             dres.whatNext    = Dis_StopHere;
   19912          }
   19913          DIP("mov r%u, r%u\n", rD, rM);
   19914          goto decode_success;
   19915       }
   19916       break;
   19917    }
   19918 
   19919    case BITS8(1,0,1,1,1,1,1,1): {
   19920       /* ---------------- IT (if-then) ---------------- */
   19921       UInt firstcond = INSN0(7,4);
   19922       UInt mask = INSN0(3,0);
   19923       UInt newITSTATE = 0;
   19924       /* This is the ITSTATE represented as described in
   19925          libvex_guest_arm.h.  It is not the ARM ARM representation. */
   19926       HChar c1 = '.';
   19927       HChar c2 = '.';
   19928       HChar c3 = '.';
   19929       Bool valid = compute_ITSTATE( &newITSTATE, &c1, &c2, &c3,
   19930                                     firstcond, mask );
   19931       if (valid && firstcond != 0xF/*NV*/) {
   19932          /* Not allowed in an IT block; SIGILL if so. */
   19933          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   19934 
   19935          IRTemp t = newTemp(Ity_I32);
   19936          assign(t, mkU32(newITSTATE));
   19937          put_ITSTATE(t);
   19938 
   19939          DIP("it%c%c%c %s\n", c1, c2, c3, nCC(firstcond));
   19940          goto decode_success;
   19941       }
   19942       break;
   19943    }
   19944 
   19945    case BITS8(1,0,1,1,0,0,0,1):
   19946    case BITS8(1,0,1,1,0,0,1,1):
   19947    case BITS8(1,0,1,1,1,0,0,1):
   19948    case BITS8(1,0,1,1,1,0,1,1): {
   19949       /* ---------------- CB{N}Z ---------------- */
   19950       UInt rN    = INSN0(2,0);
   19951       UInt bOP   = INSN0(11,11);
   19952       UInt imm32 = (INSN0(9,9) << 6) | (INSN0(7,3) << 1);
   19953       gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   19954       /* It's a conditional branch forward. */
   19955       IRTemp kond = newTemp(Ity_I1);
   19956       assign( kond, binop(bOP ? Iop_CmpNE32 : Iop_CmpEQ32,
   19957                           getIRegT(rN), mkU32(0)) );
   19958 
   19959       vassert(0 == (guest_R15_curr_instr_notENC & 1));
   19960       /* Looks like the nearest insn we can branch to is the one after
   19961          next.  That makes sense, as there's no point in being able to
   19962          encode a conditional branch to the next instruction. */
   19963       UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
   19964       stmt(IRStmt_Exit( mkexpr(kond),
   19965                         Ijk_Boring,
   19966                         IRConst_U32(toUInt(dst)),
   19967                         OFFB_R15T ));
   19968       DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1);
   19969       goto decode_success;
   19970    }
   19971 
   19972    default:
   19973       break; /* examine the next shortest prefix */
   19974 
   19975    }
   19976 
   19977 
   19978    /* ================ 16-bit 15:9 cases ================ */
   19979 
   19980    switch (INSN0(15,9)) {
   19981 
   19982    case BITS7(1,0,1,1,0,1,0): {
   19983       /* ---------------- PUSH ---------------- */
   19984       /* This is a bit like STMxx, but way simpler. Complications we
   19985          don't have to deal with:
   19986          * SP being one of the transferred registers
   19987          * direction (increment vs decrement)
   19988          * before-vs-after-ness
   19989       */
   19990       Int  i, nRegs;
   19991       UInt bitR    = INSN0(8,8);
   19992       UInt regList = INSN0(7,0);
   19993       if (bitR) regList |= (1 << 14);
   19994 
   19995       /* At least one register must be transferred, else result is
   19996          UNPREDICTABLE. */
   19997       if (regList != 0) {
   19998          /* Since we can't generate a guaranteed non-trapping IR
   19999             sequence, (1) jump over the insn if it is gated false, and
   20000             (2) back out the ITSTATE update. */
   20001          mk_skip_over_T16_if_cond_is_false(condT);
   20002          condT = IRTemp_INVALID;
   20003          put_ITSTATE(old_itstate);
   20004          // now uncond
   20005 
   20006          nRegs = 0;
   20007          for (i = 0; i < 16; i++) {
   20008             if ((regList & (1 << i)) != 0)
   20009                nRegs++;
   20010          }
   20011          vassert(nRegs >= 1 && nRegs <= 9);
   20012 
   20013          /* Move SP down first of all, so we're "covered".  And don't
   20014             mess with its alignment. */
   20015          IRTemp newSP = newTemp(Ity_I32);
   20016          assign(newSP, binop(Iop_Sub32, getIRegT(13), mkU32(4 * nRegs)));
   20017          putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
   20018 
   20019          /* Generate a transfer base address as a forced-aligned
   20020             version of the final SP value. */
   20021          IRTemp base = newTemp(Ity_I32);
   20022          assign(base, binop(Iop_And32, mkexpr(newSP), mkU32(~3)));
   20023 
   20024          /* Now the transfers */
   20025          nRegs = 0;
   20026          for (i = 0; i < 16; i++) {
   20027             if ((regList & (1 << i)) != 0) {
   20028                storeLE( binop(Iop_Add32, mkexpr(base), mkU32(4 * nRegs)),
   20029                         getIRegT(i) );
   20030                nRegs++;
   20031             }
   20032          }
   20033 
   20034          /* Reinstate the ITSTATE update. */
   20035          put_ITSTATE(new_itstate);
   20036 
   20037          DIP("push {%s0x%04x}\n", bitR ? "lr," : "", regList & 0xFF);
   20038          goto decode_success;
   20039       }
   20040       break;
   20041    }
   20042 
   20043    case BITS7(1,0,1,1,1,1,0): {
   20044       /* ---------------- POP ---------------- */
   20045       Int  i, nRegs;
   20046       UInt bitR    = INSN0(8,8);
   20047       UInt regList = INSN0(7,0);
   20048 
   20049       /* At least one register must be transferred, else result is
   20050          UNPREDICTABLE. */
   20051       if (regList != 0 || bitR) {
   20052          /* Since we can't generate a guaranteed non-trapping IR
   20053             sequence, (1) jump over the insn if it is gated false, and
   20054             (2) back out the ITSTATE update. */
   20055          mk_skip_over_T16_if_cond_is_false(condT);
   20056          condT = IRTemp_INVALID;
   20057          put_ITSTATE(old_itstate);
   20058          // now uncond
   20059 
   20060          nRegs = 0;
   20061          for (i = 0; i < 8; i++) {
   20062             if ((regList & (1 << i)) != 0)
   20063                nRegs++;
   20064          }
   20065          vassert(nRegs >= 0 && nRegs <= 8);
   20066          vassert(bitR == 0 || bitR == 1);
   20067 
   20068          IRTemp oldSP = newTemp(Ity_I32);
   20069          assign(oldSP, getIRegT(13));
   20070 
   20071          /* Generate a transfer base address as a forced-aligned
   20072             version of the original SP value. */
   20073          IRTemp base = newTemp(Ity_I32);
   20074          assign(base, binop(Iop_And32, mkexpr(oldSP), mkU32(~3)));
   20075 
   20076          /* Compute a new value for SP, but don't install it yet, so
   20077             that we're "covered" until all the transfers are done.
   20078             And don't mess with its alignment. */
   20079          IRTemp newSP = newTemp(Ity_I32);
   20080          assign(newSP, binop(Iop_Add32, mkexpr(oldSP),
   20081                                         mkU32(4 * (nRegs + bitR))));
   20082 
   20083          /* Now the transfers, not including PC */
   20084          nRegs = 0;
   20085          for (i = 0; i < 8; i++) {
   20086             if ((regList & (1 << i)) != 0) {
   20087                putIRegT(i, loadLE( Ity_I32,
   20088                                    binop(Iop_Add32, mkexpr(base),
   20089                                                     mkU32(4 * nRegs))),
   20090                            IRTemp_INVALID );
   20091                nRegs++;
   20092             }
   20093          }
   20094 
   20095          IRTemp newPC = IRTemp_INVALID;
   20096          if (bitR) {
   20097             newPC = newTemp(Ity_I32);
   20098             assign( newPC, loadLE( Ity_I32,
   20099                                    binop(Iop_Add32, mkexpr(base),
   20100                                                     mkU32(4 * nRegs))));
   20101          }
   20102 
   20103          /* Now we can safely install the new SP value */
   20104          putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
   20105 
   20106          /* Reinstate the ITSTATE update. */
   20107          put_ITSTATE(new_itstate);
   20108 
   20109          /* now, do we also have to do a branch?  If so, it turns out
   20110             that the new PC value is encoded exactly as we need it to
   20111             be -- with CPSR.T in the bottom bit.  So we can simply use
   20112             it as is, no need to mess with it.  Note, therefore, this
   20113             is an interworking return. */
   20114          if (bitR) {
   20115             llPutIReg(15, mkexpr(newPC));
   20116             dres.jk_StopHere = Ijk_Ret;
   20117             dres.whatNext    = Dis_StopHere;
   20118          }
   20119 
   20120          DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
   20121          goto decode_success;
   20122       }
   20123       break;
   20124    }
   20125 
   20126    case BITS7(0,0,0,1,1,1,0):   /* ADDS */
   20127    case BITS7(0,0,0,1,1,1,1): { /* SUBS */
   20128       /* ---------------- ADDS Rd, Rn, #uimm3 ---------------- */
   20129       /* ---------------- SUBS Rd, Rn, #uimm3 ---------------- */
   20130       UInt   uimm3 = INSN0(8,6);
   20131       UInt   rN    = INSN0(5,3);
   20132       UInt   rD    = INSN0(2,0);
   20133       UInt   isSub = INSN0(9,9);
   20134       IRTemp argL  = newTemp(Ity_I32);
   20135       IRTemp argR  = newTemp(Ity_I32);
   20136       assign( argL, getIRegT(rN) );
   20137       assign( argR, mkU32(uimm3) );
   20138       putIRegT(rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
   20139                          mkexpr(argL), mkexpr(argR)),
   20140                    condT);
   20141       setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
   20142                       argL, argR, cond_AND_notInIT_T );
   20143       DIP("%s r%u, r%u, #%u\n", isSub ? "subs" : "adds", rD, rN, uimm3);
   20144       goto decode_success;
   20145    }
   20146 
   20147    case BITS7(0,0,0,1,1,0,0):   /* ADDS */
   20148    case BITS7(0,0,0,1,1,0,1): { /* SUBS */
   20149       /* ---------------- ADDS Rd, Rn, Rm ---------------- */
   20150       /* ---------------- SUBS Rd, Rn, Rm ---------------- */
   20151       UInt   rM    = INSN0(8,6);
   20152       UInt   rN    = INSN0(5,3);
   20153       UInt   rD    = INSN0(2,0);
   20154       UInt   isSub = INSN0(9,9);
   20155       IRTemp argL  = newTemp(Ity_I32);
   20156       IRTemp argR  = newTemp(Ity_I32);
   20157       assign( argL, getIRegT(rN) );
   20158       assign( argR, getIRegT(rM) );
   20159       putIRegT( rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
   20160                           mkexpr(argL), mkexpr(argR)),
   20161                     condT );
   20162       setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
   20163                       argL, argR, cond_AND_notInIT_T );
   20164       DIP("%s r%u, r%u, r%u\n", isSub ? "subs" : "adds", rD, rN, rM);
   20165       goto decode_success;
   20166    }
   20167 
   20168    case BITS7(0,1,0,1,0,0,0):   /* STR */
   20169    case BITS7(0,1,0,1,1,0,0): { /* LDR */
   20170       /* ------------- LDR Rd, [Rn, Rm] ------------- */
   20171       /* ------------- STR Rd, [Rn, Rm] ------------- */
   20172       /* LDR/STR Rd, [Rn + Rm] */
   20173       UInt    rD   = INSN0(2,0);
   20174       UInt    rN   = INSN0(5,3);
   20175       UInt    rM   = INSN0(8,6);
   20176       UInt    isLD = INSN0(11,11);
   20177 
   20178       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   20179       put_ITSTATE(old_itstate); // backout
   20180       if (isLD) {
   20181          IRTemp tD = newTemp(Ity_I32);
   20182          loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
   20183          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   20184       } else {
   20185          storeGuardedLE(ea, getIRegT(rD), condT);
   20186       }
   20187       put_ITSTATE(new_itstate); // restore
   20188 
   20189       DIP("%s r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
   20190       goto decode_success;
   20191    }
   20192 
   20193    case BITS7(0,1,0,1,0,0,1):
   20194    case BITS7(0,1,0,1,1,0,1): {
   20195       /* ------------- LDRH Rd, [Rn, Rm] ------------- */
   20196       /* ------------- STRH Rd, [Rn, Rm] ------------- */
   20197       /* LDRH/STRH Rd, [Rn + Rm] */
   20198       UInt    rD   = INSN0(2,0);
   20199       UInt    rN   = INSN0(5,3);
   20200       UInt    rM   = INSN0(8,6);
   20201       UInt    isLD = INSN0(11,11);
   20202 
   20203       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   20204       put_ITSTATE(old_itstate); // backout
   20205       if (isLD) {
   20206          IRTemp tD = newTemp(Ity_I32);
   20207          loadGuardedLE(tD, ILGop_16Uto32, ea, llGetIReg(rD), condT);
   20208          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   20209       } else {
   20210          storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
   20211       }
   20212       put_ITSTATE(new_itstate); // restore
   20213 
   20214       DIP("%sh r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
   20215       goto decode_success;
   20216    }
   20217 
   20218    case BITS7(0,1,0,1,1,1,1): {
   20219       /* ------------- LDRSH Rd, [Rn, Rm] ------------- */
   20220       /* LDRSH Rd, [Rn + Rm] */
   20221       UInt    rD = INSN0(2,0);
   20222       UInt    rN = INSN0(5,3);
   20223       UInt    rM = INSN0(8,6);
   20224 
   20225       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   20226       put_ITSTATE(old_itstate); // backout
   20227       IRTemp tD = newTemp(Ity_I32);
   20228       loadGuardedLE(tD, ILGop_16Sto32, ea, llGetIReg(rD), condT);
   20229       putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   20230       put_ITSTATE(new_itstate); // restore
   20231 
   20232       DIP("ldrsh r%u, [r%u, r%u]\n", rD, rN, rM);
   20233       goto decode_success;
   20234    }
   20235 
   20236    case BITS7(0,1,0,1,0,1,1): {
   20237       /* ------------- LDRSB Rd, [Rn, Rm] ------------- */
   20238       /* LDRSB Rd, [Rn + Rm] */
   20239       UInt    rD = INSN0(2,0);
   20240       UInt    rN = INSN0(5,3);
   20241       UInt    rM = INSN0(8,6);
   20242 
   20243       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   20244       put_ITSTATE(old_itstate); // backout
   20245       IRTemp tD = newTemp(Ity_I32);
   20246       loadGuardedLE(tD, ILGop_8Sto32, ea, llGetIReg(rD), condT);
   20247       putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   20248       put_ITSTATE(new_itstate); // restore
   20249 
   20250       DIP("ldrsb r%u, [r%u, r%u]\n", rD, rN, rM);
   20251       goto decode_success;
   20252    }
   20253 
   20254    case BITS7(0,1,0,1,0,1,0):
   20255    case BITS7(0,1,0,1,1,1,0): {
   20256       /* ------------- LDRB Rd, [Rn, Rm] ------------- */
   20257       /* ------------- STRB Rd, [Rn, Rm] ------------- */
   20258       /* LDRB/STRB Rd, [Rn + Rm] */
   20259       UInt    rD   = INSN0(2,0);
   20260       UInt    rN   = INSN0(5,3);
   20261       UInt    rM   = INSN0(8,6);
   20262       UInt    isLD = INSN0(11,11);
   20263 
   20264       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
   20265       put_ITSTATE(old_itstate); // backout
   20266       if (isLD) {
   20267          IRTemp tD = newTemp(Ity_I32);
   20268          loadGuardedLE(tD, ILGop_8Uto32, ea, llGetIReg(rD), condT);
   20269          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   20270       } else {
   20271          storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
   20272       }
   20273       put_ITSTATE(new_itstate); // restore
   20274 
   20275       DIP("%sb r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
   20276       goto decode_success;
   20277    }
   20278 
   20279    default:
   20280       break; /* examine the next shortest prefix */
   20281 
   20282    }
   20283 
   20284 
   20285    /* ================ 16-bit 15:11 cases ================ */
   20286 
   20287    switch (INSN0(15,11)) {
   20288 
   20289    case BITS5(0,0,1,1,0):
   20290    case BITS5(0,0,1,1,1): {
   20291       /* ---------------- ADDS Rn, #uimm8 ---------------- */
   20292       /* ---------------- SUBS Rn, #uimm8 ---------------- */
   20293       UInt   isSub = INSN0(11,11);
   20294       UInt   rN    = INSN0(10,8);
   20295       UInt   uimm8 = INSN0(7,0);
   20296       IRTemp argL  = newTemp(Ity_I32);
   20297       IRTemp argR  = newTemp(Ity_I32);
   20298       assign( argL, getIRegT(rN) );
   20299       assign( argR, mkU32(uimm8) );
   20300       putIRegT( rN, binop(isSub ? Iop_Sub32 : Iop_Add32,
   20301                           mkexpr(argL), mkexpr(argR)), condT );
   20302       setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
   20303                       argL, argR, cond_AND_notInIT_T );
   20304       DIP("%s r%u, #%u\n", isSub ? "subs" : "adds", rN, uimm8);
   20305       goto decode_success;
   20306    }
   20307 
   20308    case BITS5(1,0,1,0,0): {
   20309       /* ---------------- ADD rD, PC, #imm8 * 4 ---------------- */
   20310       /* a.k.a. ADR */
   20311       /* rD = align4(PC) + imm8 * 4 */
   20312       UInt rD   = INSN0(10,8);
   20313       UInt imm8 = INSN0(7,0);
   20314       putIRegT(rD, binop(Iop_Add32,
   20315                          binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   20316                          mkU32(imm8 * 4)),
   20317                    condT);
   20318       DIP("add r%u, pc, #%u\n", rD, imm8 * 4);
   20319       goto decode_success;
   20320    }
   20321 
   20322    case BITS5(1,0,1,0,1): {
   20323       /* ---------------- ADD rD, SP, #imm8 * 4 ---------------- */
   20324       UInt rD   = INSN0(10,8);
   20325       UInt imm8 = INSN0(7,0);
   20326       putIRegT(rD, binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4)),
   20327                    condT);
   20328       DIP("add r%u, r13, #%u\n", rD, imm8 * 4);
   20329       goto decode_success;
   20330    }
   20331 
   20332    case BITS5(0,0,1,0,1): {
   20333       /* ---------------- CMP Rn, #uimm8 ---------------- */
   20334       UInt   rN    = INSN0(10,8);
   20335       UInt   uimm8 = INSN0(7,0);
   20336       IRTemp argL  = newTemp(Ity_I32);
   20337       IRTemp argR  = newTemp(Ity_I32);
   20338       assign( argL, getIRegT(rN) );
   20339       assign( argR, mkU32(uimm8) );
   20340       /* Update flags regardless of whether in an IT block or not. */
   20341       setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   20342       DIP("cmp r%u, #%u\n", rN, uimm8);
   20343       goto decode_success;
   20344    }
   20345 
   20346    case BITS5(0,0,1,0,0): {
   20347       /* -------------- (T1) MOVS Rn, #uimm8 -------------- */
   20348       UInt   rD    = INSN0(10,8);
   20349       UInt   uimm8 = INSN0(7,0);
   20350       IRTemp oldV  = newTemp(Ity_I32);
   20351       IRTemp oldC  = newTemp(Ity_I32);
   20352       IRTemp res   = newTemp(Ity_I32);
   20353       assign( oldV, mk_armg_calculate_flag_v() );
   20354       assign( oldC, mk_armg_calculate_flag_c() );
   20355       assign( res, mkU32(uimm8) );
   20356       putIRegT(rD, mkexpr(res), condT);
   20357       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   20358                          cond_AND_notInIT_T );
   20359       DIP("movs r%u, #%u\n", rD, uimm8);
   20360       goto decode_success;
   20361    }
   20362 
   20363    case BITS5(0,1,0,0,1): {
   20364       /* ------------- LDR Rd, [PC, #imm8 * 4] ------------- */
   20365       /* LDR Rd, [align4(PC) + imm8 * 4] */
   20366       UInt   rD   = INSN0(10,8);
   20367       UInt   imm8 = INSN0(7,0);
   20368       IRTemp ea   = newTemp(Ity_I32);
   20369 
   20370       assign(ea, binop(Iop_Add32,
   20371                        binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   20372                        mkU32(imm8 * 4)));
   20373       put_ITSTATE(old_itstate); // backout
   20374       IRTemp tD = newTemp(Ity_I32);
   20375       loadGuardedLE( tD, ILGop_Ident32, mkexpr(ea), llGetIReg(rD), condT );
   20376       putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   20377       put_ITSTATE(new_itstate); // restore
   20378 
   20379       DIP("ldr r%u, [pc, #%u]\n", rD, imm8 * 4);
   20380       goto decode_success;
   20381    }
   20382 
   20383    case BITS5(0,1,1,0,0):   /* STR */
   20384    case BITS5(0,1,1,0,1): { /* LDR */
   20385       /* ------------- LDR Rd, [Rn, #imm5 * 4] ------------- */
   20386       /* ------------- STR Rd, [Rn, #imm5 * 4] ------------- */
   20387       /* LDR/STR Rd, [Rn + imm5 * 4] */
   20388       UInt    rD   = INSN0(2,0);
   20389       UInt    rN   = INSN0(5,3);
   20390       UInt    imm5 = INSN0(10,6);
   20391       UInt    isLD = INSN0(11,11);
   20392 
   20393       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 4));
   20394       put_ITSTATE(old_itstate); // backout
   20395       if (isLD) {
   20396          IRTemp tD = newTemp(Ity_I32);
   20397          loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
   20398          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   20399       } else {
   20400          storeGuardedLE( ea, getIRegT(rD), condT );
   20401       }
   20402       put_ITSTATE(new_itstate); // restore
   20403 
   20404       DIP("%s r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 4);
   20405       goto decode_success;
   20406    }
   20407 
   20408    case BITS5(1,0,0,0,0):   /* STRH */
   20409    case BITS5(1,0,0,0,1): { /* LDRH */
   20410       /* ------------- LDRH Rd, [Rn, #imm5 * 2] ------------- */
   20411       /* ------------- STRH Rd, [Rn, #imm5 * 2] ------------- */
   20412       /* LDRH/STRH Rd, [Rn + imm5 * 2] */
   20413       UInt    rD   = INSN0(2,0);
   20414       UInt    rN   = INSN0(5,3);
   20415       UInt    imm5 = INSN0(10,6);
   20416       UInt    isLD = INSN0(11,11);
   20417 
   20418       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 2));
   20419       put_ITSTATE(old_itstate); // backout
   20420       if (isLD) {
   20421          IRTemp tD = newTemp(Ity_I32);
   20422          loadGuardedLE( tD, ILGop_16Uto32, ea, llGetIReg(rD), condT );
   20423          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   20424       } else {
   20425          storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
   20426       }
   20427       put_ITSTATE(new_itstate); // restore
   20428 
   20429       DIP("%sh r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 2);
   20430       goto decode_success;
   20431    }
   20432 
   20433    case BITS5(0,1,1,1,0):   /* STRB */
   20434    case BITS5(0,1,1,1,1): { /* LDRB */
   20435       /* ------------- LDRB Rd, [Rn, #imm5] ------------- */
   20436       /* ------------- STRB Rd, [Rn, #imm5] ------------- */
   20437       /* LDRB/STRB Rd, [Rn + imm5] */
   20438       UInt    rD   = INSN0(2,0);
   20439       UInt    rN   = INSN0(5,3);
   20440       UInt    imm5 = INSN0(10,6);
   20441       UInt    isLD = INSN0(11,11);
   20442 
   20443       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5));
   20444       put_ITSTATE(old_itstate); // backout
   20445       if (isLD) {
   20446          IRTemp tD = newTemp(Ity_I32);
   20447          loadGuardedLE( tD, ILGop_8Uto32, ea, llGetIReg(rD), condT );
   20448          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   20449       } else {
   20450          storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
   20451       }
   20452       put_ITSTATE(new_itstate); // restore
   20453 
   20454       DIP("%sb r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5);
   20455       goto decode_success;
   20456    }
   20457 
   20458    case BITS5(1,0,0,1,0):   /* STR */
   20459    case BITS5(1,0,0,1,1): { /* LDR */
   20460       /* ------------- LDR Rd, [SP, #imm8 * 4] ------------- */
   20461       /* ------------- STR Rd, [SP, #imm8 * 4] ------------- */
   20462       /* LDR/STR Rd, [SP + imm8 * 4] */
   20463       UInt rD    = INSN0(10,8);
   20464       UInt imm8  = INSN0(7,0);
   20465       UInt isLD  = INSN0(11,11);
   20466 
   20467       IRExpr* ea = binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4));
   20468       put_ITSTATE(old_itstate); // backout
   20469       if (isLD) {
   20470          IRTemp tD = newTemp(Ity_I32);
   20471          loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
   20472          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
   20473       } else {
   20474          storeGuardedLE(ea, getIRegT(rD), condT);
   20475       }
   20476       put_ITSTATE(new_itstate); // restore
   20477 
   20478       DIP("%s r%u, [sp, #%u]\n", isLD ? "ldr" : "str", rD, imm8 * 4);
   20479       goto decode_success;
   20480    }
   20481 
   20482    case BITS5(1,1,0,0,1): {
   20483       /* ------------- LDMIA Rn!, {reglist} ------------- */
   20484       Int i, nRegs = 0;
   20485       UInt rN   = INSN0(10,8);
   20486       UInt list = INSN0(7,0);
   20487       /* Empty lists aren't allowed. */
   20488       if (list != 0) {
   20489          mk_skip_over_T16_if_cond_is_false(condT);
   20490          condT = IRTemp_INVALID;
   20491          put_ITSTATE(old_itstate);
   20492          // now uncond
   20493 
   20494          IRTemp oldRn = newTemp(Ity_I32);
   20495          IRTemp base  = newTemp(Ity_I32);
   20496          assign(oldRn, getIRegT(rN));
   20497          assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
   20498          for (i = 0; i < 8; i++) {
   20499             if (0 == (list & (1 << i)))
   20500                continue;
   20501             nRegs++;
   20502             putIRegT(
   20503                i, loadLE(Ity_I32,
   20504                          binop(Iop_Add32, mkexpr(base),
   20505                                           mkU32(nRegs * 4 - 4))),
   20506                IRTemp_INVALID
   20507             );
   20508          }
   20509          /* Only do the writeback for rN if it isn't in the list of
   20510             registers to be transferred. */
   20511          if (0 == (list & (1 << rN))) {
   20512             putIRegT(rN,
   20513                      binop(Iop_Add32, mkexpr(oldRn),
   20514                                       mkU32(nRegs * 4)),
   20515                      IRTemp_INVALID
   20516             );
   20517          }
   20518 
   20519          /* Reinstate the ITSTATE update. */
   20520          put_ITSTATE(new_itstate);
   20521 
   20522          DIP("ldmia r%u!, {0x%04x}\n", rN, list);
   20523          goto decode_success;
   20524       }
   20525       break;
   20526    }
   20527 
   20528    case BITS5(1,1,0,0,0): {
   20529       /* ------------- STMIA Rn!, {reglist} ------------- */
               /* 16-bit store-multiple with writeback.  rN is a 3-bit field,
                  so the base is one of r0..r7, and 'list' has one bit per
                  register r0..r7.  The selected registers are stored, lowest
                  numbered first, to consecutive words starting at
                  (Rn & ~3); Rn is then advanced by 4 * nRegs. */
   20530       Int i, nRegs = 0;
   20531       UInt rN   = INSN0(10,8);
   20532       UInt list = INSN0(7,0);
   20533       /* Empty lists aren't allowed.  Also, if rN is in the list then
   20534          it must be the lowest numbered register in the list. */
   20535       Bool valid = list != 0;
   20536       if (valid && 0 != (list & (1 << rN))) {
               /* rN is listed: reject if any lower-numbered register is
                  listed too. */
   20537          for (i = 0; i < rN; i++) {
   20538             if (0 != (list & (1 << i)))
   20539                valid = False;
   20540          }
   20541       }
   20542       if (valid) {
               /* NOTE(review): presumably emits a guarded skip over this
                  insn when condT is false, so that everything below can be
                  generated unconditionally -- confirm against the helper. */
   20543          mk_skip_over_T16_if_cond_is_false(condT);
   20544          condT = IRTemp_INVALID;
               /* Back out the new ITSTATE before touching memory; it is
                  reinstated after the stores (see below). */
   20545          put_ITSTATE(old_itstate);
   20546          // now uncond
   20547 
   20548          IRTemp oldRn = newTemp(Ity_I32);
   20549          IRTemp base = newTemp(Ity_I32);
   20550          assign(oldRn, getIRegT(rN));
               /* Transfers use the word-aligned base; the writeback below
                  uses the unmasked oldRn. */
   20551          assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
   20552          for (i = 0; i < 8; i++) {
   20553             if (0 == (list & (1 << i)))
   20554                continue;
   20555             nRegs++;
                  /* nRegs has already been bumped, hence the "- 4": the
                     first listed register goes to offset 0. */
   20556             storeLE( binop(Iop_Add32, mkexpr(base), mkU32(nRegs * 4 - 4)),
   20557                      getIRegT(i) );
   20558          }
   20559          /* Always do the writeback. */
   20560          putIRegT(rN,
   20561                   binop(Iop_Add32, mkexpr(oldRn),
   20562                                    mkU32(nRegs * 4)),
   20563                   IRTemp_INVALID);
   20564 
   20565          /* Reinstate the ITSTATE update. */
   20566          put_ITSTATE(new_itstate);
   20567 
   20568          DIP("stmia r%u!, {0x%04x}\n", rN, list);
   20569          goto decode_success;
   20570       }
            /* Invalid encoding: leave the switch undecoded so that later
               decode stages get a chance. */
   20571       break;
   20572    }
   20573 
   20574    case BITS5(0,0,0,0,0):   /* LSLS */
   20575    case BITS5(0,0,0,0,1):   /* LSRS */
   20576    case BITS5(0,0,0,1,0): { /* ASRS */
   20577       /* ---------------- LSLS Rd, Rm, #imm5 ---------------- */
   20578       /* ---------------- LSRS Rd, Rm, #imm5 ---------------- */
   20579       /* ---------------- ASRS Rd, Rm, #imm5 ---------------- */
            /* Shift Rm by a 5-bit immediate into Rd.  The shifted value
               (res) and the carry-out (resC) are produced by the
               per-shift-kind helper selected below; the flags thunk is
               then set from res, resC and the pre-existing V flag. */
   20580       UInt   rD   = INSN0(2,0);
   20581       UInt   rM   = INSN0(5,3);
   20582       UInt   imm5 = INSN0(10,6);
   20583       IRTemp res  = newTemp(Ity_I32);
   20584       IRTemp resC = newTemp(Ity_I32);
   20585       IRTemp rMt  = newTemp(Ity_I32);
   20586       IRTemp oldV = newTemp(Ity_I32);
   20587       const HChar* wot  = "???";
   20588       assign(rMt, getIRegT(rM));
            /* Capture the current V flag before any guest state is
               written; it is handed back to the flag setter below. */
   20589       assign(oldV, mk_armg_calculate_flag_v());
   20590       /* Looks like INSN0(12,11) are the standard 'how' encoding.
   20591          Could compactify if the ROR case later appears. */
   20592       switch (INSN0(15,11)) {
   20593          case BITS5(0,0,0,0,0):
   20594             compute_result_and_C_after_LSL_by_imm5(
   20595                dis_buf, &res, &resC, rMt, imm5, rM
   20596             );
   20597             wot = "lsl";
   20598             break;
   20599          case BITS5(0,0,0,0,1):
   20600             compute_result_and_C_after_LSR_by_imm5(
   20601                dis_buf, &res, &resC, rMt, imm5, rM
   20602             );
   20603             wot = "lsr";
   20604             break;
   20605          case BITS5(0,0,0,1,0):
   20606             compute_result_and_C_after_ASR_by_imm5(
   20607                dis_buf, &res, &resC, rMt, imm5, rM
   20608             );
   20609             wot = "asr";
   20610             break;
   20611          default:
                  /* The enclosing case labels admit only the three
                     encodings handled above. */
   20612             /*NOTREACHED*/vassert(0);
   20613       }
   20614       // not safe to read guest state after this point
   20615       putIRegT(rD, mkexpr(res), condT);
            /* The register write is guarded by condT, but the flag update
               by cond_AND_notInIT_T.  NOTE(review): going by the name, the
               flags are only updated when the insn is not inside an IT
               block -- confirm where that temp is defined. */
   20616       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
   20617                          cond_AND_notInIT_T );
   20618       /* ignore buf and roll our own output */
   20619       DIP("%ss r%u, r%u, #%u\n", wot, rD, rM, imm5);
   20620       goto decode_success;
   20621    }
   20622 
   20623    case BITS5(1,1,1,0,0): {
   20624       /* ---------------- B #simm11 ---------------- */
            /* Sign-extend the 11-bit field and double it in one go: move
               it to the top of the word (<< 21), then shift right by only
               20 as a signed value.  This relies on ">>" of a negative Int
               being an arithmetic shift. */
   20625       UInt uimm11 = INSN0(10,0);  uimm11 <<= 21;
   20626       Int  simm11 = (Int)uimm11;  simm11 >>= 20;
            /* Target is relative to this insn's address plus 4. */
   20627       UInt dst    = simm11 + guest_R15_curr_instr_notENC + 4;
   20628       /* Only allowed outside or last-in IT block; SIGILL if not so. */
   20629       gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20630       // and skip this insn if not selected; being cleverer is too
   20631       // difficult
   20632       mk_skip_over_T16_if_cond_is_false(condT);
   20633       condT = IRTemp_INVALID;
   20634       // now uncond
            /* Bit 0 of the value written to r15 carries the Thumb bit. */
   20635       llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
            /* The branch ends the current block. */
   20636       dres.jk_StopHere = Ijk_Boring;
   20637       dres.whatNext    = Dis_StopHere;
   20638       DIP("b 0x%x\n", dst);
   20639       goto decode_success;
   20640    }
   20641 
   20642    default:
   20643       break; /* examine the next shortest prefix */
   20644 
   20645    }
   20646 
   20647 
   20648    /* ================ 16-bit 15:12 cases ================ */
   20649 
   20650    switch (INSN0(15,12)) {
   20651 
   20652    case BITS4(1,1,0,1): {
   20653       /* ---------------- Bcond #simm8 ---------------- */
   20654       UInt cond  = INSN0(11,8);
            /* Sign-extend the 8-bit field and double it: << 24 puts it at
               the top of the word, the signed >> 23 brings it back one
               place short.  Relies on ">>" of a negative Int being an
               arithmetic shift. */
   20655       UInt uimm8 = INSN0(7,0);  uimm8 <<= 24;
   20656       Int  simm8 = (Int)uimm8;  simm8 >>= 23;
   20657       UInt dst   = simm8 + guest_R15_curr_instr_notENC + 4;
            /* The AL and NV condition values are not decoded here; they
               fall out through the 'break' below. */
   20658       if (cond != ARMCondAL && cond != ARMCondNV) {
   20659          /* Not allowed in an IT block; SIGILL if so. */
   20660          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   20661 
               /* Side exit to dst (Thumb bit set) if the condition
                  holds ... */
   20662          IRTemp kondT = newTemp(Ity_I32);
   20663          assign( kondT, mk_armg_calculate_condition(cond) );
   20664          stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
   20665                             Ijk_Boring,
   20666                             IRConst_U32(dst | 1/*CPSR.T*/),
   20667                             OFFB_R15T ));
               /* ... otherwise carry on at the following 16-bit insn.
                  Either way the block ends here. */
   20668          llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2)
   20669                               | 1 /*CPSR.T*/ ));
   20670          dres.jk_StopHere = Ijk_Boring;
   20671          dres.whatNext    = Dis_StopHere;
   20672          DIP("b%s 0x%x\n", nCC(cond), dst);
   20673          goto decode_success;
   20674       }
   20675       break;
   20676    }
   20677 
   20678    default:
   20679       break; /* hmm, nothing matched */
   20680 
   20681    }
   20682 
   20683    /* ================ 16-bit misc cases ================ */
   20684 
         /* Hint instructions, matched on the complete 16-bit encoding. */
   20685    switch (INSN0(15,0)) {
   20686       case 0xBF00:
   20687          /* ------ NOP ------ */
               /* No IR is generated. */
   20688          DIP("nop\n");
   20689          goto decode_success;
   20690       case 0xBF10: // YIELD
   20691       case 0xBF20: // WFE
   20692          /* ------ WFE, YIELD ------ */
   20693          /* Both appear to get used as a spin-loop hints.  Do the usual thing,
   20694             which is to continue after yielding. */
               /* Side exit of kind Ijk_Yield, guarded by condT, whose
                  destination is the next 16-bit insn with the Thumb bit
                  set. */
   20695          stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
   20696                             Ijk_Yield,
   20697                             IRConst_U32((guest_R15_curr_instr_notENC + 2)
   20698                                         | 1 /*CPSR.T*/),
   20699                             OFFB_R15T ));
               /* isWFE only selects the disassembly text; the IR is the
                  same for both encodings. */
   20700          Bool isWFE = INSN0(15,0) == 0xBF20;
   20701          DIP(isWFE ? "wfe\n" : "yield\n");
   20702          goto decode_success;
   20703       case 0xBF40:
   20704          /* ------ SEV ------ */
   20705          /* Treat this as a no-op.  Any matching WFEs won't really
   20706             cause the host CPU to snooze; they just cause V to try to
   20707             run some other thread for a while.  So there's no point in
   20708             really doing anything for SEV. */
   20709          DIP("sev\n");
   20710          goto decode_success;
   20711       default:
   20712          break; /* fall through */
   20713    }
   20714 
   20715    /* ----------------------------------------------------------- */
   20716    /* --                                                       -- */
   20717    /* -- Thumb 32-bit integer instructions                     -- */
   20718    /* --                                                       -- */
   20719    /* ----------------------------------------------------------- */
   20720 
         /* INSN1(max,min): extract bits [max:min] of the second halfword
            (insn1) of a 32-bit Thumb instruction. */
   20721 #  define INSN1(_bMax,_bMin)  SLICE_UInt(((UInt)insn1), (_bMax), (_bMin))
   20722 
   20723    /* second 16 bits of the instruction, if any */
         /* insn1 must not have been loaded yet; fetch it from the two
            bytes following the first halfword. */
   20724    vassert(insn1 == 0);
   20725    insn1 = getUShortLittleEndianly( guest_instr+2 );
   20726 
   20727    anOp   = Iop_INVALID; /* paranoia */
   20728    anOpNm = NULL;        /* paranoia */
   20729 
   20730    /* Change result defaults to suit 32-bit insns. */
         /* Check that no 16-bit case has altered the result record, then
            bump the instruction length from 2 to 4 bytes. */
   20731    vassert(dres.whatNext   == Dis_Continue);
   20732    vassert(dres.len        == 2);
   20733    vassert(dres.continueAt == 0);
   20734    dres.len = 4;
   20735 
   20736    /* ---------------- BL/BLX simm26 ---------------- */
   20737    if (BITS5(1,1,1,1,0) == INSN0(15,11) && BITS2(1,1) == INSN1(15,14)) {
   20738       UInt isBL = INSN1(12,12);
   20739       UInt bS   = INSN0(10,10);
   20740       UInt bJ1  = INSN1(13,13);
   20741       UInt bJ2  = INSN1(11,11);
            /* I1 = NOT(J1 XOR S), I2 = NOT(J2 XOR S) */
   20742       UInt bI1  = 1 ^ (bJ1 ^ bS);
   20743       UInt bI2  = 1 ^ (bJ2 ^ bS);
            /* Assemble S:I1:I2:insn0[9:0]:insn1[10:0]:0, a 25-bit value
               with S at bit 24. */
   20744       UInt uimm25
   20745          =   (bS          << (1 + 1 + 10 + 11 + 1))
   20746            | (bI1         << (1 + 10 + 11 + 1))
   20747            | (bI2         << (10 + 11 + 1))
   20748            | (INSN0(9,0)  << (11 + 1))
   20749            | (INSN1(10,0) << 1);
            /* Sign-extend from bit 24: shift it up to bit 31, then back
               down again as a signed value.  Relies on ">>" of a negative
               Int being an arithmetic shift. */
   20750       uimm25 <<= 7;
   20751       Int simm25 = (Int)uimm25;
   20752       simm25 >>= 7;
   20753 
   20754       vassert(0 == (guest_R15_curr_instr_notENC & 1));
   20755       UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
   20756 
   20757       /* One further validity case to check: in the case of BLX
   20758          (not-BL), that insn1[0] must be zero. */
   20759       Bool valid = True;
   20760       if (isBL == 0 && INSN1(0,0) == 1) valid = False;
   20761       if (valid) {
   20762          /* Only allowed outside or last-in IT block; SIGILL if not so. */
   20763          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20764          // and skip this insn if not selected; being cleverer is too
   20765          // difficult
   20766          mk_skip_over_T32_if_cond_is_false(condT);
   20767          condT = IRTemp_INVALID;
   20768          // now uncond
   20769 
               /* Link register := address of the insn after this 4-byte
                  one. */
   20770          /* We're returning to Thumb code, hence "| 1" */
   20771          putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 4) | 1 ),
   20772                    IRTemp_INVALID);
   20773          if (isBL) {
   20774             /* BL: unconditional T -> T call */
   20775             /* we're calling Thumb code, hence "| 1" */
   20776             llPutIReg(15, mkU32( dst | 1 ));
   20777             DIP("bl 0x%x (stay in Thumb mode)\n", dst);
   20778          } else {
   20779             /* BLX: unconditional T -> A call */
   20780             /* we're calling ARM code, hence "& ~3" to align to a
   20781                valid ARM insn address */
   20782             llPutIReg(15, mkU32( dst & ~3 ));
   20783             DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
   20784          }
               /* The call ends the current block. */
   20785          dres.whatNext    = Dis_StopHere;
   20786          dres.jk_StopHere = Ijk_Call;
   20787          goto decode_success;
   20788       }
   20789    }
   20790 
   20791    /* ---------------- {LD,ST}M{IA,DB} ---------------- */
         /* 32-bit load/store multiple.  The first halfword selects IA
            (0x3a2) or DB (0x3a4), the writeback bit, load vs store, and
            the base register; the second halfword is the register list. */
   20792    if (0x3a2 == INSN0(15,6) // {LD,ST}MIA
   20793        || 0x3a4 == INSN0(15,6)) { // {LD,ST}MDB
   20794       UInt bW      = INSN0(5,5); /* writeback Rn ? */
   20795       UInt bL      = INSN0(4,4);
   20796       UInt rN      = INSN0(3,0);
   20797       UInt bP      = INSN1(15,15); /* reglist entry for r15 */
   20798       UInt bM      = INSN1(14,14); /* reglist entry for r14 */
   20799       UInt rLmost  = INSN1(12,0);  /* reglist entry for r0 .. 12 */
   20800       UInt rL13    = INSN1(13,13); /* must be zero */
   20801       UInt regList = 0;
   20802       Bool valid   = True;
   20803 
            /* Defaults describe IA (increment, after); DB flips both. */
   20804       UInt bINC    = 1;
   20805       UInt bBEFORE = 0;
   20806       if (INSN0(15,6) == 0x3a4) {
   20807          bINC    = 0;
   20808          bBEFORE = 1;
   20809       }
   20810 
   20811       /* detect statically invalid cases, and construct the final
   20812          reglist */
   20813       if (rL13 == 1)
   20814          valid = False;
   20815 
   20816       if (bL == 1) {
               /* Load: r15 may be listed, but not together with r14.
                  The base may not be r15, at least two registers must be
                  listed, and with writeback the base may not be listed. */
   20817          regList = (bP << 15) | (bM << 14) | rLmost;
   20818          if (rN == 15)                       valid = False;
   20819          if (popcount32(regList) < 2)        valid = False;
   20820          if (bP == 1 && bM == 1)             valid = False;
   20821          if (bW == 1 && (regList & (1<<rN))) valid = False;
   20822       } else {
               /* Store: r15 may not be listed at all; the remaining
                  checks are the same as for the load case. */
   20823          regList = (bM << 14) | rLmost;
   20824          if (bP == 1)                        valid = False;
   20825          if (rN == 15)                       valid = False;
   20826          if (popcount32(regList) < 2)        valid = False;
   20827          if (bW == 1 && (regList & (1<<rN))) valid = False;
   20828       }
   20829 
   20830       if (valid) {
   20831          if (bL == 1 && bP == 1) {
   20832             // We'll be writing the PC.  Hence:
   20833             /* Only allowed outside or last-in IT block; SIGILL if not so. */
   20834             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   20835          }
   20836 
   20837          /* Go uncond: */
   20838          mk_skip_over_T32_if_cond_is_false(condT);
   20839          condT = IRTemp_INVALID;
   20840          // now uncond
   20841 
   20842          /* Generate the IR.  This might generate a write to R15. */
   20843          mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
   20844 
   20845          if (bL == 1 && (regList & (1<<15))) {
   20846             // If we wrote to R15, we have an interworking return to
   20847             // deal with.
                  /* NOTE(review): this re-puts r15 with the value just
                     loaded into it; presumably that makes it the block's
                     final next-insn address -- confirm.  The block is
                     ended and marked as a return. */
   20848             llPutIReg(15, llGetIReg(15));
   20849             dres.jk_StopHere = Ijk_Ret;
   20850             dres.whatNext    = Dis_StopHere;
   20851          }
   20852 
   20853          DIP("%sm%c%c r%u%s, {0x%04x}\n",
   20854               bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
   20855               rN, bW ? "!" : "", regList);
   20856 
   20857          goto decode_success;
   20858       }
   20859    }
   20860 
   20861    /* -------------- (T3) ADD{S}.W Rd, Rn, #constT -------------- */
   20862    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   20863        && INSN0(9,5) == BITS5(0,1,0,0,0)
   20864        && INSN1(15,15) == 0) {
   20865       UInt bS = INSN0(4,4);
   20866       UInt rN = INSN0(3,0);
   20867       UInt rD = INSN1(11,8);
   20868       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
   20869       /* but allow "add.w reg, sp, #constT" for reg != PC */
   20870       if (!valid && rD <= 14 && rN == 13)
   20871          valid = True;
   20872       if (valid) {
   20873          IRTemp argL  = newTemp(Ity_I32);
   20874          IRTemp argR  = newTemp(Ity_I32);
   20875          IRTemp res   = newTemp(Ity_I32);
               /* NOTE(review): presumably expands the Thumb modified
                  immediate spread over both halfwords; the first argument
                  is NULL here and &updC in the logical-op decoders, so it
                  looks like an optional carry-update out-parameter --
                  confirm against the helper. */
   20876          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   20877          assign(argL, getIRegT(rN));
   20878          assign(argR, mkU32(imm32));
   20879          assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
   20880          putIRegT(rD, mkexpr(res), condT);
               /* Flags are derived from the two operands, and only for
                  the S form. */
   20881          if (bS == 1)
   20882             setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
   20883          DIP("add%s.w r%u, r%u, #%u\n",
   20884              bS == 1 ? "s" : "", rD, rN, imm32);
   20885          goto decode_success;
   20886       }
   20887    }
   20888 
   20889    /* ---------------- (T4) ADDW Rd, Rn, #uimm12 -------------- */
         /* Adds a plain 12-bit unsigned immediate; no flags are set. */
   20890    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   20891        && INSN0(9,4) == BITS6(1,0,0,0,0,0)
   20892        && INSN1(15,15) == 0) {
   20893       UInt rN = INSN0(3,0);
   20894       UInt rD = INSN1(11,8);
   20895       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
   20896       /* but allow "addw reg, sp, #uimm12" for reg != PC */
   20897       if (!valid && rD <= 14 && rN == 13)
   20898          valid = True;
   20899       if (valid) {
   20900          IRTemp argL = newTemp(Ity_I32);
   20901          IRTemp argR = newTemp(Ity_I32);
   20902          IRTemp res  = newTemp(Ity_I32);
               /* imm12 = insn0[10] : insn1[14:12] : insn1[7:0] */
   20903          UInt imm12  = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
   20904          assign(argL, getIRegT(rN));
   20905          assign(argR, mkU32(imm12));
   20906          assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
   20907          putIRegT(rD, mkexpr(res), condT);
   20908          DIP("addw r%u, r%u, #%u\n", rD, rN, imm12);
   20909          goto decode_success;
   20910       }
   20911    }
   20912 
   20913    /* ---------------- (T2) CMP.W Rn, #constT ---------------- */
   20914    /* ---------------- (T2) CMN.W Rn, #constT ---------------- */
         /* Compare only: the flags thunk is set, no register is written.
            The match requires insn1[11:8] to be all ones. */
   20915    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   20916        && (   INSN0(9,4) == BITS6(0,1,1,0,1,1)  // CMP
   20917            || INSN0(9,4) == BITS6(0,1,0,0,0,1)) // CMN
   20918        && INSN1(15,15) == 0
   20919        && INSN1(11,8) == BITS4(1,1,1,1)) {
   20920       UInt rN = INSN0(3,0);
   20921       if (rN != 15) {
   20922          IRTemp argL  = newTemp(Ity_I32);
   20923          IRTemp argR  = newTemp(Ity_I32);
   20924          Bool   isCMN = INSN0(9,4) == BITS6(0,1,0,0,0,1);
   20925          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   20926          assign(argL, getIRegT(rN));
   20927          assign(argR, mkU32(imm32));
               /* CMN sets flags as for an addition, CMP as for a
                  subtraction. */
   20928          setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   20929                          argL, argR, condT );
   20930          DIP("%s.w r%u, #%u\n", isCMN ? "cmn" : "cmp", rN, imm32);
   20931          goto decode_success;
   20932       }
   20933    }
   20934 
   20935    /* -------------- (T1) TST.W Rn, #constT -------------- */
   20936    /* -------------- (T1) TEQ.W Rn, #constT -------------- */
         /* Test only: Rn AND imm (TST) or Rn XOR imm (TEQ) is computed
            purely to set the flags; no register is written. */
   20937    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   20938        && (   INSN0(9,4) == BITS6(0,0,0,0,0,1)  // TST
   20939            || INSN0(9,4) == BITS6(0,0,1,0,0,1)) // TEQ
   20940        && INSN1(15,15) == 0
   20941        && INSN1(11,8) == BITS4(1,1,1,1)) {
   20942       UInt rN = INSN0(3,0);
   20943       if (!isBadRegT(rN)) { // yes, really, it's inconsistent with CMP.W
   20944          Bool  isTST  = INSN0(9,4) == BITS6(0,0,0,0,0,1);
   20945          IRTemp argL  = newTemp(Ity_I32);
   20946          IRTemp argR  = newTemp(Ity_I32);
   20947          IRTemp res   = newTemp(Ity_I32);
   20948          IRTemp oldV  = newTemp(Ity_I32);
   20949          IRTemp oldC  = newTemp(Ity_I32);
               /* updC is filled in by the immediate expander; it selects
                  where the C flag value comes from below. */
   20950          Bool   updC  = False;
   20951          UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
   20952          assign(argL, getIRegT(rN));
   20953          assign(argR, mkU32(imm32));
   20954          assign(res,  binop(isTST ? Iop_And32 : Iop_Xor32,
   20955                             mkexpr(argL), mkexpr(argR)));
   20956          assign( oldV, mk_armg_calculate_flag_v() );
               /* C comes from bit 31 of the expanded immediate when updC
                  is set, else it keeps its current value (despite the
                  name, oldC then holds the new C). */
   20957          assign( oldC, updC
   20958                        ? mkU32((imm32 >> 31) & 1)
   20959                        : mk_armg_calculate_flag_c() );
   20960          setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
   20961          DIP("%s.w r%u, #%u\n", isTST ? "tst" : "teq", rN, imm32);
   20962          goto decode_success;
   20963       }
   20964    }
   20965 
   20966    /* -------------- (T3) SUB{S}.W Rd, Rn, #constT -------------- */
   20967    /* -------------- (T3) RSB{S}.W Rd, Rn, #constT -------------- */
         /* SUB computes Rn - imm; RSB computes imm - Rn. */
   20968    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   20969        && (INSN0(9,5) == BITS5(0,1,1,0,1) // SUB
   20970            || INSN0(9,5) == BITS5(0,1,1,1,0)) // RSB
   20971        && INSN1(15,15) == 0) {
   20972       Bool isRSB = INSN0(9,5) == BITS5(0,1,1,1,0);
   20973       UInt bS    = INSN0(4,4);
   20974       UInt rN    = INSN0(3,0);
   20975       UInt rD    = INSN1(11,8);
   20976       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
   20977       /* but allow "sub{s}.w reg, sp, #constT
   20978          this is (T2) of "SUB (SP minus immediate)" */
   20979       if (!valid && !isRSB && rN == 13 && rD != 15)
   20980          valid = True;
   20981       if (valid) {
   20982          IRTemp argL  = newTemp(Ity_I32);
   20983          IRTemp argR  = newTemp(Ity_I32);
   20984          IRTemp res   = newTemp(Ity_I32);
   20985          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   20986          assign(argL, getIRegT(rN));
   20987          assign(argR, mkU32(imm32));
   20988          assign(res,  isRSB
   20989                       ? binop(Iop_Sub32, mkexpr(argR), mkexpr(argL))
   20990                       : binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
   20991          putIRegT(rD, mkexpr(res), condT);
   20992          if (bS == 1) {
                  /* The flag operands are passed in the same order as
                     the subtraction was performed. */
   20993             if (isRSB)
   20994                setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
   20995             else
   20996                setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   20997          }
   20998          DIP("%s%s.w r%u, r%u, #%u\n",
   20999              isRSB ? "rsb" : "sub", bS == 1 ? "s" : "", rD, rN, imm32);
   21000          goto decode_success;
   21001       }
   21002    }
   21003 
   21004    /* -------------- (T4) SUBW Rd, Rn, #uimm12 ------------------- */
         /* Subtracts a plain 12-bit unsigned immediate; no flags are
            set. */
   21005    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   21006        && INSN0(9,4) == BITS6(1,0,1,0,1,0)
   21007        && INSN1(15,15) == 0) {
   21008       UInt rN = INSN0(3,0);
   21009       UInt rD = INSN1(11,8);
   21010       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
   21011       /* but allow "subw sp, sp, #uimm12" */
   21012       if (!valid && rD == 13 && rN == 13)
   21013          valid = True;
   21014       if (valid) {
   21015          IRTemp argL  = newTemp(Ity_I32);
   21016          IRTemp argR  = newTemp(Ity_I32);
   21017          IRTemp res   = newTemp(Ity_I32);
               /* imm12 = insn0[10] : insn1[14:12] : insn1[7:0] */
   21018          UInt imm12   = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
   21019          assign(argL, getIRegT(rN));
   21020          assign(argR, mkU32(imm12));
   21021          assign(res,  binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
   21022          putIRegT(rD, mkexpr(res), condT);
   21023          DIP("subw r%u, r%u, #%u\n", rD, rN, imm12);
   21024          goto decode_success;
   21025       }
   21026    }
   21027 
   21028    /* -------------- (T1) ADC{S}.W Rd, Rn, #constT -------------- */
   21029    /* -------------- (T1) SBC{S}.W Rd, Rn, #constT -------------- */
   21030    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   21031        && (   INSN0(9,5) == BITS5(0,1,0,1,0)  // ADC
   21032            || INSN0(9,5) == BITS5(0,1,0,1,1)) // SBC
   21033        && INSN1(15,15) == 0) {
   21034       /* ADC:  Rd = Rn + constT + oldC */
   21035       /* SBC:  Rd = Rn - constT - (oldC ^ 1) */
   21036       UInt bS    = INSN0(4,4);
   21037       UInt rN    = INSN0(3,0);
   21038       UInt rD    = INSN1(11,8);
   21039       if (!isBadRegT(rN) && !isBadRegT(rD)) {
   21040          IRTemp argL  = newTemp(Ity_I32);
   21041          IRTemp argR  = newTemp(Ity_I32);
   21042          IRTemp res   = newTemp(Ity_I32);
   21043          IRTemp oldC  = newTemp(Ity_I32);
   21044          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
   21045          assign(argL, getIRegT(rN));
   21046          assign(argR, mkU32(imm32));
               /* The incoming carry is read before Rd or the flags are
                  written. */
   21047          assign(oldC, mk_armg_calculate_flag_c() );
   21048          const HChar* nm  = "???";
   21049          switch (INSN0(9,5)) {
   21050             case BITS5(0,1,0,1,0): // ADC
   21051                nm = "adc";
   21052                assign(res,
   21053                       binop(Iop_Add32,
   21054                             binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
   21055                             mkexpr(oldC) ));
   21056                putIRegT(rD, mkexpr(res), condT);
                     /* For the S form, both operands and the old carry
                        are handed to the flags thunk. */
   21057                if (bS)
   21058                   setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
   21059                                      argL, argR, oldC, condT );
   21060                break;
   21061             case BITS5(0,1,0,1,1): // SBC
   21062                nm = "sbc";
   21063                assign(res,
   21064                       binop(Iop_Sub32,
   21065                             binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
   21066                             binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   21067                putIRegT(rD, mkexpr(res), condT);
   21068                if (bS)
   21069                   setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   21070                                      argL, argR, oldC, condT );
   21071                break;
   21072             default:
                     /* Not reachable: the enclosing 'if' admits only the
                        two encodings above. */
   21073               vassert(0);
   21074          }
   21075          DIP("%s%s.w r%u, r%u, #%u\n",
   21076              nm, bS == 1 ? "s" : "", rD, rN, imm32);
   21077          goto decode_success;
   21078       }
   21079    }
   21080 
   21081    /* -------------- (T1) ORR{S}.W Rd, Rn, #constT -------------- */
   21082    /* -------------- (T1) AND{S}.W Rd, Rn, #constT -------------- */
   21083    /* -------------- (T1) BIC{S}.W Rd, Rn, #constT -------------- */
   21084    /* -------------- (T1) EOR{S}.W Rd, Rn, #constT -------------- */
         /* -------------- (T1) ORN{S}.W Rd, Rn, #constT -------------- */
   21085    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   21086        && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // ORR
   21087            || INSN0(9,5) == BITS5(0,0,0,0,0)  // AND
   21088            || INSN0(9,5) == BITS5(0,0,0,0,1)  // BIC
   21089            || INSN0(9,5) == BITS5(0,0,1,0,0)  // EOR
   21090            || INSN0(9,5) == BITS5(0,0,0,1,1)) // ORN
   21091        && INSN1(15,15) == 0) {
   21092       UInt bS = INSN0(4,4);
   21093       UInt rN = INSN0(3,0);
   21094       UInt rD = INSN1(11,8);
   21095       if (!isBadRegT(rN) && !isBadRegT(rD)) {
               /* BIC and ORN reuse the AND / OR operations with the
                  immediate complemented (notArgR). */
   21096          Bool   notArgR = False;
   21097          IROp   op      = Iop_INVALID;
   21098          const HChar* nm = "???";
   21099          switch (INSN0(9,5)) {
   21100             case BITS5(0,0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
   21101             case BITS5(0,0,0,0,0): op = Iop_And32; nm = "and"; break;
   21102             case BITS5(0,0,0,0,1): op = Iop_And32; nm = "bic";
   21103                                    notArgR = True; break;
   21104             case BITS5(0,0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
   21105             case BITS5(0,0,0,1,1): op = Iop_Or32;  nm = "orn";
   21106                                    notArgR = True; break;
   21107             default: vassert(0);
   21108          }
   21109          IRTemp argL  = newTemp(Ity_I32);
   21110          IRTemp argR  = newTemp(Ity_I32);
   21111          IRTemp res   = newTemp(Ity_I32);
   21112          Bool   updC  = False;
   21113          UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
   21114          assign(argL, getIRegT(rN));
   21115          assign(argR, mkU32(notArgR ? ~imm32 : imm32));
   21116          assign(res,  binop(op, mkexpr(argL), mkexpr(argR)));
   21117          putIRegT(rD, mkexpr(res), condT);
   21118          if (bS) {
   21119             IRTemp oldV = newTemp(Ity_I32);
   21120             IRTemp oldC = newTemp(Ity_I32);
   21121             assign( oldV, mk_armg_calculate_flag_v() );
                  /* When updC is set, C comes from bit 31 of the expanded
                     immediate -- the uncomplemented imm32, even for
                     BIC/ORN; otherwise C keeps its current value. */
   21122             assign( oldC, updC
   21123                           ? mkU32((imm32 >> 31) & 1)
   21124                           : mk_armg_calculate_flag_c() );
   21125             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   21126                                condT );
   21127          }
               /* The disassembly prints imm32 as encoded, not the
                  complemented operand. */
   21128          DIP("%s%s.w r%u, r%u, #%u\n",
   21129              nm, bS == 1 ? "s" : "", rD, rN, imm32);
   21130          goto decode_success;
   21131       }
   21132    }
   21133 
   21134    /* ---------- (T3) ADD{S}.W Rd, Rn, Rm, {shift} ---------- */
   21135    /* ---------- (T3) SUB{S}.W Rd, Rn, Rm, {shift} ---------- */
   21136    /* ---------- (T3) RSB{S}.W Rd, Rn, Rm, {shift} ---------- */
         /* Rd = Rn op shifted(Rm), where the shift amount (imm5) and
            kind (how) come from the second halfword.  RSB reverses the
            operand order. */
   21137    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   21138        && (   INSN0(8,5) == BITS4(1,0,0,0)  // add subopc
   21139            || INSN0(8,5) == BITS4(1,1,0,1)  // sub subopc
   21140            || INSN0(8,5) == BITS4(1,1,1,0)) // rsb subopc
   21141        && INSN1(15,15) == 0) {
   21142       UInt rN   = INSN0(3,0);
   21143       UInt rD   = INSN1(11,8);
   21144       UInt rM   = INSN1(3,0);
   21145       UInt bS   = INSN0(4,4);
            /* imm5 = insn1[14:12] : insn1[7:6], so it is always 0..31 */
   21146       UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   21147       UInt how  = INSN1(5,4);
   21148 
   21149       Bool valid = !isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM);
            /* NOTE(review): the two SP exceptions below apply whenever
               'valid' is false, including when rM is the bad register;
               neither re-checks rM.  Confirm that is intended.  Also,
               "imm5 <= 31" is always true given how imm5 is built. */
   21150       /* but allow "add.w reg, sp, reg, lsl #N for N=0..31
   21151          (T3) "ADD (SP plus register) */
   21152       if (!valid && INSN0(8,5) == BITS4(1,0,0,0) // add
   21153           && rD != 15 && rN == 13 && imm5 <= 31 && how == 0) {
   21154          valid = True;
   21155       }
   21156       /* also allow "sub.w reg, sp, reg   lsl #N for N=0 .. 5
   21157          (T1) "SUB (SP minus register) */
   21158       if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // sub
   21159           && rD != 15 && rN == 13 && imm5 <= 5 && how == 0) {
   21160          valid = True;
   21161       }
   21162       if (valid) {
   21163          Bool   swap = False;
   21164          IROp   op   = Iop_INVALID;
   21165          const HChar* nm = "???";
   21166          switch (INSN0(8,5)) {
   21167             case BITS4(1,0,0,0): op = Iop_Add32; nm = "add"; break;
   21168             case BITS4(1,1,0,1): op = Iop_Sub32; nm = "sub"; break;
   21169             case BITS4(1,1,1,0): op = Iop_Sub32; nm = "rsb";
   21170                                  swap = True; break;
   21171             default: vassert(0);
   21172          }
   21173 
   21174          IRTemp argL = newTemp(Ity_I32);
   21175          assign(argL, getIRegT(rN));
   21176 
   21177          IRTemp rMt = newTemp(Ity_I32);
   21178          assign(rMt, getIRegT(rM));
   21179 
               /* Only the shifted value is wanted here; NULL is passed
                  where the carry-out temp would go.  The helper also
                  receives dis_buf, which is printed by the DIP below. */
   21180          IRTemp argR = newTemp(Ity_I32);
   21181          compute_result_and_C_after_shift_by_imm5(
   21182             dis_buf, &argR, NULL, rMt, how, imm5, rM
   21183          );
   21184 
   21185          IRTemp res = newTemp(Ity_I32);
   21186          assign(res, swap
   21187                      ? binop(op, mkexpr(argR), mkexpr(argL))
   21188                      : binop(op, mkexpr(argL), mkexpr(argR)));
   21189 
   21190          putIRegT(rD, mkexpr(res), condT);
   21191          if (bS) {
                  /* The flag operands follow the same operand order as
                     the operation itself. */
   21192             switch (op) {
   21193                case Iop_Add32:
   21194                   setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
   21195                   break;
   21196                case Iop_Sub32:
   21197                   if (swap)
   21198                      setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
   21199                   else
   21200                      setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
   21201                   break;
   21202                default:
   21203                   vassert(0);
   21204             }
   21205          }
   21206 
   21207          DIP("%s%s.w r%u, r%u, %s\n",
   21208              nm, bS ? "s" : "", rD, rN, dis_buf);
   21209          goto decode_success;
   21210       }
   21211    }
   21212 
   21213    /* ---------- (T3) ADC{S}.W Rd, Rn, Rm, {shift} ---------- */
   21214    /* ---------- (T2) SBC{S}.W Rd, Rn, Rm, {shift} ---------- */
   21215    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   21216        && (   INSN0(8,5) == BITS4(1,0,1,0)   // adc subopc
   21217            || INSN0(8,5) == BITS4(1,0,1,1))  // sbc subopc
   21218        && INSN1(15,15) == 0) {
   21219       /* ADC:  Rd = Rn + shifter_operand + oldC */
   21220       /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
   21221       UInt rN = INSN0(3,0);
   21222       UInt rD = INSN1(11,8);
   21223       UInt rM = INSN1(3,0);
   21224       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   21225          UInt bS   = INSN0(4,4);
   21226          UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   21227          UInt how  = INSN1(5,4);
   21228 
   21229          IRTemp argL = newTemp(Ity_I32);
   21230          assign(argL, getIRegT(rN));
   21231 
   21232          IRTemp rMt = newTemp(Ity_I32);
   21233          assign(rMt, getIRegT(rM));
   21234 
   21235          IRTemp oldC = newTemp(Ity_I32);
   21236          assign(oldC, mk_armg_calculate_flag_c());
   21237 
   21238          IRTemp argR = newTemp(Ity_I32);
   21239          compute_result_and_C_after_shift_by_imm5(
   21240             dis_buf, &argR, NULL, rMt, how, imm5, rM
   21241          );
   21242 
   21243          const HChar* nm  = "???";
   21244          IRTemp res = newTemp(Ity_I32);
   21245          switch (INSN0(8,5)) {
   21246             case BITS4(1,0,1,0): // ADC
   21247                nm = "adc";
   21248                assign(res,
   21249                       binop(Iop_Add32,
   21250                             binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
   21251                             mkexpr(oldC) ));
   21252                putIRegT(rD, mkexpr(res), condT);
   21253                if (bS)
   21254                   setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
   21255                                      argL, argR, oldC, condT );
   21256                break;
   21257             case BITS4(1,0,1,1): // SBC
   21258                nm = "sbc";
   21259                assign(res,
   21260                       binop(Iop_Sub32,
   21261                             binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
   21262                             binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
   21263                putIRegT(rD, mkexpr(res), condT);
   21264                if (bS)
   21265                   setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
   21266                                      argL, argR, oldC, condT );
   21267                break;
   21268             default:
   21269                vassert(0);
   21270          }
   21271 
   21272          DIP("%s%s.w r%u, r%u, %s\n",
   21273              nm, bS ? "s" : "", rD, rN, dis_buf);
   21274          goto decode_success;
   21275       }
   21276    }
   21277 
   21278    /* ---------- (T3) AND{S}.W Rd, Rn, Rm, {shift} ---------- */
   21279    /* ---------- (T3) ORR{S}.W Rd, Rn, Rm, {shift} ---------- */
   21280    /* ---------- (T3) EOR{S}.W Rd, Rn, Rm, {shift} ---------- */
   21281    /* ---------- (T3) BIC{S}.W Rd, Rn, Rm, {shift} ---------- */
   21282    /* ---------- (T1) ORN{S}.W Rd, Rn, Rm, {shift} ---------- */
   21283    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   21284        && (   INSN0(8,5) == BITS4(0,0,0,0)  // and subopc
   21285            || INSN0(8,5) == BITS4(0,0,1,0)  // orr subopc
   21286            || INSN0(8,5) == BITS4(0,1,0,0)  // eor subopc
   21287            || INSN0(8,5) == BITS4(0,0,0,1)  // bic subopc
   21288            || INSN0(8,5) == BITS4(0,0,1,1)) // orn subopc
   21289        && INSN1(15,15) == 0) {
   21290       UInt rN = INSN0(3,0);
   21291       UInt rD = INSN1(11,8);
   21292       UInt rM = INSN1(3,0);
   21293       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   21294          Bool notArgR = False;
   21295          IROp op      = Iop_INVALID;
   21296          const HChar* nm  = "???";
   21297          switch (INSN0(8,5)) {
   21298             case BITS4(0,0,0,0): op = Iop_And32; nm = "and"; break;
   21299             case BITS4(0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
   21300             case BITS4(0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
   21301             case BITS4(0,0,0,1): op = Iop_And32; nm = "bic";
   21302                                  notArgR = True; break;
   21303             case BITS4(0,0,1,1): op = Iop_Or32; nm = "orn";
   21304                                  notArgR = True; break;
   21305             default: vassert(0);
   21306          }
   21307          UInt bS   = INSN0(4,4);
   21308          UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   21309          UInt how  = INSN1(5,4);
   21310 
   21311          IRTemp rNt = newTemp(Ity_I32);
   21312          assign(rNt, getIRegT(rN));
   21313 
   21314          IRTemp rMt = newTemp(Ity_I32);
   21315          assign(rMt, getIRegT(rM));
   21316 
   21317          IRTemp argR = newTemp(Ity_I32);
   21318          IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   21319 
   21320          compute_result_and_C_after_shift_by_imm5(
   21321             dis_buf, &argR, bS ? &oldC : NULL, rMt, how, imm5, rM
   21322          );
   21323 
   21324          IRTemp res = newTemp(Ity_I32);
   21325          if (notArgR) {
   21326             vassert(op == Iop_And32 || op == Iop_Or32);
   21327             assign(res, binop(op, mkexpr(rNt),
   21328                                   unop(Iop_Not32, mkexpr(argR))));
   21329          } else {
   21330             assign(res, binop(op, mkexpr(rNt), mkexpr(argR)));
   21331          }
   21332 
   21333          putIRegT(rD, mkexpr(res), condT);
   21334          if (bS) {
   21335             IRTemp oldV = newTemp(Ity_I32);
   21336             assign( oldV, mk_armg_calculate_flag_v() );
   21337             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   21338                                condT );
   21339          }
   21340 
   21341          DIP("%s%s.w r%u, r%u, %s\n",
   21342              nm, bS ? "s" : "", rD, rN, dis_buf);
   21343          goto decode_success;
   21344       }
   21345    }
   21346 
   21347    /* -------------- (T?) LSL{S}.W Rd, Rn, Rm -------------- */
   21348    /* -------------- (T?) LSR{S}.W Rd, Rn, Rm -------------- */
   21349    /* -------------- (T?) ASR{S}.W Rd, Rn, Rm -------------- */
   21350    /* -------------- (T?) ROR{S}.W Rd, Rn, Rm -------------- */
   21351    if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,0,0)
   21352        && INSN1(15,12) == BITS4(1,1,1,1)
   21353        && INSN1(7,4) == BITS4(0,0,0,0)) {
   21354       UInt how = INSN0(6,5); // standard encoding
   21355       UInt rN  = INSN0(3,0);
   21356       UInt rD  = INSN1(11,8);
   21357       UInt rM  = INSN1(3,0);
   21358       UInt bS  = INSN0(4,4);
   21359       Bool valid = !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rD);
   21360       if (valid) {
   21361          IRTemp rNt    = newTemp(Ity_I32);
   21362          IRTemp rMt    = newTemp(Ity_I32);
   21363          IRTemp res    = newTemp(Ity_I32);
   21364          IRTemp oldC   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   21365          IRTemp oldV   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   21366          const HChar* nms[4] = { "lsl", "lsr", "asr", "ror" };
   21367          const HChar* nm     = nms[how];
   21368          assign(rNt, getIRegT(rN));
   21369          assign(rMt, getIRegT(rM));
   21370          compute_result_and_C_after_shift_by_reg(
   21371             dis_buf, &res, bS ? &oldC : NULL,
   21372             rNt, how, rMt, rN, rM
   21373          );
   21374          if (bS)
   21375             assign(oldV, mk_armg_calculate_flag_v());
   21376          putIRegT(rD, mkexpr(res), condT);
   21377          if (bS) {
   21378             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   21379                                condT );
   21380          }
   21381          DIP("%s%s.w r%u, r%u, r%u\n",
   21382              nm, bS ? "s" : "", rD, rN, rM);
   21383          goto decode_success;
   21384       }
   21385    }
   21386 
   21387    /* ------------ (T?) MOV{S}.W Rd, Rn, {shift} ------------ */
   21388    /* ------------ (T?) MVN{S}.W Rd, Rn, {shift} ------------ */
   21389    if ((INSN0(15,0) & 0xFFCF) == 0xEA4F
   21390        && INSN1(15,15) == 0) {
   21391       UInt rD      = INSN1(11,8);
   21392       UInt rN      = INSN1(3,0);
   21393       UInt bS      = INSN0(4,4);
   21394       UInt isMVN   = INSN0(5,5);
   21395       Bool regsOK  = (bS || isMVN)
   21396                         ? (!isBadRegT(rD) && !isBadRegT(rN))
   21397                         : (rD != 15 && rN != 15 && (rD != 13 || rN != 13));
   21398       if (regsOK) {
   21399          UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
   21400          UInt how   = INSN1(5,4);
   21401 
   21402          IRTemp rNt = newTemp(Ity_I32);
   21403          assign(rNt, getIRegT(rN));
   21404 
   21405          IRTemp oldRn = newTemp(Ity_I32);
   21406          IRTemp oldC  = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
   21407          compute_result_and_C_after_shift_by_imm5(
   21408             dis_buf, &oldRn, bS ? &oldC : NULL, rNt, how, imm5, rN
   21409          );
   21410 
   21411          IRTemp res = newTemp(Ity_I32);
   21412          assign(res, isMVN ? unop(Iop_Not32, mkexpr(oldRn))
   21413                            : mkexpr(oldRn));
   21414 
   21415          putIRegT(rD, mkexpr(res), condT);
   21416          if (bS) {
   21417             IRTemp oldV = newTemp(Ity_I32);
   21418             assign( oldV, mk_armg_calculate_flag_v() );
   21419             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT);
   21420          }
   21421          DIP("%s%s.w r%u, %s\n",
   21422              isMVN ? "mvn" : "mov", bS ? "s" : "", rD, dis_buf);
   21423          goto decode_success;
   21424       }
   21425    }
   21426 
   21427    /* -------------- (T?) TST.W Rn, Rm, {shift} -------------- */
   21428    /* -------------- (T?) TEQ.W Rn, Rm, {shift} -------------- */
   21429    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   21430        && (   INSN0(8,4) == BITS5(0,0,0,0,1)  // TST
   21431            || INSN0(8,4) == BITS5(0,1,0,0,1)) // TEQ
   21432        && INSN1(15,15) == 0
   21433        && INSN1(11,8) == BITS4(1,1,1,1)) {
   21434       UInt rN = INSN0(3,0);
   21435       UInt rM = INSN1(3,0);
   21436       if (!isBadRegT(rN) && !isBadRegT(rM)) {
   21437          Bool isTST = INSN0(8,4) == BITS5(0,0,0,0,1);
   21438 
   21439          UInt how  = INSN1(5,4);
   21440          UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
   21441 
   21442          IRTemp argL = newTemp(Ity_I32);
   21443          assign(argL, getIRegT(rN));
   21444 
   21445          IRTemp rMt = newTemp(Ity_I32);
   21446          assign(rMt, getIRegT(rM));
   21447 
   21448          IRTemp argR = newTemp(Ity_I32);
   21449          IRTemp oldC = newTemp(Ity_I32);
   21450          compute_result_and_C_after_shift_by_imm5(
   21451             dis_buf, &argR, &oldC, rMt, how, imm5, rM
   21452          );
   21453 
   21454          IRTemp oldV = newTemp(Ity_I32);
   21455          assign( oldV, mk_armg_calculate_flag_v() );
   21456 
   21457          IRTemp res = newTemp(Ity_I32);
   21458          assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
   21459                            mkexpr(argL), mkexpr(argR)));
   21460 
   21461          setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   21462                             condT );
   21463          DIP("%s.w r%u, %s\n", isTST ? "tst" : "teq", rN, dis_buf);
   21464          goto decode_success;
   21465       }
   21466    }
   21467 
   21468    /* -------------- (T3) CMP.W Rn, Rm, {shift} -------------- */
   21469    /* -------------- (T2) CMN.W Rn, Rm, {shift} -------------- */
   21470    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
   21471        && (   INSN0(8,4) == BITS5(1,1,0,1,1)  // CMP
   21472            || INSN0(8,4) == BITS5(1,0,0,0,1)) // CMN
   21473        && INSN1(15,15) == 0
   21474        && INSN1(11,8) == BITS4(1,1,1,1)) {
   21475       UInt rN = INSN0(3,0);
   21476       UInt rM = INSN1(3,0);
   21477       if (!isBadRegT(rN) && !isBadRegT(rM)) {
   21478          Bool isCMN = INSN0(8,4) == BITS5(1,0,0,0,1);
   21479          UInt how   = INSN1(5,4);
   21480          UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
   21481 
   21482          IRTemp argL = newTemp(Ity_I32);
   21483          assign(argL, getIRegT(rN));
   21484 
   21485          IRTemp rMt = newTemp(Ity_I32);
   21486          assign(rMt, getIRegT(rM));
   21487 
   21488          IRTemp argR = newTemp(Ity_I32);
   21489          compute_result_and_C_after_shift_by_imm5(
   21490             dis_buf, &argR, NULL, rMt, how, imm5, rM
   21491          );
   21492 
   21493          setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
   21494                          argL, argR, condT );
   21495 
   21496          DIP("%s.w r%u, %s\n", isCMN ? "cmn" : "cmp", rN, dis_buf);
   21497          goto decode_success;
   21498       }
   21499    }
   21500 
   21501    /* -------------- (T2) MOV{S}.W Rd, #constT -------------- */
   21502    /* -------------- (T2) MVN{S}.W Rd, #constT -------------- */
   21503    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   21504        && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // MOV
   21505            || INSN0(9,5) == BITS5(0,0,0,1,1)) // MVN
   21506        && INSN0(3,0) == BITS4(1,1,1,1)
   21507        && INSN1(15,15) == 0) {
   21508       UInt rD = INSN1(11,8);
   21509       if (!isBadRegT(rD)) {
   21510          Bool   updC  = False;
   21511          UInt   bS    = INSN0(4,4);
   21512          Bool   isMVN = INSN0(5,5) == 1;
   21513          UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
   21514          IRTemp res   = newTemp(Ity_I32);
   21515          assign(res, mkU32(isMVN ? ~imm32 : imm32));
   21516          putIRegT(rD, mkexpr(res), condT);
   21517          if (bS) {
   21518             IRTemp oldV = newTemp(Ity_I32);
   21519             IRTemp oldC = newTemp(Ity_I32);
   21520             assign( oldV, mk_armg_calculate_flag_v() );
   21521             assign( oldC, updC
   21522                           ? mkU32((imm32 >> 31) & 1)
   21523                           : mk_armg_calculate_flag_c() );
   21524             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
   21525                                condT );
   21526          }
   21527          DIP("%s%s.w r%u, #%u\n",
   21528              isMVN ? "mvn" : "mov", bS ? "s" : "", rD, imm32);
   21529          goto decode_success;
   21530       }
   21531    }
   21532 
   21533    /* -------------- (T3) MOVW Rd, #imm16 -------------- */
   21534    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   21535        && INSN0(9,4) == BITS6(1,0,0,1,0,0)
   21536        && INSN1(15,15) == 0) {
   21537       UInt rD = INSN1(11,8);
   21538       if (!isBadRegT(rD)) {
   21539          UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
   21540                       | (INSN1(14,12) << 8) | INSN1(7,0);
   21541          putIRegT(rD, mkU32(imm16), condT);
   21542          DIP("movw r%u, #%u\n", rD, imm16);
   21543          goto decode_success;
   21544       }
   21545    }
   21546 
   21547    /* ---------------- MOVT Rd, #imm16 ---------------- */
   21548    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   21549        && INSN0(9,4) == BITS6(1,0,1,1,0,0)
   21550        && INSN1(15,15) == 0) {
   21551       UInt rD = INSN1(11,8);
   21552       if (!isBadRegT(rD)) {
   21553          UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
   21554                       | (INSN1(14,12) << 8) | INSN1(7,0);
   21555          IRTemp res = newTemp(Ity_I32);
   21556          assign(res,
   21557                 binop(Iop_Or32,
   21558                       binop(Iop_And32, getIRegT(rD), mkU32(0xFFFF)),
   21559                       mkU32(imm16 << 16)));
   21560          putIRegT(rD, mkexpr(res), condT);
   21561          DIP("movt r%u, #%u\n", rD, imm16);
   21562          goto decode_success;
   21563       }
   21564    }
   21565 
   21566    /* ---------------- LD/ST reg+/-#imm8 ---------------- */
   21567    /* Loads and stores of the form:
   21568          op  Rt, [Rn, #-imm8]      or
   21569          op  Rt, [Rn], #+/-imm8    or
   21570          op  Rt, [Rn, #+/-imm8]!
   21571       where op is one of
   21572          ldrb ldrh ldr  ldrsb ldrsh
   21573          strb strh str
   21574    */
   21575    if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0) && INSN1(11,11) == 1) {
   21576       Bool   valid  = True;
   21577       Bool   syned  = False;
   21578       Bool   isST   = False;
   21579       IRType ty     = Ity_I8;
   21580       const HChar* nm = "???";
   21581 
   21582       switch (INSN0(8,4)) {
   21583          case BITS5(0,0,0,0,0):   // strb
   21584             nm = "strb"; isST = True; break;
   21585          case BITS5(0,0,0,0,1):   // ldrb
   21586             nm = "ldrb"; break;
   21587          case BITS5(1,0,0,0,1):   // ldrsb
   21588             nm = "ldrsb"; syned = True; break;
   21589          case BITS5(0,0,0,1,0):   // strh
   21590             nm = "strh"; ty = Ity_I16; isST = True; break;
   21591          case BITS5(0,0,0,1,1):   // ldrh
   21592             nm = "ldrh"; ty = Ity_I16; break;
   21593          case BITS5(1,0,0,1,1):   // ldrsh
   21594             nm = "ldrsh"; ty = Ity_I16; syned = True; break;
   21595          case BITS5(0,0,1,0,0):   // str
   21596             nm = "str"; ty = Ity_I32; isST = True; break;
   21597          case BITS5(0,0,1,0,1):
   21598             nm = "ldr"; ty = Ity_I32; break;  // ldr
   21599          default:
   21600             valid = False; break;
   21601       }
   21602 
   21603       UInt rN      = INSN0(3,0);
   21604       UInt rT      = INSN1(15,12);
   21605       UInt bP      = INSN1(10,10);
   21606       UInt bU      = INSN1(9,9);
   21607       UInt bW      = INSN1(8,8);
   21608       UInt imm8    = INSN1(7,0);
   21609       Bool loadsPC = False;
   21610 
   21611       if (valid) {
   21612          if (bP == 1 && bU == 1 && bW == 0)
   21613             valid = False;
   21614          if (bP == 0 && bW == 0)
   21615             valid = False;
   21616          if (rN == 15)
   21617             valid = False;
   21618          if (bW == 1 && rN == rT)
   21619             valid = False;
   21620          if (ty == Ity_I8 || ty == Ity_I16) {
   21621             if (isBadRegT(rT))
   21622                valid = False;
   21623          } else {
   21624             /* ty == Ity_I32 */
   21625             if (isST && rT == 15)
   21626                valid = False;
   21627             if (!isST && rT == 15)
   21628                loadsPC = True;
   21629          }
   21630       }
   21631 
   21632       if (valid) {
   21633          // if it's a branch, it can't happen in the middle of an IT block
   21634          // Also, if it is a branch, make it unconditional at this point.
   21635          // Doing conditional branches in-line is too complex (for now)
   21636          if (loadsPC) {
   21637             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   21638             // go uncond
   21639             mk_skip_over_T32_if_cond_is_false(condT);
   21640             condT = IRTemp_INVALID;
   21641             // now uncond
   21642          }
   21643 
   21644          IRTemp preAddr = newTemp(Ity_I32);
   21645          assign(preAddr, getIRegT(rN));
   21646 
   21647          IRTemp postAddr = newTemp(Ity_I32);
   21648          assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   21649                                 mkexpr(preAddr), mkU32(imm8)));
   21650 
   21651          IRTemp transAddr = bP == 1 ? postAddr : preAddr;
   21652 
   21653          if (isST) {
   21654 
   21655             /* Store.  If necessary, update the base register before
   21656                the store itself, so that the common idiom of "str rX,
   21657                [sp, #-4]!" (store rX at sp-4, then do new sp = sp-4,
   21658                a.k.a "push rX") doesn't cause Memcheck to complain
   21659                that the access is below the stack pointer.  Also, not
   21660                updating sp before the store confuses Valgrind's
   21661                dynamic stack-extending logic.  So do it before the
   21662                store.  Hence we need to snarf the store data before
   21663                doing the basereg update. */
   21664 
   21665             /* get hold of the data to be stored */
   21666             IRTemp oldRt = newTemp(Ity_I32);
   21667             assign(oldRt, getIRegT(rT));
   21668 
   21669             /* Update Rn if necessary. */
   21670             if (bW == 1) {
   21671                vassert(rN != rT); // assured by validity check above
   21672                putIRegT(rN, mkexpr(postAddr), condT);
   21673             }
   21674 
   21675             /* generate the transfer */
   21676             IRExpr* data = NULL;
   21677             switch (ty) {
   21678                case Ity_I8:
   21679                   data = unop(Iop_32to8, mkexpr(oldRt));
   21680                   break;
   21681                case Ity_I16:
   21682                   data = unop(Iop_32to16, mkexpr(oldRt));
   21683                   break;
   21684                case Ity_I32:
   21685                   data = mkexpr(oldRt);
   21686                   break;
   21687                default:
   21688                   vassert(0);
   21689             }
   21690             storeGuardedLE(mkexpr(transAddr), data, condT);
   21691 
   21692          } else {
   21693 
   21694             /* Load. */
   21695             IRTemp llOldRt = newTemp(Ity_I32);
   21696             assign(llOldRt, llGetIReg(rT));
   21697 
   21698             /* generate the transfer */
   21699             IRTemp    newRt = newTemp(Ity_I32);
   21700             IRLoadGOp widen = ILGop_INVALID;
   21701             switch (ty) {
   21702                case Ity_I8:
   21703                   widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
   21704                case Ity_I16:
   21705                   widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
   21706                case Ity_I32:
   21707                   widen = ILGop_Ident32; break;
   21708                default:
   21709                   vassert(0);
   21710             }
   21711             loadGuardedLE(newRt, widen,
   21712                           mkexpr(transAddr), mkexpr(llOldRt), condT);
   21713             if (rT == 15) {
   21714                vassert(loadsPC);
   21715                /* We'll do the write to the PC just below */
   21716             } else {
   21717                vassert(!loadsPC);
   21718                /* IRTemp_INVALID is OK here because in the case where
   21719                   condT is false at run time, we're just putting the
   21720                   old rT value back. */
   21721                putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21722             }
   21723 
   21724             /* Update Rn if necessary. */
   21725             if (bW == 1) {
   21726                vassert(rN != rT); // assured by validity check above
   21727                putIRegT(rN, mkexpr(postAddr), condT);
   21728             }
   21729 
   21730             if (loadsPC) {
   21731                /* Presumably this is an interworking branch. */
   21732                vassert(rN != 15); // assured by validity check above
   21733                vassert(rT == 15);
   21734                vassert(condT == IRTemp_INVALID); /* due to check above */
   21735                llPutIReg(15, mkexpr(newRt));
   21736                dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
   21737                dres.whatNext    = Dis_StopHere;
   21738             }
   21739          }
   21740 
   21741          if (bP == 1 && bW == 0) {
   21742             DIP("%s.w r%u, [r%u, #%c%u]\n",
   21743                 nm, rT, rN, bU ? '+' : '-', imm8);
   21744          }
   21745          else if (bP == 1 && bW == 1) {
   21746             DIP("%s.w r%u, [r%u, #%c%u]!\n",
   21747                 nm, rT, rN, bU ? '+' : '-', imm8);
   21748          }
   21749          else {
   21750             vassert(bP == 0 && bW == 1);
   21751             DIP("%s.w r%u, [r%u], #%c%u\n",
   21752                 nm, rT, rN, bU ? '+' : '-', imm8);
   21753          }
   21754 
   21755          goto decode_success;
   21756       }
   21757    }
   21758 
   21759    /* ------------- LD/ST reg+(reg<<imm2) ------------- */
   21760    /* Loads and stores of the form:
   21761          op  Rt, [Rn, Rm, LSL #imm2]
   21762       where op is one of
   21763          ldrb ldrh ldr  ldrsb ldrsh
   21764          strb strh str
   21765    */
   21766    if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)
   21767        && INSN1(11,6) == BITS6(0,0,0,0,0,0)) {
   21768       Bool   valid  = True;
   21769       Bool   syned  = False;
   21770       Bool   isST   = False;
   21771       IRType ty     = Ity_I8;
   21772       const HChar* nm = "???";
   21773 
   21774       switch (INSN0(8,4)) {
   21775          case BITS5(0,0,0,0,0):   // strb
   21776             nm = "strb"; isST = True; break;
   21777          case BITS5(0,0,0,0,1):   // ldrb
   21778             nm = "ldrb"; break;
   21779          case BITS5(1,0,0,0,1):   // ldrsb
   21780             nm = "ldrsb"; syned = True; break;
   21781          case BITS5(0,0,0,1,0):   // strh
   21782             nm = "strh"; ty = Ity_I16; isST = True; break;
   21783          case BITS5(0,0,0,1,1):   // ldrh
   21784             nm = "ldrh"; ty = Ity_I16; break;
   21785          case BITS5(1,0,0,1,1):   // ldrsh
   21786             nm = "ldrsh"; ty = Ity_I16; syned = True; break;
   21787          case BITS5(0,0,1,0,0):   // str
   21788             nm = "str"; ty = Ity_I32; isST = True; break;
   21789          case BITS5(0,0,1,0,1):
   21790             nm = "ldr"; ty = Ity_I32; break;  // ldr
   21791          default:
   21792             valid = False; break;
   21793       }
   21794 
   21795       UInt rN      = INSN0(3,0);
   21796       UInt rM      = INSN1(3,0);
   21797       UInt rT      = INSN1(15,12);
   21798       UInt imm2    = INSN1(5,4);
   21799       Bool loadsPC = False;
   21800 
   21801       if (ty == Ity_I8 || ty == Ity_I16) {
   21802          /* all 8- and 16-bit load and store cases have the
   21803             same exclusion set. */
   21804          if (rN == 15 || isBadRegT(rT) || isBadRegT(rM))
   21805             valid = False;
   21806       } else {
   21807          vassert(ty == Ity_I32);
   21808          if (rN == 15 || isBadRegT(rM))
   21809             valid = False;
   21810          if (isST && rT == 15)
   21811             valid = False;
   21812          /* If it is a load and rT is 15, that's only allowable if we are
   21813             not in an IT block, or are the last in it.  Need to insert
   21814             a dynamic check for that. */
   21815          if (!isST && rT == 15)
   21816             loadsPC = True;
   21817       }
   21818 
   21819       if (valid) {
   21820          // if it's a branch, it can't happen in the middle of an IT block
   21821          // Also, if it is a branch, make it unconditional at this point.
   21822          // Doing conditional branches in-line is too complex (for now)
   21823          if (loadsPC) {
   21824             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   21825             // go uncond
   21826             mk_skip_over_T32_if_cond_is_false(condT);
   21827             condT = IRTemp_INVALID;
   21828             // now uncond
   21829          }
   21830 
   21831          IRTemp transAddr = newTemp(Ity_I32);
   21832          assign(transAddr,
   21833                 binop( Iop_Add32,
   21834                        getIRegT(rN),
   21835                        binop(Iop_Shl32, getIRegT(rM), mkU8(imm2)) ));
   21836 
   21837          if (isST) {
   21838 
   21839             /* get hold of the data to be stored */
   21840             IRTemp oldRt = newTemp(Ity_I32);
   21841             assign(oldRt, getIRegT(rT));
   21842 
   21843             /* generate the transfer */
   21844             IRExpr* data = NULL;
   21845             switch (ty) {
   21846                case Ity_I8:
   21847                   data = unop(Iop_32to8, mkexpr(oldRt));
   21848                   break;
   21849                case Ity_I16:
   21850                   data = unop(Iop_32to16, mkexpr(oldRt));
   21851                   break;
   21852               case Ity_I32:
   21853                   data = mkexpr(oldRt);
   21854                   break;
   21855               default:
   21856                  vassert(0);
   21857             }
   21858             storeGuardedLE(mkexpr(transAddr), data, condT);
   21859 
   21860          } else {
   21861 
   21862             /* Load. */
   21863             IRTemp llOldRt = newTemp(Ity_I32);
   21864             assign(llOldRt, llGetIReg(rT));
   21865 
   21866             /* generate the transfer */
   21867             IRTemp    newRt = newTemp(Ity_I32);
   21868             IRLoadGOp widen = ILGop_INVALID;
   21869             switch (ty) {
   21870                case Ity_I8:
   21871                   widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
   21872                case Ity_I16:
   21873                   widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
   21874                case Ity_I32:
   21875                   widen = ILGop_Ident32; break;
   21876                default:
   21877                   vassert(0);
   21878             }
   21879             loadGuardedLE(newRt, widen,
   21880                           mkexpr(transAddr), mkexpr(llOldRt), condT);
   21881 
   21882             if (rT == 15) {
   21883                vassert(loadsPC);
   21884                /* We'll do the write to the PC just below */
   21885             } else {
   21886                vassert(!loadsPC);
   21887                /* IRTemp_INVALID is OK here because in the case where
   21888                   condT is false at run time, we're just putting the
   21889                   old rT value back. */
   21890                putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   21891             }
   21892 
   21893             if (loadsPC) {
   21894                /* Presumably this is an interworking branch. */
   21895                vassert(rN != 15); // assured by validity check above
   21896                vassert(rT == 15);
   21897                vassert(condT == IRTemp_INVALID); /* due to check above */
   21898                llPutIReg(15, mkexpr(newRt));
   21899                dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
   21900                dres.whatNext    = Dis_StopHere;
   21901             }
   21902          }
   21903 
   21904          DIP("%s.w r%u, [r%u, r%u, LSL #%u]\n",
   21905              nm, rT, rN, rM, imm2);
   21906 
   21907          goto decode_success;
   21908       }
   21909    }
   21910 
   21911    /* --------------- LD/ST reg+imm12 --------------- */
   21912    /* Loads and stores of the form:
   21913          op  Rt, [Rn, #+-imm12]
   21914       where op is one of
   21915          ldrb ldrh ldr  ldrsb ldrsh
   21916          strb strh str
   21917    */
   21918    if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)) {
   21919       Bool   valid  = True;
   21920       Bool   syned  = INSN0(8,8) == 1;
   21921       Bool   isST   = False;
   21922       IRType ty     = Ity_I8;
   21923       UInt   bU     = INSN0(7,7); // 1: +imm   0: -imm
   21924                                   // -imm is only supported by literal versions
   21925       const HChar* nm = "???";
   21926 
   21927       switch (INSN0(6,4)) {
   21928          case BITS3(0,0,0):   // strb
   21929             nm = "strb"; isST = True; break;
   21930          case BITS3(0,0,1):   // ldrb
   21931             nm = syned ? "ldrsb" : "ldrb"; break;
   21932          case BITS3(0,1,0):   // strh
   21933             nm = "strh"; ty = Ity_I16; isST = True; break;
   21934          case BITS3(0,1,1):   // ldrh
   21935             nm = syned ? "ldrsh" : "ldrh"; ty = Ity_I16; break;
   21936          case BITS3(1,0,0):   // str
   21937             nm = "str"; ty = Ity_I32; isST = True; break;
   21938          case BITS3(1,0,1):
   21939             nm = "ldr"; ty = Ity_I32; break;  // ldr
   21940          default:
   21941             valid = False; break;
   21942       }
   21943 
   21944       UInt rN      = INSN0(3,0);
   21945       UInt rT      = INSN1(15,12);
   21946       UInt imm12   = INSN1(11,0);
   21947       Bool loadsPC = False;
   21948 
   21949       if (rN != 15 && bU == 0) {
   21950          // only pc supports #-imm12
   21951          valid = False;
   21952       }
   21953 
   21954       if (isST) {
   21955          if (syned) valid = False;
   21956          if (rN == 15 || rT == 15)
   21957             valid = False;
   21958       } else {
   21959          /* For a 32-bit load, rT == 15 is only allowable if we are not
   21960             in an IT block, or are the last in it.  Need to insert
   21961             a dynamic check for that.  Also, in this particular
   21962             case, rN == 15 is allowable.  In this case however, the
   21963             value obtained for rN is (apparently)
   21964             "word-align(address of current insn + 4)". */
   21965          if (rT == 15) {
   21966             if (ty == Ity_I32)
   21967                loadsPC = True;
   21968             else // Can't do it for B/H loads
   21969                valid = False;
   21970          }
   21971       }
   21972 
   21973       if (valid) {
   21974          // if it's a branch, it can't happen in the middle of an IT block
   21975          // Also, if it is a branch, make it unconditional at this point.
   21976          // Doing conditional branches in-line is too complex (for now)
   21977          if (loadsPC) {
   21978             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   21979             // go uncond
   21980             mk_skip_over_T32_if_cond_is_false(condT);
   21981             condT = IRTemp_INVALID;
   21982             // now uncond
   21983          }
   21984 
   21985          IRTemp rNt = newTemp(Ity_I32);
   21986          if (rN == 15) {
   21987             vassert(!isST);
   21988             assign(rNt, binop(Iop_And32, getIRegT(15), mkU32(~3)));
   21989          } else {
   21990             assign(rNt, getIRegT(rN));
   21991          }
   21992 
   21993          IRTemp transAddr = newTemp(Ity_I32);
   21994          assign(transAddr,
   21995                 binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   21996                       mkexpr(rNt), mkU32(imm12)));
   21997 
   21998          IRTemp oldRt = newTemp(Ity_I32);
   21999          assign(oldRt, getIRegT(rT));
   22000 
   22001          IRTemp llOldRt = newTemp(Ity_I32);
   22002          assign(llOldRt, llGetIReg(rT));
   22003 
   22004          if (isST) {
   22005             IRExpr* data = NULL;
   22006             switch (ty) {
   22007                case Ity_I8:
   22008                   data = unop(Iop_32to8, mkexpr(oldRt));
   22009                   break;
   22010                case Ity_I16:
   22011                   data = unop(Iop_32to16, mkexpr(oldRt));
   22012                   break;
   22013               case Ity_I32:
   22014                   data = mkexpr(oldRt);
   22015                   break;
   22016               default:
   22017                  vassert(0);
   22018             }
   22019             storeGuardedLE(mkexpr(transAddr), data, condT);
   22020          } else {
   22021             IRTemp    newRt = newTemp(Ity_I32);
   22022             IRLoadGOp widen = ILGop_INVALID;
   22023             switch (ty) {
   22024                case Ity_I8:
   22025                   widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
   22026                case Ity_I16:
   22027                   widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
   22028                case Ity_I32:
   22029                   widen = ILGop_Ident32; break;
   22030                default:
   22031                   vassert(0);
   22032             }
   22033             loadGuardedLE(newRt, widen,
   22034                           mkexpr(transAddr), mkexpr(llOldRt), condT);
   22035             if (rT == 15) {
   22036                vassert(loadsPC);
   22037                /* We'll do the write to the PC just below */
   22038             } else {
   22039                vassert(!loadsPC);
   22040                /* IRTemp_INVALID is OK here because in the case where
   22041                   condT is false at run time, we're just putting the
   22042                   old rT value back. */
   22043                putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   22044             }
   22045 
   22046             if (loadsPC) {
   22047                /* Presumably this is an interworking branch. */
   22048                vassert(rT == 15);
   22049                vassert(condT == IRTemp_INVALID); /* due to check above */
   22050                llPutIReg(15, mkexpr(newRt));
   22051                dres.jk_StopHere = Ijk_Boring;
   22052                dres.whatNext    = Dis_StopHere;
   22053             }
   22054          }
   22055 
   22056          DIP("%s.w r%u, [r%u, +#%u]\n", nm, rT, rN, imm12);
   22057 
   22058          goto decode_success;
   22059       }
   22060    }
   22061 
   22062    /* -------------- LDRD/STRD reg+/-#imm8 -------------- */
   22063    /* Doubleword loads and stores of the form:
   22064          ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]    or
   22065          ldrd/strd  Rt, Rt2, [Rn], #+/-imm8    or
   22066          ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]!
   22067    */
   22068    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,0) && INSN0(6,6) == 1) {
   22069       UInt bP   = INSN0(8,8);
   22070       UInt bU   = INSN0(7,7);
   22071       UInt bW   = INSN0(5,5);
   22072       UInt bL   = INSN0(4,4);  // 1: load  0: store
   22073       UInt rN   = INSN0(3,0);
   22074       UInt rT   = INSN1(15,12);
   22075       UInt rT2  = INSN1(11,8);
   22076       UInt imm8 = INSN1(7,0);
   22077 
   22078       Bool valid = True;
   22079       if (bP == 0 && bW == 0)                 valid = False;
   22080       if (bW == 1 && (rN == rT || rN == rT2)) valid = False;
   22081       if (isBadRegT(rT) || isBadRegT(rT2))    valid = False;
   22082       if (bL == 1 && rT == rT2)               valid = False;
   22083       /* It's OK to use PC as the base register only in the
   22084          following case: ldrd Rt, Rt2, [PC, #+/-imm8] */
   22085       if (rN == 15 && (bL == 0/*store*/
   22086                        || bW == 1/*wb*/))     valid = False;
   22087 
   22088       if (valid) {
   22089          IRTemp preAddr = newTemp(Ity_I32);
   22090          assign(preAddr, 15 == rN
   22091                            ? binop(Iop_And32, getIRegT(15), mkU32(~3U))
   22092                            : getIRegT(rN));
   22093 
   22094          IRTemp postAddr = newTemp(Ity_I32);
   22095          assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   22096                                 mkexpr(preAddr), mkU32(imm8 << 2)));
   22097 
   22098          IRTemp transAddr = bP == 1 ? postAddr : preAddr;
   22099 
   22100          /* For almost all cases, we do the writeback after the transfers.
   22101             However, that leaves the stack "uncovered" in cases like:
   22102                strd    rD, [sp, #-8]
   22103                strd    rD, [sp, #-16]
   22104             In which case, do the writeback to SP now, instead of later.
   22105             This is bad in that it makes the insn non-restartable if the
   22106             accesses fault, but at least keeps Memcheck happy. */
   22107          Bool writeback_already_done = False;
   22108          if (bL == 0/*store*/ && bW == 1/*wb*/
   22109              && rN == 13 && rN != rT && rN != rT2
   22110              && bU == 0/*minus*/
   22111              && ((imm8 << 2) == 8 || (imm8 << 2) == 16)) {
   22112             putIRegT(rN, mkexpr(postAddr), condT);
   22113             writeback_already_done = True;
   22114          }
   22115 
   22116          if (bL == 0) {
   22117             IRTemp oldRt  = newTemp(Ity_I32);
   22118             IRTemp oldRt2 = newTemp(Ity_I32);
   22119             assign(oldRt,  getIRegT(rT));
   22120             assign(oldRt2, getIRegT(rT2));
   22121             storeGuardedLE( mkexpr(transAddr),
   22122                             mkexpr(oldRt), condT );
   22123             storeGuardedLE( binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
   22124                             mkexpr(oldRt2), condT );
   22125          } else {
   22126             IRTemp oldRt  = newTemp(Ity_I32);
   22127             IRTemp oldRt2 = newTemp(Ity_I32);
   22128             IRTemp newRt  = newTemp(Ity_I32);
   22129             IRTemp newRt2 = newTemp(Ity_I32);
   22130             assign(oldRt,  llGetIReg(rT));
   22131             assign(oldRt2, llGetIReg(rT2));
   22132             loadGuardedLE( newRt, ILGop_Ident32,
   22133                            mkexpr(transAddr),
   22134                            mkexpr(oldRt), condT );
   22135             loadGuardedLE( newRt2, ILGop_Ident32,
   22136                            binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
   22137                            mkexpr(oldRt2), condT );
   22138             /* Put unconditionally, since we already switched on the condT
   22139                in the guarded loads. */
   22140             putIRegT(rT,  mkexpr(newRt),  IRTemp_INVALID);
   22141             putIRegT(rT2, mkexpr(newRt2), IRTemp_INVALID);
   22142          }
   22143 
   22144          if (bW == 1 && !writeback_already_done) {
   22145             putIRegT(rN, mkexpr(postAddr), condT);
   22146          }
   22147 
   22148          const HChar* nm = bL ? "ldrd" : "strd";
   22149 
   22150          if (bP == 1 && bW == 0) {
   22151             DIP("%s.w r%u, r%u, [r%u, #%c%u]\n",
   22152                 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
   22153          }
   22154          else if (bP == 1 && bW == 1) {
   22155             DIP("%s.w r%u, r%u, [r%u, #%c%u]!\n",
   22156                 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
   22157          }
   22158          else {
   22159             vassert(bP == 0 && bW == 1);
   22160             DIP("%s.w r%u, r%u, [r%u], #%c%u\n",
   22161                 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
   22162          }
   22163 
   22164          goto decode_success;
   22165       }
   22166    }
   22167 
   22168    /* -------------- (T3) Bcond.W label -------------- */
   22169    /* This variant carries its own condition, so can't be part of an
   22170       IT block ... */
   22171    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   22172        && INSN1(15,14) == BITS2(1,0)
   22173        && INSN1(12,12) == 0) {
   22174       UInt cond = INSN0(9,6);
   22175       if (cond != ARMCondAL && cond != ARMCondNV) {
   22176          UInt uimm21
   22177             =   (INSN0(10,10) << (1 + 1 + 6 + 11 + 1))
   22178               | (INSN1(11,11) << (1 + 6 + 11 + 1))
   22179               | (INSN1(13,13) << (6 + 11 + 1))
   22180               | (INSN0(5,0)   << (11 + 1))
   22181               | (INSN1(10,0)  << 1);
   22182          uimm21 <<= 11;
   22183          Int simm21 = (Int)uimm21;
   22184          simm21 >>= 11;
   22185 
   22186          vassert(0 == (guest_R15_curr_instr_notENC & 1));
   22187          UInt dst = simm21 + guest_R15_curr_instr_notENC + 4;
   22188 
   22189          /* Not allowed in an IT block; SIGILL if so. */
   22190          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
   22191 
   22192          IRTemp kondT = newTemp(Ity_I32);
   22193          assign( kondT, mk_armg_calculate_condition(cond) );
   22194          stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
   22195                             Ijk_Boring,
   22196                             IRConst_U32(dst | 1/*CPSR.T*/),
   22197                             OFFB_R15T ));
   22198          llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 4)
   22199                               | 1 /*CPSR.T*/ ));
   22200          dres.jk_StopHere = Ijk_Boring;
   22201          dres.whatNext    = Dis_StopHere;
   22202          DIP("b%s.w 0x%x\n", nCC(cond), dst);
   22203          goto decode_success;
   22204       }
   22205    }
   22206 
   22207    /* ---------------- (T4) B.W label ---------------- */
   22208    /* ... whereas this variant doesn't carry its own condition, so it
   22209       has to be either unconditional or the conditional by virtue of
   22210       being the last in an IT block.  The upside is that there's 4
   22211       more bits available for the jump offset, so it has a 16-times
   22212       greater branch range than the T3 variant. */
   22213    if (INSN0(15,11) == BITS5(1,1,1,1,0)
   22214        && INSN1(15,14) == BITS2(1,0)
   22215        && INSN1(12,12) == 1) {
   22216       if (1) {
   22217          UInt bS  = INSN0(10,10);
   22218          UInt bJ1 = INSN1(13,13);
   22219          UInt bJ2 = INSN1(11,11);
   22220          UInt bI1 = 1 ^ (bJ1 ^ bS);
   22221          UInt bI2 = 1 ^ (bJ2 ^ bS);
   22222          UInt uimm25
   22223             =   (bS          << (1 + 1 + 10 + 11 + 1))
   22224               | (bI1         << (1 + 10 + 11 + 1))
   22225               | (bI2         << (10 + 11 + 1))
   22226               | (INSN0(9,0)  << (11 + 1))
   22227               | (INSN1(10,0) << 1);
   22228          uimm25 <<= 7;
   22229          Int simm25 = (Int)uimm25;
   22230          simm25 >>= 7;
   22231 
   22232          vassert(0 == (guest_R15_curr_instr_notENC & 1));
   22233          UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
   22234 
   22235          /* If in an IT block, must be the last insn. */
   22236          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   22237 
   22238          // go uncond
   22239          mk_skip_over_T32_if_cond_is_false(condT);
   22240          condT = IRTemp_INVALID;
   22241          // now uncond
   22242 
   22243          // branch to dst
   22244          llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
   22245          dres.jk_StopHere = Ijk_Boring;
   22246          dres.whatNext    = Dis_StopHere;
   22247          DIP("b.w 0x%x\n", dst);
   22248          goto decode_success;
   22249       }
   22250    }
   22251 
   22252    /* ------------------ TBB, TBH ------------------ */
   22253    if (INSN0(15,4) == 0xE8D && INSN1(15,5) == 0x780) {
   22254       UInt rN = INSN0(3,0);
   22255       UInt rM = INSN1(3,0);
   22256       UInt bH = INSN1(4,4);
   22257       if (bH/*ATC*/ || (rN != 13 && !isBadRegT(rM))) {
   22258          /* Must be last or not-in IT block */
   22259          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
   22260          /* Go uncond */
   22261          mk_skip_over_T32_if_cond_is_false(condT);
   22262          condT = IRTemp_INVALID;
   22263 
   22264          IRExpr* ea
   22265              = binop(Iop_Add32,
   22266                      getIRegT(rN),
   22267                      bH ? binop(Iop_Shl32, getIRegT(rM), mkU8(1))
   22268                         : getIRegT(rM));
   22269 
   22270          IRTemp delta = newTemp(Ity_I32);
   22271          if (bH) {
   22272             assign(delta, unop(Iop_16Uto32, loadLE(Ity_I16, ea)));
   22273          } else {
   22274             assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
   22275          }
   22276 
   22277          llPutIReg(
   22278             15,
   22279             binop(Iop_Or32,
   22280                   binop(Iop_Add32,
   22281                         getIRegT(15),
   22282                         binop(Iop_Shl32, mkexpr(delta), mkU8(1))
   22283                   ),
   22284                   mkU32(1)
   22285          ));
   22286          dres.jk_StopHere = Ijk_Boring;
   22287          dres.whatNext    = Dis_StopHere;
   22288          DIP("tb%c [r%u, r%u%s]\n",
   22289              bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
   22290          goto decode_success;
   22291       }
   22292    }
   22293 
   22294    /* ------------------ UBFX ------------------ */
   22295    /* ------------------ SBFX ------------------ */
   22296    /* There's also ARM versions of same, but it doesn't seem worth the
   22297       hassle to common up the handling (it's only a couple of C
   22298       statements). */
   22299    if ((INSN0(15,4) == 0xF3C // UBFX
   22300         || INSN0(15,4) == 0xF34) // SBFX
   22301        && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
   22302       UInt rN  = INSN0(3,0);
   22303       UInt rD  = INSN1(11,8);
   22304       UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
   22305       UInt wm1 = INSN1(4,0);
   22306       UInt msb =  lsb + wm1;
   22307       if (!isBadRegT(rD) && !isBadRegT(rN) && msb <= 31) {
   22308          Bool   isU  = INSN0(15,4) == 0xF3C;
   22309          IRTemp src  = newTemp(Ity_I32);
   22310          IRTemp tmp  = newTemp(Ity_I32);
   22311          IRTemp res  = newTemp(Ity_I32);
   22312          UInt   mask = ((1 << wm1) - 1) + (1 << wm1);
   22313          vassert(msb >= 0 && msb <= 31);
   22314          vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
   22315 
   22316          assign(src, getIRegT(rN));
   22317          assign(tmp, binop(Iop_And32,
   22318                            binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
   22319                            mkU32(mask)));
   22320          assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
   22321                            binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
   22322                            mkU8(31-wm1)));
   22323 
   22324          putIRegT(rD, mkexpr(res), condT);
   22325 
   22326          DIP("%s r%u, r%u, #%u, #%u\n",
   22327              isU ? "ubfx" : "sbfx", rD, rN, lsb, wm1 + 1);
   22328          goto decode_success;
   22329       }
   22330    }
   22331 
   22332    /* ------------------ UXTB ------------------ */
   22333    /* ------------------ UXTH ------------------ */
   22334    /* ------------------ SXTB ------------------ */
   22335    /* ------------------ SXTH ------------------ */
   22336    /* ----------------- UXTB16 ----------------- */
   22337    /* ----------------- SXTB16 ----------------- */
   22338    /* FIXME: this is an exact duplicate of the ARM version.  They
   22339       should be commoned up. */
   22340    if ((INSN0(15,0) == 0xFA5F     // UXTB
   22341         || INSN0(15,0) == 0xFA1F  // UXTH
   22342         || INSN0(15,0) == 0xFA4F  // SXTB
   22343         || INSN0(15,0) == 0xFA0F  // SXTH
   22344         || INSN0(15,0) == 0xFA3F  // UXTB16
   22345         || INSN0(15,0) == 0xFA2F) // SXTB16
   22346        && INSN1(15,12) == BITS4(1,1,1,1)
   22347        && INSN1(7,6) == BITS2(1,0)) {
   22348       UInt rD = INSN1(11,8);
   22349       UInt rM = INSN1(3,0);
   22350       UInt rot = INSN1(5,4);
   22351       if (!isBadRegT(rD) && !isBadRegT(rM)) {
   22352          const HChar* nm = "???";
   22353          IRTemp srcT = newTemp(Ity_I32);
   22354          IRTemp rotT = newTemp(Ity_I32);
   22355          IRTemp dstT = newTemp(Ity_I32);
   22356          assign(srcT, getIRegT(rM));
   22357          assign(rotT, genROR32(srcT, 8 * rot));
   22358          switch (INSN0(15,0)) {
   22359             case 0xFA5F: // UXTB
   22360                nm = "uxtb";
   22361                assign(dstT, unop(Iop_8Uto32,
   22362                                  unop(Iop_32to8, mkexpr(rotT))));
   22363                break;
   22364             case 0xFA1F: // UXTH
   22365                nm = "uxth";
   22366                assign(dstT, unop(Iop_16Uto32,
   22367                                  unop(Iop_32to16, mkexpr(rotT))));
   22368                break;
   22369             case 0xFA4F: // SXTB
   22370                nm = "sxtb";
   22371                assign(dstT, unop(Iop_8Sto32,
   22372                                  unop(Iop_32to8, mkexpr(rotT))));
   22373                break;
   22374             case 0xFA0F: // SXTH
   22375                nm = "sxth";
   22376                assign(dstT, unop(Iop_16Sto32,
   22377                                  unop(Iop_32to16, mkexpr(rotT))));
   22378                break;
   22379             case 0xFA3F: // UXTB16
   22380                nm = "uxtb16";
   22381                assign(dstT, binop(Iop_And32, mkexpr(rotT),
   22382                                              mkU32(0x00FF00FF)));
   22383                break;
   22384             case 0xFA2F: { // SXTB16
   22385                nm = "sxtb16";
   22386                IRTemp lo32 = newTemp(Ity_I32);
   22387                IRTemp hi32 = newTemp(Ity_I32);
   22388                assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
   22389                assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
   22390                assign(
   22391                   dstT,
   22392                   binop(Iop_Or32,
   22393                         binop(Iop_And32,
   22394                               unop(Iop_8Sto32,
   22395                                    unop(Iop_32to8, mkexpr(lo32))),
   22396                               mkU32(0xFFFF)),
   22397                         binop(Iop_Shl32,
   22398                               unop(Iop_8Sto32,
   22399                                    unop(Iop_32to8, mkexpr(hi32))),
   22400                               mkU8(16))
   22401                ));
   22402                break;
   22403             }
   22404             default:
   22405                vassert(0);
   22406          }
   22407          putIRegT(rD, mkexpr(dstT), condT);
   22408          DIP("%s r%u, r%u, ror #%u\n", nm, rD, rM, 8 * rot);
   22409          goto decode_success;
   22410       }
   22411    }
   22412 
   22413    /* -------------- MUL.W Rd, Rn, Rm -------------- */
   22414    if (INSN0(15,4) == 0xFB0
   22415        && (INSN1(15,0) & 0xF0F0) == 0xF000) {
   22416       UInt rN = INSN0(3,0);
   22417       UInt rD = INSN1(11,8);
   22418       UInt rM = INSN1(3,0);
   22419       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   22420          IRTemp res = newTemp(Ity_I32);
   22421          assign(res, binop(Iop_Mul32, getIRegT(rN), getIRegT(rM)));
   22422          putIRegT(rD, mkexpr(res), condT);
   22423          DIP("mul.w r%u, r%u, r%u\n", rD, rN, rM);
   22424          goto decode_success;
   22425       }
   22426    }
   22427 
   22428    /* -------------- SDIV.W Rd, Rn, Rm -------------- */
   22429    if (INSN0(15,4) == 0xFB9
   22430        && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
   22431       UInt rN = INSN0(3,0);
   22432       UInt rD = INSN1(11,8);
   22433       UInt rM = INSN1(3,0);
   22434       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   22435          IRTemp res  = newTemp(Ity_I32);
   22436          IRTemp argL = newTemp(Ity_I32);
   22437          IRTemp argR = newTemp(Ity_I32);
   22438          assign(argL, getIRegT(rN));
   22439          assign(argR, getIRegT(rM));
   22440          assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
   22441          putIRegT(rD, mkexpr(res), condT);
   22442          DIP("sdiv.w r%u, r%u, r%u\n", rD, rN, rM);
   22443          goto decode_success;
   22444       }
   22445    }
   22446 
   22447    /* -------------- UDIV.W Rd, Rn, Rm -------------- */
   22448    if (INSN0(15,4) == 0xFBB
   22449        && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
   22450       UInt rN = INSN0(3,0);
   22451       UInt rD = INSN1(11,8);
   22452       UInt rM = INSN1(3,0);
   22453       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   22454          IRTemp res  = newTemp(Ity_I32);
   22455          IRTemp argL = newTemp(Ity_I32);
   22456          IRTemp argR = newTemp(Ity_I32);
   22457          assign(argL, getIRegT(rN));
   22458          assign(argR, getIRegT(rM));
   22459          assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
   22460          putIRegT(rD, mkexpr(res), condT);
   22461          DIP("udiv.w r%u, r%u, r%u\n", rD, rN, rM);
   22462          goto decode_success;
   22463       }
   22464    }
   22465 
   22466    /* ------------------ {U,S}MULL ------------------ */
   22467    if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
   22468        && INSN1(7,4) == BITS4(0,0,0,0)) {
   22469       UInt isU  = INSN0(5,5);
   22470       UInt rN   = INSN0(3,0);
   22471       UInt rDlo = INSN1(15,12);
   22472       UInt rDhi = INSN1(11,8);
   22473       UInt rM   = INSN1(3,0);
   22474       if (!isBadRegT(rDhi) && !isBadRegT(rDlo)
   22475           && !isBadRegT(rN) && !isBadRegT(rM) && rDlo != rDhi) {
   22476          IRTemp res   = newTemp(Ity_I64);
   22477          assign(res, binop(isU ? Iop_MullU32 : Iop_MullS32,
   22478                            getIRegT(rN), getIRegT(rM)));
   22479          putIRegT( rDhi, unop(Iop_64HIto32, mkexpr(res)), condT );
   22480          putIRegT( rDlo, unop(Iop_64to32, mkexpr(res)), condT );
   22481          DIP("%cmull r%u, r%u, r%u, r%u\n",
   22482              isU ? 'u' : 's', rDlo, rDhi, rN, rM);
   22483          goto decode_success;
   22484       }
   22485    }
   22486 
   22487    /* ------------------ ML{A,S} ------------------ */
   22488    if (INSN0(15,4) == 0xFB0
   22489        && (   INSN1(7,4) == BITS4(0,0,0,0)    // MLA
   22490            || INSN1(7,4) == BITS4(0,0,0,1))) { // MLS
   22491       UInt rN = INSN0(3,0);
   22492       UInt rA = INSN1(15,12);
   22493       UInt rD = INSN1(11,8);
   22494       UInt rM = INSN1(3,0);
   22495       if (!isBadRegT(rD) && !isBadRegT(rN)
   22496           && !isBadRegT(rM) && !isBadRegT(rA)) {
   22497          Bool   isMLA = INSN1(7,4) == BITS4(0,0,0,0);
   22498          IRTemp res   = newTemp(Ity_I32);
   22499          assign(res,
   22500                 binop(isMLA ? Iop_Add32 : Iop_Sub32,
   22501                       getIRegT(rA),
   22502                       binop(Iop_Mul32, getIRegT(rN), getIRegT(rM))));
   22503          putIRegT(rD, mkexpr(res), condT);
   22504          DIP("%s r%u, r%u, r%u, r%u\n",
   22505              isMLA ? "mla" : "mls", rD, rN, rM, rA);
   22506          goto decode_success;
   22507       }
   22508    }
   22509 
   22510    /* ------------------ (T3) ADR ------------------ */
   22511    if ((INSN0(15,0) == 0xF20F || INSN0(15,0) == 0xF60F)
   22512        && INSN1(15,15) == 0) {
   22513       /* rD = align4(PC) + imm32 */
   22514       UInt rD = INSN1(11,8);
   22515       if (!isBadRegT(rD)) {
   22516          UInt imm32 = (INSN0(10,10) << 11)
   22517                       | (INSN1(14,12) << 8) | INSN1(7,0);
   22518          putIRegT(rD, binop(Iop_Add32,
   22519                             binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   22520                             mkU32(imm32)),
   22521                       condT);
   22522          DIP("add r%u, pc, #%u\n", rD, imm32);
   22523          goto decode_success;
   22524       }
   22525    }
   22526 
   22527    /* ----------------- (T1) UMLAL ----------------- */
   22528    /* ----------------- (T1) SMLAL ----------------- */
   22529    if ((INSN0(15,4) == 0xFBE // UMLAL
   22530         || INSN0(15,4) == 0xFBC) // SMLAL
   22531        && INSN1(7,4) == BITS4(0,0,0,0)) {
   22532       UInt rN   = INSN0(3,0);
   22533       UInt rDlo = INSN1(15,12);
   22534       UInt rDhi = INSN1(11,8);
   22535       UInt rM   = INSN1(3,0);
   22536       if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
   22537           && !isBadRegT(rM) && rDhi != rDlo) {
   22538          Bool   isS   = INSN0(15,4) == 0xFBC;
   22539          IRTemp argL  = newTemp(Ity_I32);
   22540          IRTemp argR  = newTemp(Ity_I32);
   22541          IRTemp old   = newTemp(Ity_I64);
   22542          IRTemp res   = newTemp(Ity_I64);
   22543          IRTemp resHi = newTemp(Ity_I32);
   22544          IRTemp resLo = newTemp(Ity_I32);
   22545          IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
   22546          assign( argL, getIRegT(rM));
   22547          assign( argR, getIRegT(rN));
   22548          assign( old, binop(Iop_32HLto64, getIRegT(rDhi), getIRegT(rDlo)) );
   22549          assign( res, binop(Iop_Add64,
   22550                             mkexpr(old),
   22551                             binop(mulOp, mkexpr(argL), mkexpr(argR))) );
   22552          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   22553          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   22554          putIRegT( rDhi, mkexpr(resHi), condT );
   22555          putIRegT( rDlo, mkexpr(resLo), condT );
   22556          DIP("%cmlal r%u, r%u, r%u, r%u\n",
   22557              isS ? 's' : 'u', rDlo, rDhi, rN, rM);
   22558          goto decode_success;
   22559       }
   22560    }
   22561 
   22562    /* ------------------ (T1) UMAAL ------------------ */
   22563    if (INSN0(15,4) == 0xFBE && INSN1(7,4) == BITS4(0,1,1,0)) {
   22564       UInt rN   = INSN0(3,0);
   22565       UInt rDlo = INSN1(15,12);
   22566       UInt rDhi = INSN1(11,8);
   22567       UInt rM   = INSN1(3,0);
   22568       if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
   22569           && !isBadRegT(rM) && rDhi != rDlo) {
   22570          IRTemp argN   = newTemp(Ity_I32);
   22571          IRTemp argM   = newTemp(Ity_I32);
   22572          IRTemp argDhi = newTemp(Ity_I32);
   22573          IRTemp argDlo = newTemp(Ity_I32);
   22574          IRTemp res    = newTemp(Ity_I64);
   22575          IRTemp resHi  = newTemp(Ity_I32);
   22576          IRTemp resLo  = newTemp(Ity_I32);
   22577          assign( argN,   getIRegT(rN) );
   22578          assign( argM,   getIRegT(rM) );
   22579          assign( argDhi, getIRegT(rDhi) );
   22580          assign( argDlo, getIRegT(rDlo) );
   22581          assign( res,
   22582                  binop(Iop_Add64,
   22583                        binop(Iop_Add64,
   22584                              binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
   22585                              unop(Iop_32Uto64, mkexpr(argDhi))),
   22586                        unop(Iop_32Uto64, mkexpr(argDlo))) );
   22587          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
   22588          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
   22589          putIRegT( rDhi, mkexpr(resHi), condT );
   22590          putIRegT( rDlo, mkexpr(resLo), condT );
   22591          DIP("umaal r%u, r%u, r%u, r%u\n", rDlo, rDhi, rN, rM);
   22592          goto decode_success;
   22593       }
   22594    }
   22595 
   22596    /* ------------------- (T1) SMMUL{R} ------------------ */
            /* rD = bits 63:32 of the signed 64-bit product rN * rM.  When
               the R bit (INSN1 bit 4) is set, 0x80000000 is added to the
               product before the high word is taken, so the result is
               rounded rather than truncated.  INSN1(15,12) must be 1111
               here; the SMMLA case that follows reads that same field as
               an accumulator register. */
   22597    if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
   22598        && INSN0(6,4) == BITS3(1,0,1)
   22599        && INSN1(15,12) == BITS4(1,1,1,1)
   22600        && INSN1(7,5) == BITS3(0,0,0)) {
   22601       UInt bitR = INSN1(4,4);
   22602       UInt rD = INSN1(11,8);
   22603       UInt rM = INSN1(3,0);
   22604       UInt rN = INSN0(3,0);
               /* If isBadRegT rejects any register the insn is left
                  undecoded and control falls out to the later cases. */
   22605       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   22606          IRExpr* res
   22607          = unop(Iop_64HIto32,
   22608                 binop(Iop_Add64,
   22609                       binop(Iop_MullS32, getIRegT(rN), getIRegT(rM)),
   22610                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
                  /* condT is handed to putIRegT as the guard for the write. */
   22611          putIRegT(rD, res, condT);
   22612          DIP("smmul%s r%u, r%u, r%u\n",
   22613              bitR ? "r" : "", rD, rN, rM);
   22614          goto decode_success;
   22615       }
   22616    }
   22617 
   22618    /* ------------------- (T1) SMMLA{R} ------------------ */
            /* rD = bits 63:32 of ((rA << 32) + signed(rN * rM)), with
               0x80000000 added first when the R bit (INSN1 bit 4) is set.
               rA is taken from INSN1(15,12); the case is only accepted
               when rA != 13 and rD, rN, rM all pass isBadRegT. */
   22619    if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
   22620        && INSN0(6,4) == BITS3(1,0,1)
   22621        && INSN1(7,5) == BITS3(0,0,0)) {
   22622       UInt bitR = INSN1(4,4);
   22623       UInt rA = INSN1(15,12);
   22624       UInt rD = INSN1(11,8);
   22625       UInt rM = INSN1(3,0);
   22626       UInt rN = INSN0(3,0);
   22627       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && (rA != 13)) {
                  /* Iop_32HLto64(rA, 0) places the accumulator in the upper
                     word so it lines up with the high half of the product. */
   22628          IRExpr* res
   22629          = unop(Iop_64HIto32,
   22630                 binop(Iop_Add64,
   22631                       binop(Iop_Add64,
   22632                             binop(Iop_32HLto64, getIRegT(rA), mkU32(0)),
   22633                             binop(Iop_MullS32, getIRegT(rN), getIRegT(rM))),
   22634                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
   22635          putIRegT(rD, res, condT);
   22636          DIP("smmla%s r%u, r%u, r%u, r%u\n",
   22637              bitR ? "r" : "", rD, rN, rM, rA);
   22638          goto decode_success;
   22639       }
   22640    }
   22641 
   22642    /* ------------------ (T2) ADR ------------------ */
            /* Subtracting form of ADR: rD = (PC & ~3) - imm32.  The two
               accepted first halfwords, 0xF2AF and 0xF6AF, differ only in
               bit 10, which supplies the top bit of the immediate. */
   22643    if ((INSN0(15,0) == 0xF2AF || INSN0(15,0) == 0xF6AF)
   22644        && INSN1(15,15) == 0) {
   22645       /* rD = align4(PC) - imm32 */
   22646       UInt rD = INSN1(11,8);
   22647       if (!isBadRegT(rD)) {
                  /* 12-bit immediate assembled as INSN0[10] : INSN1[14:12]
                     : INSN1[7:0]. */
   22648          UInt imm32 = (INSN0(10,10) << 11)
   22649                       | (INSN1(14,12) << 8) | INSN1(7,0);
   22650          putIRegT(rD, binop(Iop_Sub32,
   22651                             binop(Iop_And32, getIRegT(15), mkU32(~3U)),
   22652                             mkU32(imm32)),
   22653                       condT);
                  /* The trace shows the insn in its "sub rD, pc, #imm" form. */
   22654          DIP("sub r%u, pc, #%u\n", rD, imm32);
   22655          goto decode_success;
   22656       }
   22657    }
   22658 
   22659    /* ------------------- (T1) BFI ------------------- */
   22660    /* ------------------- (T1) BFC ------------------- */
            /* Bitfield insert / clear.  Bits lsb..msb of rD are replaced
               by the low bits of rN shifted up to position lsb; all other
               bits of rD are kept.  rN == 15 selects BFC, for which the
               inserted value is zero.  The case is rejected when rD fails
               isBadRegT, rN == 13, or msb < lsb. */
   22661    if (INSN0(15,4) == 0xF36 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
   22662       UInt rD  = INSN1(11,8);
   22663       UInt rN  = INSN0(3,0);
   22664       UInt msb = INSN1(4,0);
   22665       UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
   22666       if (isBadRegT(rD) || rN == 13 || msb < lsb) {
   22667          /* undecodable; fall through */
   22668       } else {
   22669          IRTemp src    = newTemp(Ity_I32);
   22670          IRTemp olddst = newTemp(Ity_I32);
   22671          IRTemp newdst = newTemp(Ity_I32);
                  /* Build (msb-lsb+1) low one-bits as (2^k - 1) + 2^k with
                     k = msb-lsb.  Doing it this way never shifts by 32,
                     even when the field covers the whole register. */
   22672          UInt   mask   = ((UInt)1) << (msb - lsb);
   22673          mask = (mask - 1) + mask;
   22674          vassert(mask != 0); // guaranteed by "msb < lsb" check above
                  /* Move the run of ones up to the field position. */
   22675          mask <<= lsb;
   22676 
   22677          assign(src, rN == 15 ? mkU32(0) : getIRegT(rN));
   22678          assign(olddst, getIRegT(rD));
                  /* newdst = ((src << lsb) & mask) | (olddst & ~mask) */
   22679          assign(newdst,
   22680                 binop(Iop_Or32,
   22681                    binop(Iop_And32,
   22682                          binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
   22683                          mkU32(mask)),
   22684                    binop(Iop_And32,
   22685                          mkexpr(olddst),
   22686                          mkU32(~mask)))
   22687                );
   22688 
   22689          putIRegT(rD, mkexpr(newdst), condT);
   22690 
                  /* The trace prints the field as (lsb, width). */
   22691          if (rN == 15) {
   22692             DIP("bfc r%u, #%u, #%u\n",
   22693                 rD, lsb, msb-lsb+1);
   22694          } else {
   22695             DIP("bfi r%u, r%u, #%u, #%u\n",
   22696                 rD, rN, lsb, msb-lsb+1);
   22697          }
   22698          goto decode_success;
   22699       }
   22700    }
   22701 
   22702    /* ------------------- (T1) SXTAH ------------------- */
   22703    /* ------------------- (T1) UXTAH ------------------- */
            /* Extend-and-add halfword: rD = rN + ext16(ROR(rM, 8 * rot)),
               where ext16 zero-extends (UXTAH, INSN0(15,4) == 0xFA1) or
               sign-extends (SXTAH, 0xFA0) the low 16 bits of the rotated
               value.  rot is the 2-bit field INSN1(5,4), so the rotation
               is 0, 8, 16 or 24 bits. */
   22704    if ((INSN0(15,4) == 0xFA1      // UXTAH
   22705         || INSN0(15,4) == 0xFA0)  // SXTAH
   22706        && INSN1(15,12) == BITS4(1,1,1,1)
   22707        && INSN1(7,6) == BITS2(1,0)) {
   22708       Bool isU = INSN0(15,4) == 0xFA1;
   22709       UInt rN  = INSN0(3,0);
   22710       UInt rD  = INSN1(11,8);
   22711       UInt rM  = INSN1(3,0);
   22712       UInt rot = INSN1(5,4);
   22713       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   22714          IRTemp srcL = newTemp(Ity_I32);
   22715          IRTemp srcR = newTemp(Ity_I32);
   22716          IRTemp res  = newTemp(Ity_I32);
   22717          assign(srcR, getIRegT(rM));
   22718          assign(srcL, getIRegT(rN));
   22719          assign(res,  binop(Iop_Add32,
   22720                             mkexpr(srcL),
   22721                             unop(isU ? Iop_16Uto32 : Iop_16Sto32,
   22722                                  unop(Iop_32to16,
   22723                                       genROR32(srcR, 8 * rot)))));
   22724          putIRegT(rD, mkexpr(res), condT);
                  /* Print the rotation in bits, i.e. the same amount that
                     is passed to genROR32, not the raw 2-bit field. */
   22725          DIP("%cxtah r%u, r%u, r%u, ror #%u\n",
   22726              isU ? 'u' : 's', rD, rN, rM, 8 * rot);
   22727          goto decode_success;
   22728       }
   22729    }
   22730 
   22731    /* ------------------- (T1) SXTAB ------------------- */
   22732    /* ------------------- (T1) UXTAB ------------------- */
            /* Extend-and-add byte: rD = rN + ext8(ROR(rM, 8 * rot)), where
               ext8 zero-extends (UXTAB, INSN0(15,4) == 0xFA5) or
               sign-extends (SXTAB, 0xFA4) the low 8 bits of the rotated
               value.  rot is the 2-bit field INSN1(5,4), so the rotation
               is 0, 8, 16 or 24 bits. */
   22733    if ((INSN0(15,4) == 0xFA5      // UXTAB
   22734         || INSN0(15,4) == 0xFA4)  // SXTAB
   22735        && INSN1(15,12) == BITS4(1,1,1,1)
   22736        && INSN1(7,6) == BITS2(1,0)) {
   22737       Bool isU = INSN0(15,4) == 0xFA5;
   22738       UInt rN  = INSN0(3,0);
   22739       UInt rD  = INSN1(11,8);
   22740       UInt rM  = INSN1(3,0);
   22741       UInt rot = INSN1(5,4);
   22742       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
   22743          IRTemp srcL = newTemp(Ity_I32);
   22744          IRTemp srcR = newTemp(Ity_I32);
   22745          IRTemp res  = newTemp(Ity_I32);
   22746          assign(srcR, getIRegT(rM));
   22747          assign(srcL, getIRegT(rN));
   22748          assign(res,  binop(Iop_Add32,
   22749                             mkexpr(srcL),
   22750                             unop(isU ? Iop_8Uto32 : Iop_8Sto32,
   22751                                  unop(Iop_32to8,
   22752                                       genROR32(srcR, 8 * rot)))));
   22753          putIRegT(rD, mkexpr(res), condT);
                  /* Print the rotation in bits, i.e. the same amount that
                     is passed to genROR32, not the raw 2-bit field. */
   22754          DIP("%cxtab r%u, r%u, r%u, ror #%u\n",
   22755              isU ? 'u' : 's', rD, rN, rM, 8 * rot);
   22756          goto decode_success;
   22757       }
   22758    }
   22759 
   22760    /* ------------------- (T1) CLZ ------------------- */
            /* Count leading zeroes: rD = clz(rM).  The source register is
               encoded twice (INSN0(3,0) and INSN1(3,0)); the case is only
               accepted when both fields agree. */
   22761    if (INSN0(15,4) == 0xFAB
   22762        && INSN1(15,12) == BITS4(1,1,1,1)
   22763        && INSN1(7,4) == BITS4(1,0,0,0)) {
   22764       UInt rM1 = INSN0(3,0);
   22765       UInt rD  = INSN1(11,8);
   22766       UInt rM2 = INSN1(3,0);
   22767       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   22768          IRTemp arg = newTemp(Ity_I32);
   22769          IRTemp res = newTemp(Ity_I32);
   22770          assign(arg, getIRegT(rM1));
                  /* A zero argument is handled explicitly and yields 32;
                     Iop_Clz32 is only used for non-zero values (presumably
                     because its result for zero is not defined -- confirm
                     against the IR documentation). */
   22771          assign(res, IRExpr_ITE(
   22772                         binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
   22773                         mkU32(32),
   22774                         unop(Iop_Clz32, mkexpr(arg))
   22775          ));
   22776          putIRegT(rD, mkexpr(res), condT);
   22777          DIP("clz r%u, r%u\n", rD, rM1);
   22778          goto decode_success;
   22779       }
   22780    }
   22781 
   22782    /* ------------------- (T1) RBIT ------------------- */
            /* rD = gen_BITREV(rM).  As for CLZ, the source register is
               encoded in both halfwords and the two fields must match. */
   22783    if (INSN0(15,4) == 0xFA9
   22784        && INSN1(15,12) == BITS4(1,1,1,1)
   22785        && INSN1(7,4) == BITS4(1,0,1,0)) {
   22786       UInt rM1 = INSN0(3,0);
   22787       UInt rD  = INSN1(11,8);
   22788       UInt rM2 = INSN1(3,0);
   22789       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   22790          IRTemp arg = newTemp(Ity_I32);
   22791          assign(arg, getIRegT(rM1));
                  /* gen_BITREV returns a temp holding the transformed value. */
   22792          IRTemp res = gen_BITREV(arg);
   22793          putIRegT(rD, mkexpr(res), condT);
   22794          DIP("rbit r%u, r%u\n", rD, rM1);
   22795          goto decode_success;
   22796       }
   22797    }
   22798 
   22799    /* ------------------- (T2) REV   ------------------- */
   22800    /* ------------------- (T2) REV16 ------------------- */
            /* rD = gen_REV(rM) or gen_REV16(rM), selected by INSN1(7,4):
               1000 is REV, 1001 is REV16.  The source register is encoded
               in both halfwords and the two fields must match. */
   22801    if (INSN0(15,4) == 0xFA9
   22802        && INSN1(15,12) == BITS4(1,1,1,1)
   22803        && (   INSN1(7,4) == BITS4(1,0,0,0)     // REV
   22804            || INSN1(7,4) == BITS4(1,0,0,1))) { // REV16
   22805       UInt rM1   = INSN0(3,0);
   22806       UInt rD    = INSN1(11,8);
   22807       UInt rM2   = INSN1(3,0);
   22808       Bool isREV = INSN1(7,4) == BITS4(1,0,0,0);
   22809       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   22810          IRTemp arg = newTemp(Ity_I32);
   22811          assign(arg, getIRegT(rM1));
   22812          IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
   22813          putIRegT(rD, mkexpr(res), condT);
   22814          DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM1);
   22815          goto decode_success;
   22816       }
   22817    }
   22818 
   22819    /* ------------------- (T2) REVSH ------------------ */
            /* Byte-reverse the low halfword of rM and sign-extend it:
               the original byte 0 becomes bits 15:8 (and supplies the
               sign), the original byte 1 becomes bits 7:0.  The source
               register is encoded in both halfwords and must match. */
   22820    if (INSN0(15,4) == 0xFA9
   22821        && INSN1(15,12) == BITS4(1,1,1,1)
   22822        && INSN1(7,4) == BITS4(1,0,1,1)) {
   22823       UInt rM1 = INSN0(3,0);
   22824       UInt rM2 = INSN1(3,0);
   22825       UInt rD  = INSN1(11,8);
   22826       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
   22827          IRTemp irt_rM  = newTemp(Ity_I32);
   22828          IRTemp irt_hi  = newTemp(Ity_I32);
   22829          IRTemp irt_low = newTemp(Ity_I32);
   22830          IRTemp irt_res = newTemp(Ity_I32);
   22831          assign(irt_rM, getIRegT(rM1));
                  /* (rM << 24) >>s 16: byte 0 lands in bits 15:8 and its
                     top bit is replicated through bits 31:16. */
   22832          assign(irt_hi,
   22833                 binop(Iop_Sar32,
   22834                       binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
   22835                       mkU8(16)
   22836                 )
   22837          );
                  /* (rM >> 8) & 0xFF: byte 1 moved down to bits 7:0. */
   22838          assign(irt_low,
   22839                 binop(Iop_And32,
   22840                       binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
   22841                       mkU32(0xFF)
   22842                 )
   22843          );
   22844          assign(irt_res,
   22845                 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
   22846          );
   22847          putIRegT(rD, mkexpr(irt_res), condT);
   22848          DIP("revsh r%u, r%u\n", rD, rM1);
   22849          goto decode_success;
   22850       }
   22851    }
   22852 
   22853    /* -------------- (T1) MSR apsr, reg -------------- */
            /* Write selected parts of the APSR from rN.  INSN1 bit 11
               selects the NZCVQ flags and bit 10 the GE bits; at least one
               of them must be set for the case to be accepted.  The
               actual state update is delegated to desynthesise_APSR. */
   22854    if (INSN0(15,4) == 0xF38
   22855        && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(9,0) == 0x000) {
   22856       UInt rN          = INSN0(3,0);
   22857       UInt write_ge    = INSN1(10,10);
   22858       UInt write_nzcvq = INSN1(11,11);
   22859       if (!isBadRegT(rN) && (write_nzcvq || write_ge)) {
   22860          IRTemp rNt = newTemp(Ity_I32);
   22861          assign(rNt, getIRegT(rN));
   22862          desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
                  /* Trace suffix: "f" when NZCVQ is written, "g" for GE. */
   22863          DIP("msr cpsr_%s%s, r%u\n",
   22864              write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
   22865          goto decode_success;
   22866       }
   22867    }
   22868 
   22869    /* -------------- (T1) MRS reg, apsr -------------- */
            /* rD = the value produced by synthesise_APSR(), which returns
               a temp holding the assembled status word. */
   22870    if (INSN0(15,0) == 0xF3EF
   22871        && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(7,0) == 0x00) {
   22872       UInt rD = INSN1(11,8);
   22873       if (!isBadRegT(rD)) {
   22874          IRTemp apsr = synthesise_APSR();
   22875          putIRegT( rD, mkexpr(apsr), condT );
   22876          DIP("mrs r%u, cpsr\n", rD);
   22877          goto decode_success;
   22878       }
   22879    }
   22880 
   22881    /* ----------------- (T1) LDREX ----------------- */
            /* Load-exclusive of a 32-bit word from rN + imm8 * 4 into rT,
               expressed as an IRStmt_LLSC with no store data.  The insn is
               first made unconditional by branching over it when condT is
               false, so the register write afterwards is unguarded
               (IRTemp_INVALID). */
   22882    if (INSN0(15,4) == 0xE85 && INSN1(11,8) == BITS4(1,1,1,1)) {
   22883       UInt rN   = INSN0(3,0);
   22884       UInt rT   = INSN1(15,12);
   22885       UInt imm8 = INSN1(7,0);
   22886       if (!isBadRegT(rT) && rN != 15) {
   22887          IRTemp res;
   22888          // go uncond
   22889          mk_skip_over_T32_if_cond_is_false( condT );
   22890          // now uncond
   22891          res = newTemp(Ity_I32);
   22892          stmt( IRStmt_LLSC(Iend_LE,
   22893                            res,
   22894                            binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
   22895                            NULL/*this is a load*/ ));
   22896          putIRegT(rT, mkexpr(res), IRTemp_INVALID);
   22897          DIP("ldrex r%u, [r%u, #+%u]\n", rT, rN, imm8 * 4);
   22898          goto decode_success;
   22899       }
   22900    }
   22901 
   22902    /* --------------- (T1) LDREX{B,H} --------------- */
            /* Load-exclusive of a byte (INSN1(11,0) == 0xF4F) or halfword
               (0xF5F) from [rN], zero-extended into rT.  There is no
               offset in these forms.  As for LDREX, the insn is made
               unconditional before the IRStmt_LLSC is emitted. */
   22903    if (INSN0(15,4) == 0xE8D
   22904        && (INSN1(11,0) == 0xF4F || INSN1(11,0) == 0xF5F)) {
   22905       UInt rN  = INSN0(3,0);
   22906       UInt rT  = INSN1(15,12);
   22907       Bool isH = INSN1(11,0) == 0xF5F;
   22908       if (!isBadRegT(rT) && rN != 15) {
   22909          IRTemp res;
   22910          // go uncond
   22911          mk_skip_over_T32_if_cond_is_false( condT );
   22912          // now uncond
                  /* The result temp has the width of the access. */
   22913          res = newTemp(isH ? Ity_I16 : Ity_I8);
   22914          stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
   22915                            NULL/*this is a load*/ ));
   22916          putIRegT(rT, unop(isH ? Iop_16Uto32 : Iop_8Uto32, mkexpr(res)),
   22917                       IRTemp_INVALID);
   22918          DIP("ldrex%c r%u, [r%u]\n", isH ? 'h' : 'b', rT, rN);
   22919          goto decode_success;
   22920       }
   22921    }
   22922 
   22923    /* --------------- (T1) LDREXD --------------- */
            /* Load-exclusive of a 64-bit value from [rN]: the low word
               goes to rT and the high word to rT2.  rT and rT2 must be
               distinct registers accepted by isBadRegT, and rN must not be
               r15.  The insn is made unconditional before the load. */
   22924    if (INSN0(15,4) == 0xE8D && INSN1(7,0) == 0x7F) {
   22925       UInt rN  = INSN0(3,0);
   22926       UInt rT  = INSN1(15,12);
   22927       UInt rT2 = INSN1(11,8);
   22928       if (!isBadRegT(rT) && !isBadRegT(rT2) && rT != rT2 && rN != 15) {
   22929          IRTemp res;
   22930          // go uncond
   22931          mk_skip_over_T32_if_cond_is_false( condT );
   22932          // now uncond
   22933          res = newTemp(Ity_I64);
   22934          // FIXME: assumes little-endian guest
   22935          stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
   22936                            NULL/*this is a load*/ ));
   22937          // FIXME: assumes little-endian guest
   22938          putIRegT(rT,  unop(Iop_64to32,   mkexpr(res)), IRTemp_INVALID);
   22939          putIRegT(rT2, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID);
   22940          DIP("ldrexd r%u, r%u, [r%u]\n", rT, rT2, rN);
   22941          goto decode_success;
   22942       }
   22943    }
   22944 
   22945    /* ----------------- (T1) STREX ----------------- */
            /* Store-exclusive of the 32-bit value in rT to rN + imm8 * 4,
               with the status written to rD (0 = success, 1 = failure).
               rD must differ from both rN and rT.  The insn is made
               unconditional before the IRStmt_LLSC is emitted, so the
               write to rD is unguarded. */
   22946    if (INSN0(15,4) == 0xE84) {
   22947       UInt rN   = INSN0(3,0);
   22948       UInt rT   = INSN1(15,12);
   22949       UInt rD   = INSN1(11,8);
   22950       UInt imm8 = INSN1(7,0);
   22951       if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
   22952           && rD != rN && rD != rT) {
   22953          IRTemp resSC1, resSC32;
   22954          // go uncond
   22955          mk_skip_over_T32_if_cond_is_false( condT );
   22956          // now uncond
   22957          /* Ok, now we're unconditional.  Do the store. */
   22958          resSC1 = newTemp(Ity_I1);
   22959          stmt( IRStmt_LLSC(Iend_LE,
   22960                            resSC1,
   22961                            binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
   22962                            getIRegT(rT)) );
   22963          /* Set rD to 1 on failure, 0 on success.  Currently we have
   22964             resSC1 == 0 on failure, 1 on success. */
   22965          resSC32 = newTemp(Ity_I32);
   22966          assign(resSC32,
   22967                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   22968          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
   22969          DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
   22970          goto decode_success;
   22971       }
   22972    }
   22973 
   22974    /* --------------- (T1) STREX{B,H} --------------- */
            /* Store-exclusive of the low byte (INSN1(11,4) == 0xF4) or low
               halfword (0xF5) of rT to [rN], with the status written to rD
               (0 = success, 1 = failure).  There is no offset in these
               forms.  rD must differ from both rN and rT. */
   22975    if (INSN0(15,4) == 0xE8C
   22976        && (INSN1(11,4) == 0xF4 || INSN1(11,4) == 0xF5)) {
   22977       UInt rN  = INSN0(3,0);
   22978       UInt rT  = INSN1(15,12);
   22979       UInt rD  = INSN1(3,0);
   22980       Bool isH = INSN1(11,4) == 0xF5;
   22981       if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
   22982           && rD != rN && rD != rT) {
   22983          IRTemp resSC1, resSC32;
   22984          // go uncond
   22985          mk_skip_over_T32_if_cond_is_false( condT );
   22986          // now uncond
   22987          /* Ok, now we're unconditional.  Do the store. */
   22988          resSC1 = newTemp(Ity_I1);
                  /* The store data is narrowed to the access width. */
   22989          stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN),
   22990                            unop(isH ? Iop_32to16 : Iop_32to8,
   22991                                 getIRegT(rT))) );
   22992          /* Set rD to 1 on failure, 0 on success.  Currently we have
   22993             resSC1 == 0 on failure, 1 on success. */
   22994          resSC32 = newTemp(Ity_I32);
   22995          assign(resSC32,
   22996                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   22997          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
   22998          DIP("strex%c r%u, r%u, [r%u]\n", isH ? 'h' : 'b', rD, rT, rN);
   22999          goto decode_success;
   23000       }
   23001    }
   23002 
   23003    /* ---------------- (T1) STREXD ---------------- */
            /* Store-exclusive of the 64-bit value rT2:rT (rT2 is the high
               word, rT the low word) to [rN], with the status written to
               rD (0 = success, 1 = failure).  rD must differ from rN, rT
               and rT2. */
   23004    if (INSN0(15,4) == 0xE8C && INSN1(7,4) == BITS4(0,1,1,1)) {
   23005       UInt rN  = INSN0(3,0);
   23006       UInt rT  = INSN1(15,12);
   23007       UInt rT2 = INSN1(11,8);
   23008       UInt rD  = INSN1(3,0);
   23009       if (!isBadRegT(rD) && !isBadRegT(rT) && !isBadRegT(rT2)
   23010           && rN != 15 && rD != rN && rD != rT && rD != rT2) {
   23011          IRTemp resSC1, resSC32, data;
   23012          // go uncond
   23013          mk_skip_over_T32_if_cond_is_false( condT );
   23014          // now uncond
   23015          /* Ok, now we're unconditional.  Do the store. */
   23016          resSC1 = newTemp(Ity_I1);
   23017          data = newTemp(Ity_I64);
   23018          // FIXME: assumes little-endian guest
   23019          assign(data, binop(Iop_32HLto64, getIRegT(rT2), getIRegT(rT)));
   23020          // FIXME: assumes little-endian guest
   23021          stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN), mkexpr(data)));
   23022          /* Set rD to 1 on failure, 0 on success.  Currently we have
   23023             resSC1 == 0 on failure, 1 on success. */
   23024          resSC32 = newTemp(Ity_I32);
   23025          assign(resSC32,
   23026                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
   23027          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
   23028          DIP("strexd r%u, r%u, r%u, [r%u]\n", rD, rT, rT2, rN);
   23029          goto decode_success;
   23030       }
   23031    }
   23032 
   23033    /* -------------- v7 barrier insns -------------- */
            /* DSB, DMB and ISB.  The low byte of the second halfword
               selects the barrier kind and option; every recognised
               combination is translated to the same Imbe_Fence event and
               only the trace text differs.  Unrecognised option values
               leave the switch and fall through to the later cases. */
   23034    if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF00) == 0x8F00) {
   23035       /* FIXME: should this be unconditional? */
   23036       /* XXX this isn't really right, is it?  The generated IR does
   23037          them unconditionally.  I guess it doesn't matter since it
   23038          doesn't do any harm to do them even when the guarding
   23039          condition is false -- it's just a performance loss. */
   23040       switch (INSN1(7,0)) {
   23041          case 0x4F: /* DSB sy */
   23042          case 0x4E: /* DSB st */
   23043          case 0x4B: /* DSB ish */
   23044          case 0x4A: /* DSB ishst */
   23045          case 0x47: /* DSB nsh */
   23046          case 0x46: /* DSB nshst */
   23047          case 0x43: /* DSB osh */
   23048          case 0x42: /* DSB oshst */
   23049             stmt( IRStmt_MBE(Imbe_Fence) );
   23050             DIP("DSB\n");
   23051             goto decode_success;
   23052          case 0x5F: /* DMB sy */
   23053          case 0x5E: /* DMB st */
   23054          case 0x5B: /* DMB ish */
   23055          case 0x5A: /* DMB ishst */
   23056          case 0x57: /* DMB nsh */
   23057          case 0x56: /* DMB nshst */
   23058          case 0x53: /* DMB osh */
   23059          case 0x52: /* DMB oshst */
   23060             stmt( IRStmt_MBE(Imbe_Fence) );
   23061             DIP("DMB\n");
   23062             goto decode_success;
   23063          case 0x6F: /* ISB */
   23064             stmt( IRStmt_MBE(Imbe_Fence) );
   23065             DIP("ISB\n");
   23066             goto decode_success;
   23067          default:
   23068             break;
   23069       }
   23070    }
   23071 
   23072    /* ---------------------- PLD{,W} ---------------------- */
            /* Preload hint with a 12-bit positive offset.  No IR is
               generated: the insn is accepted and only traced.  The
               mask 0xFFD ignores INSN0 bit 5, which distinguishes PLD
               from PLDW. */
   23073    if ((INSN0(15,4) & 0xFFD) == 0xF89 && INSN1(15,12) == 0xF) {
   23074       /* FIXME: should this be unconditional? */
   23075       /* PLD/PLDW immediate, encoding T1 */
   23076       UInt rN    = INSN0(3,0);
   23077       UInt bW    = INSN0(5,5);
   23078       UInt imm12 = INSN1(11,0);
   23079       DIP("pld%s [r%u, #%u]\n", bW ? "w" : "",  rN, imm12);
   23080       goto decode_success;
   23081    }
   23082 
            /* Preload hint with an 8-bit negative offset.  No IR is
               generated: the insn is accepted and only traced.  INSN0
               bit 5 (masked out of the match) selects PLDW. */
   23083    if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,8) == 0xFC) {
   23084       /* FIXME: should this be unconditional? */
   23085       /* PLD/PLDW immediate, encoding T2 */
   23086       UInt rN    = INSN0(3,0);
   23087       UInt bW    = INSN0(5,5);
   23088       UInt imm8  = INSN1(7,0);
   23089       DIP("pld%s [r%u, #-%u]\n", bW ? "w" : "",  rN, imm8);
   23090       goto decode_success;
   23091    }
   23092 
            /* Preload hint with a register offset, rM shifted left by
               imm2.  No IR is generated: the insn is accepted and only
               traced, provided rM passes isBadRegT.  INSN0 bit 5 (masked
               out of the match) selects PLDW. */
   23093    if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,6) == 0x3C0) {
   23094       /* FIXME: should this be unconditional? */
   23095       /* PLD/PLDW register, encoding T1 */
   23096       UInt rN   = INSN0(3,0);
   23097       UInt rM   = INSN1(3,0);
   23098       UInt bW   = INSN0(5,5);
   23099       UInt imm2 = INSN1(5,4);
   23100       if (!isBadRegT(rM)) {
   23101          DIP("pld%s [r%u, r%u, lsl %u]\n", bW ? "w" : "", rN, rM, imm2);
   23102          goto decode_success;
   23103       }
   23104       /* fall through */
   23105    }
   23106 
   23107    /* -------------- read CP15 TPIDRURO register ------------- */
   23108    /* mrc     p15, 0,  r0, c13, c0, 3  up to
   23109       mrc     p15, 0, r14, c13, c0, 3
   23110    */
   23111    /* I don't know whether this is really v7-only.  But anyway, we
   23112       have to support it since arm-linux uses TPIDRURO as a thread
   23113       state register. */
            /* The value is read from the guest state at OFFB_TPIDRURO and
               written to rD, guarded by condT. */
   23114    if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F70)) {
   23115       UInt rD = INSN1(15,12);
   23116       if (!isBadRegT(rD)) {
   23117          putIRegT(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32), condT);
   23118          DIP("mrc p15,0, r%u, c13, c0, 3\n", rD);
   23119          goto decode_success;
   23120       }
   23121       /* fall through */
   23122    }
   23123 
   23124    /* -------------- read CP15 PMUSRENR register ------------- */
   23125    /* mrc     p15, 0, r0,  c9, c14, 0  up to
   23126       mrc     p15, 0, r14, c9, c14, 0
   23127       See comment on the ARM equivalent of this (above) for details.
   23128    */
            /* The read always yields the constant zero; no guest state is
               consulted. */
   23129    if ((INSN0(15,0) == 0xEE19) && (INSN1(11,0) == 0x0F1E)) {
   23130       UInt rD = INSN1(15,12);
   23131       if (!isBadRegT(rD)) {
   23132          putIRegT(rD, mkU32(0), condT);
   23133          DIP("mrc p15,0, r%u, c9, c14, 0\n", rD);
   23134          goto decode_success;
   23135       }
   23136       /* fall through */
   23137    }
   23138 
   23139    /* ------------------- CLREX ------------------ */
            /* Emitted as an Imbe_CancelReservation event.  The insn is
               first made unconditional by branching over it when condT is
               false. */
   23140    if (INSN0(15,0) == 0xF3BF && INSN1(15,0) == 0x8F2F) {
   23141       /* AFAICS, this simply cancels a (all?) reservations made by a
   23142          (any?) preceding LDREX(es).  Arrange to hand it through to
   23143          the back end. */
   23144       mk_skip_over_T32_if_cond_is_false( condT );
   23145       stmt( IRStmt_MBE(Imbe_CancelReservation) );
   23146       DIP("clrex\n");
   23147       goto decode_success;
   23148    }
   23149 
   23150    /* ------------------- NOP ------------------ */
            /* 32-bit NOP (0xF3AF 0x8000): accepted and traced, with no IR
               generated. */
   23151    if (INSN0(15,0) == 0xF3AF && INSN1(15,0) == 0x8000) {
   23152       DIP("nop\n");
   23153       goto decode_success;
   23154    }
   23155 
   23156    /* -------------- (T1) LDRT reg+#imm8 -------------- */
   23157    /* Load Register Unprivileged:
   23158       ldrt Rt, [Rn, #imm8]
   23159    */
            /* Translated as an ordinary guarded 32-bit load from
               rN + imm8 (the offset is not scaled); nothing here
               distinguishes the unprivileged access from a normal one.
               ITSTATE is put back to its old value around the memory
               access and restored to the new value afterwards. */
   23160    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,1)
   23161        && INSN1(11,8) == BITS4(1,1,1,0)) {
   23162       UInt rT    = INSN1(15,12);
   23163       UInt rN    = INSN0(3,0);
   23164       UInt imm8  = INSN1(7,0);
   23165       Bool valid = True;
   23166       if (rN == 15 || isBadRegT(rT)) valid = False;
   23167       if (valid) {
   23168          put_ITSTATE(old_itstate);
   23169          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   23170          IRTemp newRt = newTemp(Ity_I32);
                  /* The current value of rT is passed as the alternative
                     result (presumably what newRt holds when condT is
                     false), so the unguarded write below is harmless. */
   23171          loadGuardedLE( newRt, ILGop_Ident32, ea, llGetIReg(rT), condT );
   23172          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   23173          put_ITSTATE(new_itstate);
   23174          DIP("ldrt r%u, [r%u, #%u]\n", rT, rN, imm8);
   23175          goto decode_success;
   23176       }
   23177    }
   23178 
   23179    /* -------------- (T1) STRT reg+#imm8 -------------- */
   23180    /* Store Register Unprivileged:
   23181       strt Rt, [Rn, #imm8]
   23182    */
            /* Translated as an ordinary guarded 32-bit store of rT to
               rN + imm8 (the offset is not scaled).  ITSTATE is put back
               to its old value around the memory access and restored to
               the new value afterwards. */
   23183    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,0)
   23184        && INSN1(11,8) == BITS4(1,1,1,0)) {
   23185       UInt rT    = INSN1(15,12);
   23186       UInt rN    = INSN0(3,0);
   23187       UInt imm8  = INSN1(7,0);
   23188       Bool valid = True;
   23189       if (rN == 15 || isBadRegT(rT)) valid = False;
   23190       if (valid) {
   23191          put_ITSTATE(old_itstate);
   23192          IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   23193          storeGuardedLE( address, llGetIReg(rT), condT );
   23194          put_ITSTATE(new_itstate);
   23195          DIP("strt r%u, [r%u, #%u]\n", rT, rN, imm8);
   23196          goto decode_success;
   23197       }
   23198    }
   23199 
   23200    /* -------------- (T1) STRBT reg+#imm8 -------------- */
   23201    /* Store Register Byte Unprivileged:
   23202       strbt Rt, [Rn, #imm8]
   23203    */
            /* Translated as an ordinary guarded store of the low byte of
               rT to rN + imm8.  ITSTATE is put back to its old value
               around the memory access and restored afterwards. */
   23204    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,0)
   23205        && INSN1(11,8) == BITS4(1,1,1,0)) {
   23206       UInt rT    = INSN1(15,12);
   23207       UInt rN    = INSN0(3,0);
   23208       UInt imm8  = INSN1(7,0);
   23209       Bool valid = True;
   23210       if (rN == 15 || isBadRegT(rT)) valid = False;
   23211       if (valid) {
   23212          put_ITSTATE(old_itstate);
   23213          IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   23214          IRExpr* data = unop(Iop_32to8, llGetIReg(rT));
   23215          storeGuardedLE( address, data, condT );
   23216          put_ITSTATE(new_itstate);
   23217          DIP("strbt r%u, [r%u, #%u]\n", rT, rN, imm8);
   23218          goto decode_success;
   23219       }
   23220    }
   23221 
   23222    /* -------------- (T1) LDRHT reg+#imm8 -------------- */
   23223    /* Load Register Halfword Unprivileged:
   23224       ldrht Rt, [Rn, #imm8]
   23225    */
            /* Translated as an ordinary guarded halfword load from
               rN + imm8, zero-extended into rT.  ITSTATE is put back to
               its old value around the memory access and restored
               afterwards. */
   23226    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,1)
   23227        && INSN1(11,8) == BITS4(1,1,1,0)) {
   23228       UInt rN    = INSN0(3,0);
   23229       Bool valid = True;
   23230       if (rN == 15) {
   23231          /* In this case our instruction is LDRH (literal), in fact:
   23232             LDRH (literal) was realized earlier, so we don't want to
   23233             make it twice. */
   23234          valid = False;
   23235       }
   23236       UInt rT    = INSN1(15,12);
   23237       UInt imm8  = INSN1(7,0);
   23238       if (isBadRegT(rT)) valid = False;
   23239       if (valid) {
   23240          put_ITSTATE(old_itstate);
   23241          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   23242          IRTemp newRt = newTemp(Ity_I32);
                  /* The current value of rT is passed as the alternative
                     result (presumably used when condT is false). */
   23243          loadGuardedLE( newRt, ILGop_16Uto32, ea, llGetIReg(rT), condT );
   23244          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   23245          put_ITSTATE(new_itstate);
   23246          DIP("ldrht r%u, [r%u, #%u]\n", rT, rN, imm8);
   23247          goto decode_success;
   23248       }
   23249    }
   23250 
   23251    /* -------------- (T1) LDRSHT reg+#imm8 -------------- */
   23252    /* Load Register Signed Halfword Unprivileged:
   23253       ldrsht Rt, [Rn, #imm8]
   23254    */
            /* Translated as an ordinary guarded halfword load from
               rN + imm8, sign-extended into rT.  ITSTATE is put back to
               its old value around the memory access and restored
               afterwards. */
   23255    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(1,1)
   23256        && INSN1(11,8) == BITS4(1,1,1,0)) {
   23257       UInt rN    = INSN0(3,0);
   23258       Bool valid = True;
   23259       if (rN == 15) {
   23260          /* In this case our instruction is LDRSH (literal), in fact:
   23261             LDRSH (literal) was realized earlier, so we don't want to
   23262             make it twice. */
   23263          valid = False;
   23264       }
   23265       UInt rT    = INSN1(15,12);
   23266       UInt imm8  = INSN1(7,0);
   23267       if (isBadRegT(rT)) valid = False;
   23268       if (valid) {
   23269          put_ITSTATE(old_itstate);
   23270          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   23271          IRTemp newRt = newTemp(Ity_I32);
                  /* The current value of rT is passed as the alternative
                     result (presumably used when condT is false). */
   23272          loadGuardedLE( newRt, ILGop_16Sto32, ea, llGetIReg(rT), condT );
   23273          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   23274          put_ITSTATE(new_itstate);
   23275          DIP("ldrsht r%u, [r%u, #%u]\n", rT, rN, imm8);
   23276          goto decode_success;
   23277       }
   23278    }
   23279 
   23280    /* -------------- (T1) STRHT reg+#imm8 -------------- */
   23281    /* Store Register Halfword Unprivileged:
   23282       strht Rt, [Rn, #imm8]
   23283    */
            /* Translated as an ordinary guarded store of the low halfword
               of rT to rN + imm8.  ITSTATE is put back to its old value
               around the memory access and restored afterwards. */
   23284    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,0)
   23285        && INSN1(11,8) == BITS4(1,1,1,0)) {
   23286       UInt rT    = INSN1(15,12);
   23287       UInt rN    = INSN0(3,0);
   23288       UInt imm8  = INSN1(7,0);
   23289       Bool valid = True;
   23290       if (rN == 15 || isBadRegT(rT)) valid = False;
   23291       if (valid) {
   23292          put_ITSTATE(old_itstate);
   23293          IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   23294          IRExpr* data = unop(Iop_32to16, llGetIReg(rT));
   23295          storeGuardedLE( address, data, condT );
   23296          put_ITSTATE(new_itstate);
   23297          DIP("strht r%u, [r%u, #%u]\n", rT, rN, imm8);
   23298          goto decode_success;
   23299       }
   23300    }
   23301 
   23302    /* -------------- (T1) LDRBT reg+#imm8 -------------- */
   23303    /* Load Register Byte Unprivileged:
   23304       ldrbt Rt, [Rn, #imm8]
   23305    */
            /* Translated as an ordinary guarded byte load from rN + imm8,
               zero-extended into rT.  ITSTATE is put back to its old value
               around the memory access and restored afterwards. */
   23306    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,1)
   23307        && INSN1(11,8) == BITS4(1,1,1,0)) {
   23308       UInt rN    = INSN0(3,0);
   23309       UInt rT    = INSN1(15,12);
   23310       UInt imm8  = INSN1(7,0);
   23311       Bool valid = True;
   23312       if (rN == 15 /* insn is LDRB (literal) */) valid = False;
   23313       if (isBadRegT(rT)) valid = False;
   23314       if (valid) {
   23315          put_ITSTATE(old_itstate);
   23316          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   23317          IRTemp newRt = newTemp(Ity_I32);
                  /* The current value of rT is passed as the alternative
                     result (presumably used when condT is false). */
   23318          loadGuardedLE( newRt, ILGop_8Uto32, ea, llGetIReg(rT), condT );
   23319          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   23320          put_ITSTATE(new_itstate);
   23321          DIP("ldrbt r%u, [r%u, #%u]\n", rT, rN, imm8);
   23322          goto decode_success;
   23323       }
   23324    }
   23325 
   23326    /* -------------- (T1) LDRSBT reg+#imm8 -------------- */
   23327    /* Load Register Signed Byte Unprivileged:
   23328       ldrsbt Rt, [Rn, #imm8]
   23329    */
   23330    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
   23331        && INSN1(11,8) == BITS4(1,1,1,0)) {
   23332       UInt rN    = INSN0(3,0);
   23333       Bool valid = True;
   23334       UInt rT    = INSN1(15,12);
   23335       UInt imm8  = INSN1(7,0);
   23336       if (rN == 15 /* insn is LDRSB (literal) */) valid = False;
   23337       if (isBadRegT(rT)) valid = False;
   23338       if (valid) {
   23339          put_ITSTATE(old_itstate);
   23340          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
   23341          IRTemp newRt = newTemp(Ity_I32);
   23342          loadGuardedLE( newRt, ILGop_8Sto32, ea, llGetIReg(rT), condT );
   23343          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
   23344          put_ITSTATE(new_itstate);
   23345          DIP("ldrsbt r%u, [r%u, #%u]\n", rT, rN, imm8);
   23346          goto decode_success;
   23347       }
   23348    }
   23349 
   23350    /* -------------- (T1) PLI reg+#imm12 -------------- */
   23351    /* Preload Instruction:
   23352       pli [Rn, #imm12]
   23353    */
   23354    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,1,0) && INSN0(5,4) == BITS2(0,1)
   23355        && INSN1(15,12) == BITS4(1,1,1,1)) {
   23356       UInt rN    = INSN0(3,0);
   23357       UInt imm12 = INSN1(11,0);
   23358       if (rN != 15) {
   23359          DIP("pli [r%u, #%u]\n", rN, imm12);
   23360          goto decode_success;
   23361       }
   23362    }
   23363 
   23364    /* -------------- (T2) PLI reg-#imm8 -------------- */
   23365    /* Preload Instruction:
   23366       pli [Rn, #-imm8]
   23367    */
   23368    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
   23369        && INSN1(15,8) == BITS8(1,1,1,1,1,1,0,0)) {
   23370       UInt rN   = INSN0(3,0);
   23371       UInt imm8 = INSN1(7,0);
   23372       if (rN != 15) {
   23373          DIP("pli [r%u, #-%u]\n", rN, imm8);
   23374          goto decode_success;
   23375       }
   23376    }
   23377 
   23378    /* -------------- (T3) PLI PC+/-#imm12 -------------- */
   23379    /* Preload Instruction:
   23380       pli [PC, #+/-imm12]
   23381    */
   23382    if (INSN0(15,8) == BITS8(1,1,1,1,1,0,0,1)
   23383        && INSN0(6,0) == BITS7(0,0,1,1,1,1,1)
   23384        && INSN1(15,12) == BITS4(1,1,1,1)) {
   23385       UInt imm12 = INSN1(11,0);
   23386       UInt bU    = INSN0(7,7);
   23387       DIP("pli [pc, #%c%u]\n", bU == 1 ? '+' : '-', imm12);
   23388       goto decode_success;
   23389    }
   23390 
   23391    /* ----------------------------------------------------------- */
   23392    /* -- VFP (CP 10, CP 11) instructions (in Thumb mode)       -- */
   23393    /* ----------------------------------------------------------- */
   23394 
   23395    if (INSN0(15,12) == BITS4(1,1,1,0)) {
   23396       UInt insn28 = (INSN0(11,0) << 16) | INSN1(15,0);
   23397       Bool ok_vfp = decode_CP10_CP11_instruction (
   23398                        &dres, insn28, condT, ARMCondAL/*bogus*/,
   23399                        True/*isT*/
   23400                     );
   23401       if (ok_vfp)
   23402          goto decode_success;
   23403    }
   23404 
   23405    /* ----------------------------------------------------------- */
   23406    /* -- NEON instructions (only v7 and below, in Thumb mode)  -- */
   23407    /* ----------------------------------------------------------- */
   23408 
   23409    if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
   23410       UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
   23411       Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
   23412                         &dres, insn32, condT, True/*isT*/
   23413                      );
   23414       if (ok_neon)
   23415          goto decode_success;
   23416    }
   23417 
   23418    /* ----------------------------------------------------------- */
   23419    /* -- v6 media instructions (in Thumb mode)                 -- */
   23420    /* ----------------------------------------------------------- */
   23421 
   23422    { UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
   23423      Bool ok_v6m = decode_V6MEDIA_instruction(
   23424                       &dres, insn32, condT, ARMCondAL/*bogus*/,
   23425                       True/*isT*/
   23426                    );
   23427      if (ok_v6m)
   23428         goto decode_success;
   23429    }
   23430 
   23431    /* ----------------------------------------------------------- */
   23432    /* -- v8 instructions (in Thumb mode)                       -- */
   23433    /* ----------------------------------------------------------- */
   23434 
   23435    /* If we get here, it means that all attempts to decode the
   23436       instruction as ARMv7 or earlier have failed.  So, if we're doing
   23437       ARMv8 or later, here is the point to try for it. */
   23438 
   23439    if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
   23440       UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
   23441       Bool ok_v8
   23442          = decode_V8_instruction( &dres, insn32, condT, True/*isT*/,
   23443                                   old_itstate, new_itstate );
   23444       if (ok_v8)
   23445          goto decode_success;
   23446    }
   23447 
   23448    /* ----------------------------------------------------------- */
   23449    /* -- Undecodable                                           -- */
   23450    /* ----------------------------------------------------------- */
   23451 
   23452    goto decode_failure;
   23453    /*NOTREACHED*/
   23454 
   23455   decode_failure:
   23456    /* All decode failures end up here. */
   23457    if (sigill_diag)
   23458       vex_printf("disInstr(thumb): unhandled instruction: "
   23459                  "0x%04x 0x%04x\n", (UInt)insn0, (UInt)insn1);
   23460 
   23461    /* Back up ITSTATE to the initial value for this instruction.
   23462       If we don't do that, any subsequent restart of the instruction
   23463       will restart with the wrong value. */
   23464    if (old_itstate != IRTemp_INVALID)
   23465       put_ITSTATE(old_itstate);
   23466 
   23467    /* Tell the dispatcher that this insn cannot be decoded, and so has
   23468       not been executed, and (is currently) the next to be executed.
   23469       R15 should be up-to-date since it is made so at the start of each
   23470       insn, but nevertheless be paranoid and update it again right
   23471       now. */
   23472    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   23473    llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
   23474    dres.len         = 0;
   23475    dres.whatNext    = Dis_StopHere;
   23476    dres.jk_StopHere = Ijk_NoDecode;
   23477    dres.continueAt  = 0;
   23478    return dres;
   23479 
   23480   decode_success:
   23481    /* All decode successes end up here. */
   23482    vassert(dres.len == 4 || dres.len == 2 || dres.len == 20);
   23483    switch (dres.whatNext) {
   23484       case Dis_Continue:
   23485          llPutIReg(15, mkU32(dres.len + (guest_R15_curr_instr_notENC | 1)));
   23486          break;
   23487       case Dis_ResteerU:
   23488       case Dis_ResteerC:
   23489          llPutIReg(15, mkU32(dres.continueAt));
   23490          break;
   23491       case Dis_StopHere:
   23492          break;
   23493       default:
   23494          vassert(0);
   23495    }
   23496 
   23497    DIP("\n");
   23498 
   23499    return dres;
   23500 
   23501 #  undef INSN0
   23502 #  undef INSN1
   23503 }
   23504 
   23505 #undef DIP
   23506 #undef DIS
   23507 
   23508 
   23509 /* Helper table for figuring out how many insns an IT insn
   23510    conditionalises.
   23511 
   23512    An ITxyz instruction of the format "1011 1111 firstcond mask"
   23513    conditionalises some number of instructions, as indicated by the
   23514    following table.  A value of zero indicates the instruction is
   23515    invalid in some way.
   23516 
   23517    mask = 0 means this isn't an IT instruction
   23518    fc = 15 (NV) means unpredictable
   23519 
   23520    The line fc = 14 (AL) is different from the others; there are
   23521    additional constraints in this case.
   23522 
   23523           mask(0 ..                   15)
   23524         +--------------------------------
   23525    fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23526    ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23527         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23528         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23529         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23530         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23531         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23532         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23533         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23534         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23535         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23536         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23537         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23538         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23539         | 0 4 3 0 2 0 0 0 1 0 0 0 0 0 0 0
   23540    15)  | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
   23541 
   23542    To be conservative with the analysis, let's rule out the mask = 0
   23543    case, since that isn't an IT insn at all.  But for all the other
   23544    cases where the table contains zero, that means unpredictable, so
   23545    let's say 4 to be conservative.  Hence we have a safe value for any
   23546    IT (mask,fc) pair that the CPU would actually identify as an IT
   23547    instruction.  The final table is
   23548 
   23549           mask(0 ..                   15)
   23550         +--------------------------------
   23551    fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23552    ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23553         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23554         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23555         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23556         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23557         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23558         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23559         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23560         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23561         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23562         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23563         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23564         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
   23565         | 0 4 3 4 2 4 4 4 1 4 4 4 4 4 4 4
   23566    15)  | 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
   23567 */
   23568 static const UChar it_length_table[256]   /* number of insns an IT insn conditionalises; 16 rows (fc 0..15) of 16 masks (0..15), same layout as the final table in the preceding comment */
   23569    = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,   /* fc =  0; column 0 (mask = 0) is 0 in every row: not an IT insn.  Presumably indexed as (fc << 4) | mask -- confirm at the use site */
   23570        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,   /* fc =  1 */
   23571        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,   /* fc =  2 */
   23572        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,   /* fc =  3 */
   23573        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,   /* fc =  4 */
   23574        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,   /* fc =  5 */
   23575        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,   /* fc =  6 */
   23576        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,   /* fc =  7 */
   23577        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,   /* fc =  8 */
   23578        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,   /* fc =  9 */
   23579        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,   /* fc = 10 */
   23580        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,   /* fc = 11 */
   23581        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,   /* fc = 12 */
   23582        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,   /* fc = 13 */
   23583        0, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,   /* fc = 14 (AL): masks that are invalid for AL are conservatively given 4 */
   23584        0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4    /* fc = 15 (NV): unpredictable, so every nonzero mask is conservatively 4 */
   23585      };
   23586 
   23587 
   23588 /*------------------------------------------------------------*/
   23589 /*--- Top-level fn                                         ---*/
   23590 /*------------------------------------------------------------*/
   23591 
   23592 /* Disassemble a single guest instruction into IR.  Bit 0 of guest_IP_ENCODED
   23593    selects the decoder: set -> disInstr_THUMB_WRK, clear -> disInstr_ARM_WRK.  The worker's DisResult is returned unchanged. */
   23594 /* The instruction is located in host memory at &guest_code_IN[delta_ENCODED] (ARM) or &guest_code_IN[delta_ENCODED - 1] (Thumb). */
   23595 DisResult disInstr_ARM ( IRSB*        irsb_IN,             /* stored into the file-level 'irsb' */
   23596                          Bool         (*resteerOkFn) ( void*, Addr ),   /* forwarded unchanged to the worker */
   23597                          Bool         resteerCisOk,        /* forwarded unchanged to the worker */
   23598                          void*        callback_opaque,     /* forwarded unchanged to the worker */
   23599                          const UChar* guest_code_IN,       /* base pointer of the guest code bytes, indexed by the delta below */
   23600                          Long         delta_ENCODED,       /* offset into guest_code_IN; 1 is subtracted from it in the Thumb case */
   23601                          Addr         guest_IP_ENCODED,    /* guest address of the insn; bit 0 set means Thumb */
   23602                          VexArch      guest_arch,          /* must be VexArchARM (asserted) */
   23603                          const VexArchInfo* archinfo,      /* forwarded unchanged to the worker */
   23604                          const VexAbiInfo*  abiinfo,       /* forwarded unchanged to the worker */
   23605                          VexEndness   host_endness_IN,     /* stored into the file-level 'host_endness' */
   23606                          Bool         sigill_diag_IN )     /* forwarded to the worker; looks like it enables the "unhandled instruction" diagnostic -- confirm in the workers */
   23607 {
   23608    DisResult dres;
   23609    Bool isThumb = (Bool)(guest_IP_ENCODED & 1);   /* low bit of the encoded IP is the Thumb marker */
   23610 /* Only VexArchARM is accepted here; any other guest_arch trips the assertion below. */
   23611    /* Set globals (see top of this file) */
   23612    vassert(guest_arch == VexArchARM);
   23613 /* Publish the per-call state in file-level variables before any decoding is done. */
   23614    irsb            = irsb_IN;
   23615    host_endness    = host_endness_IN;
   23616    __curr_is_Thumb = isThumb;
   23617 /* Record the instruction address with the Thumb marker bit removed (hence "notENC"), truncated to 32 bits. */
   23618    if (isThumb) {
   23619       guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED - 1;
   23620    } else {
   23621       guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED;
   23622    }
   23623 /* Hand off to the mode-specific worker.  irsb_IN and host_endness_IN are not passed on; they were stored in the globals set earlier in this function. */
   23624    if (isThumb) {
   23625       dres = disInstr_THUMB_WRK ( resteerOkFn,
   23626                                   resteerCisOk, callback_opaque,
   23627                                   &guest_code_IN[delta_ENCODED - 1],   /* presumably delta carries the same +1 Thumb encoding as the IP -- confirm with callers */
   23628                                   archinfo, abiinfo, sigill_diag_IN );
   23629    } else {
   23630       dres = disInstr_ARM_WRK ( resteerOkFn,
   23631                                 resteerCisOk, callback_opaque,
   23632                                 &guest_code_IN[delta_ENCODED],
   23633                                 archinfo, abiinfo, sigill_diag_IN );
   23634    }
   23635 /* Return the worker's result to the caller as-is. */
   23636    return dres;
   23637 }
   23638 
   23639 /* Test program for the conversion of IRCmpF64Result values to VFP
   23640    nzcv values.  See handling of FCMPD et al above. */
   23641 /*
   23642 UInt foo ( UInt x )
   23643 {
   23644    UInt ix    = ((x >> 5) & 3) | (x & 1);
   23645    UInt termL = (((((ix ^ 1) << 30) - 1) >> 29) + 1);
   23646    UInt termR = (ix & (ix >> 1) & 1);
   23647    return termL  -  termR;
   23648 }
   23649 
   23650 void try ( char* s, UInt ir, UInt req )
   23651 {
   23652    UInt act = foo(ir);
   23653    printf("%s 0x%02x -> req %d%d%d%d act %d%d%d%d (0x%x)\n",
   23654           s, ir, (req >> 3) & 1, (req >> 2) & 1,
   23655                  (req >> 1) & 1, (req >> 0) & 1,
   23656                  (act >> 3) & 1, (act >> 2) & 1,
   23657                  (act >> 1) & 1, (act >> 0) & 1, act);
   23658 
   23659 }
   23660 
   23661 int main ( void )
   23662 {
   23663    printf("\n");
   23664    try("UN", 0x45, 0b0011);
   23665    try("LT", 0x01, 0b1000);
   23666    try("GT", 0x00, 0b0010);
   23667    try("EQ", 0x40, 0b0110);
   23668    printf("\n");
   23669    return 0;
   23670 }
   23671 */
   23672 
   23673 /* Spare code for doing reference implementations of various 64-bit
   23674    SIMD interleaves/deinterleaves/concatenation ops. */
   23675 /*
   23676 // Split a 64 bit value into 4 16 bit ones, in 32-bit IRTemps with
   23677 // the top halves guaranteed to be zero.
   23678 static void break64to16s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
   23679                            IRTemp* out0, IRTemp v64 )
   23680 {
   23681   if (out3) *out3 = newTemp(Ity_I32);
   23682   if (out2) *out2 = newTemp(Ity_I32);
   23683   if (out1) *out1 = newTemp(Ity_I32);
   23684   if (out0) *out0 = newTemp(Ity_I32);
   23685   IRTemp hi32 = newTemp(Ity_I32);
   23686   IRTemp lo32 = newTemp(Ity_I32);
   23687   assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
   23688   assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
   23689   if (out3) assign(*out3, binop(Iop_Shr32, mkexpr(hi32), mkU8(16)));
   23690   if (out2) assign(*out2, binop(Iop_And32, mkexpr(hi32), mkU32(0xFFFF)));
   23691   if (out1) assign(*out1, binop(Iop_Shr32, mkexpr(lo32), mkU8(16)));
   23692   if (out0) assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFFFF)));
   23693 }
   23694 
   23695 // Make a 64 bit value from 4 16 bit ones, each of which is in a 32 bit
   23696 // IRTemp.
   23697 static IRTemp mk64from16s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
   23698 {
   23699   IRTemp hi32 = newTemp(Ity_I32);
   23700   IRTemp lo32 = newTemp(Ity_I32);
   23701   assign(hi32,
   23702          binop(Iop_Or32,
   23703                binop(Iop_Shl32, mkexpr(in3), mkU8(16)),
   23704                binop(Iop_And32, mkexpr(in2), mkU32(0xFFFF))));
   23705   assign(lo32,
   23706          binop(Iop_Or32,
   23707                binop(Iop_Shl32, mkexpr(in1), mkU8(16)),
   23708                binop(Iop_And32, mkexpr(in0), mkU32(0xFFFF))));
   23709   IRTemp res = newTemp(Ity_I64);
   23710   assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
   23711   return res;
   23712 }
   23713 
   23714 static IRExpr* mk_InterleaveLO16x4 ( IRTemp a3210, IRTemp b3210 )
   23715 {
   23716   // returns a1 b1 a0 b0
   23717   IRTemp a1, a0, b1, b0;
   23718   break64to16s(NULL, NULL, &a1, &a0, a3210);
   23719   break64to16s(NULL, NULL, &b1, &b0, b3210);
   23720   return mkexpr(mk64from16s(a1, b1, a0, b0));
   23721 }
   23722 
   23723 static IRExpr* mk_InterleaveHI16x4 ( IRTemp a3210, IRTemp b3210 )
   23724 {
   23725   // returns a3 b3 a2 b2
   23726   IRTemp a3, a2, b3, b2;
   23727   break64to16s(&a3, &a2, NULL, NULL, a3210);
   23728   break64to16s(&b3, &b2, NULL, NULL, b3210);
   23729   return mkexpr(mk64from16s(a3, b3, a2, b2));
   23730 }
   23731 
   23732 static IRExpr* mk_CatEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   23733 {
   23734   // returns a2 a0 b2 b0
   23735   IRTemp a2, a0, b2, b0;
   23736   break64to16s(NULL, &a2, NULL, &a0, a3210);
   23737   break64to16s(NULL, &b2, NULL, &b0, b3210);
   23738   return mkexpr(mk64from16s(a2, a0, b2, b0));
   23739 }
   23740 
   23741 static IRExpr* mk_CatOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   23742 {
   23743   // returns a3 a1 b3 b1
   23744   IRTemp a3, a1, b3, b1;
   23745   break64to16s(&a3, NULL, &a1, NULL, a3210);
   23746   break64to16s(&b3, NULL, &b1, NULL, b3210);
   23747   return mkexpr(mk64from16s(a3, a1, b3, b1));
   23748 }
   23749 
   23750 static IRExpr* mk_InterleaveOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   23751 {
   23752   // returns a3 b3 a1 b1
   23753   IRTemp a3, b3, a1, b1;
   23754   break64to16s(&a3, NULL, &a1, NULL, a3210);
   23755   break64to16s(&b3, NULL, &b1, NULL, b3210);
   23756   return mkexpr(mk64from16s(a3, b3, a1, b1));
   23757 }
   23758 
   23759 static IRExpr* mk_InterleaveEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
   23760 {
   23761   // returns a2 b2 a0 b0
   23762   IRTemp a2, b2, a0, b0;
   23763   break64to16s(NULL, &a2, NULL, &a0, a3210);
   23764   break64to16s(NULL, &b2, NULL, &b0, b3210);
   23765   return mkexpr(mk64from16s(a2, b2, a0, b0));
   23766 }
   23767 
   23768 static void break64to8s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
   23769                           IRTemp* out4, IRTemp* out3, IRTemp* out2,
   23770                           IRTemp* out1,IRTemp* out0, IRTemp v64 )
   23771 {
   23772   if (out7) *out7 = newTemp(Ity_I32);
   23773   if (out6) *out6 = newTemp(Ity_I32);
   23774   if (out5) *out5 = newTemp(Ity_I32);
   23775   if (out4) *out4 = newTemp(Ity_I32);
   23776   if (out3) *out3 = newTemp(Ity_I32);
   23777   if (out2) *out2 = newTemp(Ity_I32);
   23778   if (out1) *out1 = newTemp(Ity_I32);
   23779   if (out0) *out0 = newTemp(Ity_I32);
   23780   IRTemp hi32 = newTemp(Ity_I32);
   23781   IRTemp lo32 = newTemp(Ity_I32);
   23782   assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
   23783   assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
   23784   if (out7)
   23785     assign(*out7, binop(Iop_And32,
   23786                         binop(Iop_Shr32, mkexpr(hi32), mkU8(24)),
   23787                         mkU32(0xFF)));
   23788   if (out6)
   23789     assign(*out6, binop(Iop_And32,
   23790                         binop(Iop_Shr32, mkexpr(hi32), mkU8(16)),
   23791                         mkU32(0xFF)));
   23792   if (out5)
   23793     assign(*out5, binop(Iop_And32,
   23794                         binop(Iop_Shr32, mkexpr(hi32), mkU8(8)),
   23795                         mkU32(0xFF)));
   23796   if (out4)
   23797     assign(*out4, binop(Iop_And32, mkexpr(hi32), mkU32(0xFF)));
   23798   if (out3)
   23799     assign(*out3, binop(Iop_And32,
   23800                         binop(Iop_Shr32, mkexpr(lo32), mkU8(24)),
   23801                         mkU32(0xFF)));
   23802   if (out2)
   23803     assign(*out2, binop(Iop_And32,
   23804                         binop(Iop_Shr32, mkexpr(lo32), mkU8(16)),
   23805                         mkU32(0xFF)));
   23806   if (out1)
   23807     assign(*out1, binop(Iop_And32,
   23808                         binop(Iop_Shr32, mkexpr(lo32), mkU8(8)),
   23809                         mkU32(0xFF)));
   23810   if (out0)
   23811     assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFF)));
   23812 }
   23813 
   23814 static IRTemp mk64from8s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
   23815                            IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
   23816 {
   23817   IRTemp hi32 = newTemp(Ity_I32);
   23818   IRTemp lo32 = newTemp(Ity_I32);
   23819   assign(hi32,
   23820          binop(Iop_Or32,
   23821                binop(Iop_Or32,
   23822                      binop(Iop_Shl32,
   23823                            binop(Iop_And32, mkexpr(in7), mkU32(0xFF)),
   23824                            mkU8(24)),
   23825                      binop(Iop_Shl32,
   23826                            binop(Iop_And32, mkexpr(in6), mkU32(0xFF)),
   23827                            mkU8(16))),
   23828                binop(Iop_Or32,
   23829                      binop(Iop_Shl32,
   23830                            binop(Iop_And32, mkexpr(in5), mkU32(0xFF)), mkU8(8)),
   23831                      binop(Iop_And32,
   23832                            mkexpr(in4), mkU32(0xFF)))));
   23833   assign(lo32,
   23834          binop(Iop_Or32,
   23835                binop(Iop_Or32,
   23836                      binop(Iop_Shl32,
   23837                            binop(Iop_And32, mkexpr(in3), mkU32(0xFF)),
   23838                            mkU8(24)),
   23839                      binop(Iop_Shl32,
   23840                            binop(Iop_And32, mkexpr(in2), mkU32(0xFF)),
   23841                            mkU8(16))),
   23842                binop(Iop_Or32,
   23843                      binop(Iop_Shl32,
   23844                            binop(Iop_And32, mkexpr(in1), mkU32(0xFF)), mkU8(8)),
   23845                      binop(Iop_And32,
   23846                            mkexpr(in0), mkU32(0xFF)))));
   23847   IRTemp res = newTemp(Ity_I64);
   23848   assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
   23849   return res;
   23850 }
   23851 
   23852 static IRExpr* mk_InterleaveLO8x8 ( IRTemp a76543210, IRTemp b76543210 )
   23853 {
   23854   // returns a3 b3 a2 b2 a1 b1 a0 b0
   23855   IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
   23856   break64to8s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
   23857   break64to8s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
   23858   return mkexpr(mk64from8s(a3, b3, a2, b2, a1, b1, a0, b0));
   23859 }
   23860 
   23861 static IRExpr* mk_InterleaveHI8x8 ( IRTemp a76543210, IRTemp b76543210 )
   23862 {
   23863   // returns a7 b7 a6 b6 a5 b5 a4 b4
   23864   IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
   23865   break64to8s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
   23866   break64to8s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
   23867   return mkexpr(mk64from8s(a7, b7, a6, b6, a5, b5, a4, b4));
   23868 }
   23869 
   23870 static IRExpr* mk_CatEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   23871 {
   23872   // returns a6 a4 a2 a0 b6 b4 b2 b0
   23873   IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
   23874   break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
   23875   break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
   23876   return mkexpr(mk64from8s(a6, a4, a2, a0, b6, b4, b2, b0));
   23877 }
   23878 
   23879 static IRExpr* mk_CatOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   23880 {
   23881   // returns a7 a5 a3 a1 b7 b5 b3 b1
   23882   IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
   23883   break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
   23884   break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
   23885   return mkexpr(mk64from8s(a7, a5, a3, a1, b7, b5, b3, b1));
   23886 }
   23887 
   23888 static IRExpr* mk_InterleaveEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   23889 {
   23890   // returns a6 b6 a4 b4 a2 b2 a0 b0
   23891   IRTemp a6, b6, a4, b4, a2, b2, a0, b0;
   23892   break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
   23893   break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
   23894   return mkexpr(mk64from8s(a6, b6, a4, b4, a2, b2, a0, b0));
   23895 }
   23896 
   23897 static IRExpr* mk_InterleaveOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
   23898 {
   23899   // returns a7 b7 a5 b5 a3 b3 a1 b1
   23900   IRTemp a7, b7, a5, b5, a3, b3, a1, b1;
   23901   break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
   23902   break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
   23903   return mkexpr(mk64from8s(a7, b7, a5, b5, a3, b3, a1, b1));
   23904 }
   23905 
   23906 static IRExpr* mk_InterleaveLO32x2 ( IRTemp a10, IRTemp b10 )
   23907 {
   23908   // returns a0 b0
   23909   return binop(Iop_32HLto64, unop(Iop_64to32, mkexpr(a10)),
   23910                              unop(Iop_64to32, mkexpr(b10)));
   23911 }
   23912 
   23913 static IRExpr* mk_InterleaveHI32x2 ( IRTemp a10, IRTemp b10 )
   23914 {
   23915   // returns a1 b1
   23916   return binop(Iop_32HLto64, unop(Iop_64HIto32, mkexpr(a10)),
   23917                              unop(Iop_64HIto32, mkexpr(b10)));
   23918 }
   23919 */
   23920 
   23921 /*--------------------------------------------------------------------*/
   23922 /*--- end                                         guest_arm_toIR.c ---*/
   23923 /*--------------------------------------------------------------------*/
   23924