/* Source: Valgrind VEX, priv/guest_arm_toIR.c (code-browser export) */
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- begin                                       guest_arm_toIR.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2012 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    NEON support is
     14    Copyright (C) 2010-2012 Samsung Electronics
     15    contributed by Dmitry Zhurikhin <zhur (at) ispras.ru>
     16               and Kirill Batuzov <batuzovk (at) ispras.ru>
     17 
     18    This program is free software; you can redistribute it and/or
     19    modify it under the terms of the GNU General Public License as
     20    published by the Free Software Foundation; either version 2 of the
     21    License, or (at your option) any later version.
     22 
     23    This program is distributed in the hope that it will be useful, but
     24    WITHOUT ANY WARRANTY; without even the implied warranty of
     25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     26    General Public License for more details.
     27 
     28    You should have received a copy of the GNU General Public License
     29    along with this program; if not, write to the Free Software
     30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     31    02110-1301, USA.
     32 
     33    The GNU General Public License is contained in the file COPYING.
     34 */
     35 
     36 /* XXXX thumb to check:
     37    that all cases where putIRegT writes r15, we generate a jump.
     38 
     39    All uses of newTemp assign to an IRTemp and not a UInt
     40 
     41    For all thumb loads and stores, including VFP ones, new-ITSTATE is
     42    backed out before the memory op, and restored afterwards.  This
     43    needs to happen even after we go uncond.  (and for sure it doesn't
     44    happen for VFP loads/stores right now).
     45 
     46    VFP on thumb: check that we exclude all r13/r15 cases that we
     47    should.
     48 
     49    XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
     50    taking into account the number of insns guarded by an IT.
     51 
     52    remove the nasty hack, in the spechelper, of looking for Or32(...,
     53    0xE0) in as the first arg to armg_calculate_condition, and instead
     54    use Slice44 as specified in comments in the spechelper.
     55 
     56    add specialisations for armg_calculate_flag_c and _v, as they
     57    are moderately often needed in Thumb code.
     58 
     59    Correctness: ITSTATE handling in Thumb SVCs is wrong.
     60 
     61    Correctness (obscure): in m_transtab, when invalidating code
     62    address ranges, invalidate up to 18 bytes after the end of the
     63    range.  This is because the ITSTATE optimisation at the top of
     64    _THUMB_WRK below analyses up to 18 bytes before the start of any
     65    given instruction, and so might depend on the invalidated area.
     66 */
     67 
     68 /* Limitations, etc
     69 
     70    - pretty dodgy exception semantics for {LD,ST}Mxx, no doubt
     71 
     72    - SWP: the restart jump back is Ijk_Boring; it should be
     73      Ijk_NoRedir but that's expensive.  See comments on casLE() in
     74      guest_x86_toIR.c.
     75 */
     76 
     77 /* "Special" instructions.
     78 
     79    This instruction decoder can decode four special instructions
     80    which mean nothing natively (are no-ops as far as regs/mem are
     81    concerned) but have meaning for supporting Valgrind.  A special
     82    instruction is flagged by a 16-byte preamble:
     83 
     84       E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
     85       (mov r12, r12, ROR #3;   mov r12, r12, ROR #13;
     86        mov r12, r12, ROR #29;  mov r12, r12, ROR #19)
     87 
     88    Following that, one of the following 3 are allowed
     89    (standard interpretation in parentheses):
     90 
     91       E18AA00A (orr r10,r10,r10)   R3 = client_request ( R4 )
     92       E18BB00B (orr r11,r11,r11)   R3 = guest_NRADDR
     93       E18CC00C (orr r12,r12,r12)   branch-and-link-to-noredir R4
     94 
     95    Any other bytes following the 16-byte preamble are illegal and
     96    constitute a failure in instruction decoding.  This all assumes
     97    that the preamble will never occur except in specific code
     98    fragments designed for Valgrind to catch.
     99 */
    100 
    101 /* Translates ARM(v5) code to IR. */
    102 
    103 #include "libvex_basictypes.h"
    104 #include "libvex_ir.h"
    105 #include "libvex.h"
    106 #include "libvex_guest_arm.h"
    107 
    108 #include "main_util.h"
    109 #include "main_globals.h"
    110 #include "guest_generic_bb_to_IR.h"
    111 #include "guest_arm_defs.h"
    112 
    113 
    114 /*------------------------------------------------------------*/
    115 /*--- Globals                                              ---*/
    116 /*------------------------------------------------------------*/
    117 
    118 /* These are set at the start of the translation of a instruction, so
    119    that we don't have to pass them around endlessly.  CONST means does
    120    not change during translation of the instruction.
    121 */
    122 
/* CONST: is the host bigendian?  This has to do with float vs double
   register accesses on VFP, but it's complex and not properly thought
   out. */
static Bool host_is_bigendian;

/* CONST: The guest address for the instruction currently being
   translated.  This is the real, "decoded" address (not subject
   to the CPSR.T kludge). */
static Addr32 guest_R15_curr_instr_notENC;

/* CONST, FOR ASSERTIONS ONLY.  Indicates whether currently processed
   insn is Thumb (True) or ARM (False).
   NOTE(review): the leading "__" makes this a reserved identifier in
   ISO C; renaming would also require touching the ASSERT_IS_* macros,
   so it is left as-is here. */
static Bool __curr_is_Thumb;

/* MOD: The IRSB* into which we're generating code.  All the stmt()/
   assign() helpers below append to this block. */
static IRSB* irsb;
    139 
    140 /* These are to do with handling writes to r15.  They are initially
    141    set at the start of disInstr_ARM_WRK to indicate no update,
    142    possibly updated during the routine, and examined again at the end.
    143    If they have been set to indicate a r15 update then a jump is
    144    generated.  Note, "explicit" jumps (b, bx, etc) are generated
    145    directly, not using this mechanism -- this is intended to handle
    146    the implicit-style jumps resulting from (eg) assigning to r15 as
    147    the result of insns we wouldn't normally consider branchy. */
    148 
    149 /* MOD.  Initially False; set to True iff abovementioned handling is
    150    required. */
    151 static Bool r15written;
    152 
    153 /* MOD.  Initially IRTemp_INVALID.  If the r15 branch to be generated
    154    is conditional, this holds the gating IRTemp :: Ity_I32.  If the
    155    branch to be generated is unconditional, this remains
    156    IRTemp_INVALID. */
    157 static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
    158 
    159 /* MOD.  Initially Ijk_Boring.  If an r15 branch is to be generated,
    160    this holds the jump kind. */
    161 static IRTemp r15kind;
    162 
    163 
    164 /*------------------------------------------------------------*/
    165 /*--- Debugging output                                     ---*/
    166 /*------------------------------------------------------------*/
    167 
/* Print a disassembly-trace line iff front-end tracing is enabled.
   NOTE(review): expands to a bare "if" with no else -- callers must
   not use it as the sole body of an outer if/else. */
#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

/* Same, but format into BUF instead of printing. */
#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)

/* Sanity checks on which decoder (Thumb vs ARM) is running. */
#define ASSERT_IS_THUMB \
   do { vassert(__curr_is_Thumb); } while (0)

#define ASSERT_IS_ARM \
   do { vassert(! __curr_is_Thumb); } while (0)
    181 
    182 
    183 /*------------------------------------------------------------*/
    184 /*--- Helper bits and pieces for deconstructing the        ---*/
    185 /*--- arm insn stream.                                     ---*/
    186 /*------------------------------------------------------------*/
    187 
    188 /* Do a little-endian load of a 32-bit word, regardless of the
    189    endianness of the underlying host. */
    190 static inline UInt getUIntLittleEndianly ( UChar* p )
    191 {
    192    UInt w = 0;
    193    w = (w << 8) | p[3];
    194    w = (w << 8) | p[2];
    195    w = (w << 8) | p[1];
    196    w = (w << 8) | p[0];
    197    return w;
    198 }
    199 
    200 /* Do a little-endian load of a 16-bit word, regardless of the
    201    endianness of the underlying host. */
    202 static inline UShort getUShortLittleEndianly ( UChar* p )
    203 {
    204    UShort w = 0;
    205    w = (w << 8) | p[1];
    206    w = (w << 8) | p[0];
    207    return w;
    208 }
    209 
    210 static UInt ROR32 ( UInt x, UInt sh ) {
    211    vassert(sh >= 0 && sh < 32);
    212    if (sh == 0)
    213       return x;
    214    else
    215       return (x << (32-sh)) | (x >> sh);
    216 }
    217 
    218 static Int popcount32 ( UInt x )
    219 {
    220    Int res = 0, i;
    221    for (i = 0; i < 32; i++) {
    222       res += (x & 1);
    223       x >>= 1;
    224    }
    225    return res;
    226 }
    227 
    228 static UInt setbit32 ( UInt x, Int ix, UInt b )
    229 {
    230    UInt mask = 1 << ix;
    231    x &= ~mask;
    232    x |= ((b << ix) & mask);
    233    return x;
    234 }
    235 
/* Assemble small compile-time bit patterns from individual bit
   values; used throughout the decoder for instruction matching. */
#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0)                      \
  (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

/* 5/6/7-bit variants are zero-extended BITS8. */
#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)      \
   (((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)                                \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

/* produces _uint[_bMax:_bMin], i.e. the inclusive bitfield extracted
   and shifted down to bit 0. */
#define SLICE_UInt(_uint,_bMax,_bMin) \
   (( ((UInt)(_uint)) >> (_bMin)) \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
    268 
    269 
    270 /*------------------------------------------------------------*/
    271 /*--- Helper bits and pieces for creating IR fragments.    ---*/
    272 /*------------------------------------------------------------*/
    273 
    274 static IRExpr* mkU64 ( ULong i )
    275 {
    276    return IRExpr_Const(IRConst_U64(i));
    277 }
    278 
    279 static IRExpr* mkU32 ( UInt i )
    280 {
    281    return IRExpr_Const(IRConst_U32(i));
    282 }
    283 
    284 static IRExpr* mkU8 ( UInt i )
    285 {
    286    vassert(i < 256);
    287    return IRExpr_Const(IRConst_U8( (UChar)i ));
    288 }
    289 
    290 static IRExpr* mkexpr ( IRTemp tmp )
    291 {
    292    return IRExpr_RdTmp(tmp);
    293 }
    294 
    295 static IRExpr* unop ( IROp op, IRExpr* a )
    296 {
    297    return IRExpr_Unop(op, a);
    298 }
    299 
    300 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
    301 {
    302    return IRExpr_Binop(op, a1, a2);
    303 }
    304 
    305 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
    306 {
    307    return IRExpr_Triop(op, a1, a2, a3);
    308 }
    309 
    310 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
    311 {
    312    return IRExpr_Load(Iend_LE, ty, addr);
    313 }
    314 
    315 /* Add a statement to the list held by "irbb". */
    316 static void stmt ( IRStmt* st )
    317 {
    318    addStmtToIRSB( irsb, st );
    319 }
    320 
    321 static void assign ( IRTemp dst, IRExpr* e )
    322 {
    323    stmt( IRStmt_WrTmp(dst, e) );
    324 }
    325 
    326 static void storeLE ( IRExpr* addr, IRExpr* data )
    327 {
    328    stmt( IRStmt_Store(Iend_LE, addr, data) );
    329 }
    330 
    331 /* Generate a new temporary of the given type. */
    332 static IRTemp newTemp ( IRType ty )
    333 {
    334    vassert(isPlausibleIRType(ty));
    335    return newIRTemp( irsb->tyenv, ty );
    336 }
    337 
/* Produces a value in 0 .. 3, which is encoded as per the type
   IRRoundingMode.  Always yields Irrm_NEAREST here -- i.e. the
   guest's actual FPSCR rounding mode is ignored (hence "FAKE"). */
static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
{
   return mkU32(Irrm_NEAREST);
}
    344 
    345 /* Generate an expression for SRC rotated right by ROT. */
    346 static IRExpr* genROR32( IRTemp src, Int rot )
    347 {
    348    vassert(rot >= 0 && rot < 32);
    349    if (rot == 0)
    350       return mkexpr(src);
    351    return
    352       binop(Iop_Or32,
    353             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
    354             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
    355 }
    356 
/* Build a V128 constant whose two 64-bit halves both equal I.
   Note: the two mkU64 calls deliberately build two distinct IR
   nodes -- IR expression trees must not share subtrees. */
static IRExpr* mkU128 ( ULong i )
{
   return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
}
    361 
    362 /* Generate a 4-aligned version of the given expression if
    363    the given condition is true.  Else return it unchanged. */
    364 static IRExpr* align4if ( IRExpr* e, Bool b )
    365 {
    366    if (b)
    367       return binop(Iop_And32, e, mkU32(~3));
    368    else
    369       return e;
    370 }
    371 
    372 
    373 /*------------------------------------------------------------*/
    374 /*--- Helpers for accessing guest registers.               ---*/
    375 /*------------------------------------------------------------*/
    376 
/* Byte offsets of the guest state fields, for use in Get/Put. */

/* Integer registers r0 .. r14, plus the T-bit-encoded PC. */
#define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
#define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
#define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
#define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
#define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
#define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
#define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
#define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
#define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
#define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
#define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
#define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
#define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
#define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
#define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
#define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)

/* Lazily-evaluated condition-code "thunk" fields. */
#define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
#define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)

/* VFP/Neon double registers D0 .. D31. */
#define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
#define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
#define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
#define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
#define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
#define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
#define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
#define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
#define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
#define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
#define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
#define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
#define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
#define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
#define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
#define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
#define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
#define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
#define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
#define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
#define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
#define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
#define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
#define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
#define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
#define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
#define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
#define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
#define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
#define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
#define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
#define OFFB_D31      offsetof(VexGuestARMState,guest_D31)

/* Miscellaneous control/status fields. */
#define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
#define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
#define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
#define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
#define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
#define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
#define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
#define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)
    441 
    442 
    443 /* ---------------- Integer registers ---------------- */
    444 
    445 static Int integerGuestRegOffset ( UInt iregNo )
    446 {
    447    /* Do we care about endianness here?  We do if sub-parts of integer
    448       registers are accessed, but I don't think that ever happens on
    449       ARM. */
    450    switch (iregNo) {
    451       case 0:  return OFFB_R0;
    452       case 1:  return OFFB_R1;
    453       case 2:  return OFFB_R2;
    454       case 3:  return OFFB_R3;
    455       case 4:  return OFFB_R4;
    456       case 5:  return OFFB_R5;
    457       case 6:  return OFFB_R6;
    458       case 7:  return OFFB_R7;
    459       case 8:  return OFFB_R8;
    460       case 9:  return OFFB_R9;
    461       case 10: return OFFB_R10;
    462       case 11: return OFFB_R11;
    463       case 12: return OFFB_R12;
    464       case 13: return OFFB_R13;
    465       case 14: return OFFB_R14;
    466       case 15: return OFFB_R15T;
    467       default: vassert(0);
    468    }
    469 }
    470 
    471 /* Plain ("low level") read from a reg; no +8 offset magic for r15. */
    472 static IRExpr* llGetIReg ( UInt iregNo )
    473 {
    474    vassert(iregNo < 16);
    475    return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
    476 }
    477 
    478 /* Architected read from a reg in ARM mode.  This automagically adds 8
    479    to all reads of r15. */
    480 static IRExpr* getIRegA ( UInt iregNo )
    481 {
    482    IRExpr* e;
    483    ASSERT_IS_ARM;
    484    vassert(iregNo < 16);
    485    if (iregNo == 15) {
    486       /* If asked for r15, don't read the guest state value, as that
    487          may not be up to date in the case where loop unrolling has
    488          happened, because the first insn's write to the block is
    489          omitted; hence in the 2nd and subsequent unrollings we don't
    490          have a correct value in guest r15.  Instead produce the
    491          constant that we know would be produced at this point. */
    492       vassert(0 == (guest_R15_curr_instr_notENC & 3));
    493       e = mkU32(guest_R15_curr_instr_notENC + 8);
    494    } else {
    495       e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
    496    }
    497    return e;
    498 }
    499 
    500 /* Architected read from a reg in Thumb mode.  This automagically adds
    501    4 to all reads of r15. */
    502 static IRExpr* getIRegT ( UInt iregNo )
    503 {
    504    IRExpr* e;
    505    ASSERT_IS_THUMB;
    506    vassert(iregNo < 16);
    507    if (iregNo == 15) {
    508       /* Ditto comment in getIReg. */
    509       vassert(0 == (guest_R15_curr_instr_notENC & 1));
    510       e = mkU32(guest_R15_curr_instr_notENC + 4);
    511    } else {
    512       e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
    513    }
    514    return e;
    515 }
    516 
    517 /* Plain ("low level") write to a reg; no jump or alignment magic for
    518    r15. */
    519 static void llPutIReg ( UInt iregNo, IRExpr* e )
    520 {
    521    vassert(iregNo < 16);
    522    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
    523    stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
    524 }
    525 
    526 /* Architected write to an integer register in ARM mode.  If it is to
    527    r15, record info so at the end of this insn's translation, a branch
    528    to it can be made.  Also handles conditional writes to the
    529    register: if guardT == IRTemp_INVALID then the write is
    530    unconditional.  If writing r15, also 4-align it. */
    531 static void putIRegA ( UInt       iregNo,
    532                        IRExpr*    e,
    533                        IRTemp     guardT /* :: Ity_I32, 0 or 1 */,
    534                        IRJumpKind jk /* if a jump is generated */ )
    535 {
    536    /* if writing r15, force e to be 4-aligned. */
    537    // INTERWORKING FIXME.  this needs to be relaxed so that
    538    // puts caused by LDMxx which load r15 interwork right.
    539    // but is no aligned too relaxed?
    540    //if (iregNo == 15)
    541    //   e = binop(Iop_And32, e, mkU32(~3));
    542    ASSERT_IS_ARM;
    543    /* So, generate either an unconditional or a conditional write to
    544       the reg. */
    545    if (guardT == IRTemp_INVALID) {
    546       /* unconditional write */
    547       llPutIReg( iregNo, e );
    548    } else {
    549       llPutIReg( iregNo,
    550                  IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
    551                                llGetIReg(iregNo),
    552                                e ));
    553    }
    554    if (iregNo == 15) {
    555       // assert against competing r15 updates.  Shouldn't
    556       // happen; should be ruled out by the instr matching
    557       // logic.
    558       vassert(r15written == False);
    559       vassert(r15guard   == IRTemp_INVALID);
    560       vassert(r15kind    == Ijk_Boring);
    561       r15written = True;
    562       r15guard   = guardT;
    563       r15kind    = jk;
    564    }
    565 }
    566 
    567 
    568 /* Architected write to an integer register in Thumb mode.  Writes to
    569    r15 are not allowed.  Handles conditional writes to the register:
    570    if guardT == IRTemp_INVALID then the write is unconditional. */
    571 static void putIRegT ( UInt       iregNo,
    572                        IRExpr*    e,
    573                        IRTemp     guardT /* :: Ity_I32, 0 or 1 */ )
    574 {
    575    /* So, generate either an unconditional or a conditional write to
    576       the reg. */
    577    ASSERT_IS_THUMB;
    578    vassert(iregNo >= 0 && iregNo <= 14);
    579    if (guardT == IRTemp_INVALID) {
    580       /* unconditional write */
    581       llPutIReg( iregNo, e );
    582    } else {
    583       llPutIReg( iregNo,
    584                  IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
    585                                llGetIReg(iregNo),
    586                                e ));
    587    }
    588 }
    589 
    590 
    591 /* Thumb16 and Thumb32 only.
    592    Returns true if reg is 13 or 15.  Implements the BadReg
    593    predicate in the ARM ARM. */
    594 static Bool isBadRegT ( UInt r )
    595 {
    596    vassert(r <= 15);
    597    ASSERT_IS_THUMB;
    598    return r == 13 || r == 15;
    599 }
    600 
    601 
    602 /* ---------------- Double registers ---------------- */
    603 
    604 static Int doubleGuestRegOffset ( UInt dregNo )
    605 {
    606    /* Do we care about endianness here?  Probably do if we ever get
    607       into the situation of dealing with the single-precision VFP
    608       registers. */
    609    switch (dregNo) {
    610       case 0:  return OFFB_D0;
    611       case 1:  return OFFB_D1;
    612       case 2:  return OFFB_D2;
    613       case 3:  return OFFB_D3;
    614       case 4:  return OFFB_D4;
    615       case 5:  return OFFB_D5;
    616       case 6:  return OFFB_D6;
    617       case 7:  return OFFB_D7;
    618       case 8:  return OFFB_D8;
    619       case 9:  return OFFB_D9;
    620       case 10: return OFFB_D10;
    621       case 11: return OFFB_D11;
    622       case 12: return OFFB_D12;
    623       case 13: return OFFB_D13;
    624       case 14: return OFFB_D14;
    625       case 15: return OFFB_D15;
    626       case 16: return OFFB_D16;
    627       case 17: return OFFB_D17;
    628       case 18: return OFFB_D18;
    629       case 19: return OFFB_D19;
    630       case 20: return OFFB_D20;
    631       case 21: return OFFB_D21;
    632       case 22: return OFFB_D22;
    633       case 23: return OFFB_D23;
    634       case 24: return OFFB_D24;
    635       case 25: return OFFB_D25;
    636       case 26: return OFFB_D26;
    637       case 27: return OFFB_D27;
    638       case 28: return OFFB_D28;
    639       case 29: return OFFB_D29;
    640       case 30: return OFFB_D30;
    641       case 31: return OFFB_D31;
    642       default: vassert(0);
    643    }
    644 }
    645 
    646 /* Plain ("low level") read from a VFP Dreg. */
    647 static IRExpr* llGetDReg ( UInt dregNo )
    648 {
    649    vassert(dregNo < 32);
    650    return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
    651 }
    652 
    653 /* Architected read from a VFP Dreg. */
    654 static IRExpr* getDReg ( UInt dregNo ) {
    655    return llGetDReg( dregNo );
    656 }
    657 
    658 /* Plain ("low level") write to a VFP Dreg. */
    659 static void llPutDReg ( UInt dregNo, IRExpr* e )
    660 {
    661    vassert(dregNo < 32);
    662    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
    663    stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
    664 }
    665 
    666 /* Architected write to a VFP Dreg.  Handles conditional writes to the
    667    register: if guardT == IRTemp_INVALID then the write is
    668    unconditional. */
    669 static void putDReg ( UInt    dregNo,
    670                       IRExpr* e,
    671                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    672 {
    673    /* So, generate either an unconditional or a conditional write to
    674       the reg. */
    675    if (guardT == IRTemp_INVALID) {
    676       /* unconditional write */
    677       llPutDReg( dregNo, e );
    678    } else {
    679       llPutDReg( dregNo,
    680                  IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
    681                                llGetDReg(dregNo),
    682                                e ));
    683    }
    684 }
    685 
    686 /* And now exactly the same stuff all over again, but this time
    687    taking/returning I64 rather than F64, to support 64-bit Neon
    688    ops. */
    689 
    690 /* Plain ("low level") read from a Neon Integer Dreg. */
    691 static IRExpr* llGetDRegI64 ( UInt dregNo )
    692 {
    693    vassert(dregNo < 32);
    694    return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
    695 }
    696 
    697 /* Architected read from a Neon Integer Dreg. */
    698 static IRExpr* getDRegI64 ( UInt dregNo ) {
    699    return llGetDRegI64( dregNo );
    700 }
    701 
    702 /* Plain ("low level") write to a Neon Integer Dreg. */
    703 static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
    704 {
    705    vassert(dregNo < 32);
    706    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
    707    stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
    708 }
    709 
    710 /* Architected write to a Neon Integer Dreg.  Handles conditional
    711    writes to the register: if guardT == IRTemp_INVALID then the write
    712    is unconditional. */
    713 static void putDRegI64 ( UInt    dregNo,
    714                          IRExpr* e,
    715                          IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    716 {
    717    /* So, generate either an unconditional or a conditional write to
    718       the reg. */
    719    if (guardT == IRTemp_INVALID) {
    720       /* unconditional write */
    721       llPutDRegI64( dregNo, e );
    722    } else {
    723       llPutDRegI64( dregNo,
    724                     IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
    725                                   llGetDRegI64(dregNo),
    726                                   e ));
    727    }
    728 }
    729 
    730 /* ---------------- Quad registers ---------------- */
    731 
    732 static Int quadGuestRegOffset ( UInt qregNo )
    733 {
    734    /* Do we care about endianness here?  Probably do if we ever get
    735       into the situation of dealing with the 64 bit Neon registers. */
    736    switch (qregNo) {
    737       case 0:  return OFFB_D0;
    738       case 1:  return OFFB_D2;
    739       case 2:  return OFFB_D4;
    740       case 3:  return OFFB_D6;
    741       case 4:  return OFFB_D8;
    742       case 5:  return OFFB_D10;
    743       case 6:  return OFFB_D12;
    744       case 7:  return OFFB_D14;
    745       case 8:  return OFFB_D16;
    746       case 9:  return OFFB_D18;
    747       case 10: return OFFB_D20;
    748       case 11: return OFFB_D22;
    749       case 12: return OFFB_D24;
    750       case 13: return OFFB_D26;
    751       case 14: return OFFB_D28;
    752       case 15: return OFFB_D30;
    753       default: vassert(0);
    754    }
    755 }
    756 
    757 /* Plain ("low level") read from a Neon Qreg. */
    758 static IRExpr* llGetQReg ( UInt qregNo )
    759 {
    760    vassert(qregNo < 16);
    761    return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
    762 }
    763 
    764 /* Architected read from a Neon Qreg. */
    765 static IRExpr* getQReg ( UInt qregNo ) {
    766    return llGetQReg( qregNo );
    767 }
    768 
    769 /* Plain ("low level") write to a Neon Qreg. */
    770 static void llPutQReg ( UInt qregNo, IRExpr* e )
    771 {
    772    vassert(qregNo < 16);
    773    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
    774    stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
    775 }
    776 
    777 /* Architected write to a Neon Qreg.  Handles conditional writes to the
    778    register: if guardT == IRTemp_INVALID then the write is
    779    unconditional. */
    780 static void putQReg ( UInt    qregNo,
    781                       IRExpr* e,
    782                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    783 {
    784    /* So, generate either an unconditional or a conditional write to
    785       the reg. */
    786    if (guardT == IRTemp_INVALID) {
    787       /* unconditional write */
    788       llPutQReg( qregNo, e );
    789    } else {
    790       llPutQReg( qregNo,
    791                  IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
    792                                llGetQReg(qregNo),
    793                                e ));
    794    }
    795 }
    796 
    797 
    798 /* ---------------- Float registers ---------------- */
    799 
    800 static Int floatGuestRegOffset ( UInt fregNo )
    801 {
    802    /* Start with the offset of the containing double, and then correct
    803       for endianness.  Actually this is completely bogus and needs
    804       careful thought. */
    805    Int off;
    806    vassert(fregNo < 32);
    807    off = doubleGuestRegOffset(fregNo >> 1);
    808    if (host_is_bigendian) {
    809       vassert(0);
    810    } else {
    811       if (fregNo & 1)
    812          off += 4;
    813    }
    814    return off;
    815 }
    816 
    817 /* Plain ("low level") read from a VFP Freg. */
    818 static IRExpr* llGetFReg ( UInt fregNo )
    819 {
    820    vassert(fregNo < 32);
    821    return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
    822 }
    823 
/* Architected read from a VFP Freg.  At present this needs no
   special treatment beyond a plain read. */
static IRExpr* getFReg ( UInt fregNo ) {
   return llGetFReg( fregNo );
}
    828 
    829 /* Plain ("low level") write to a VFP Freg. */
    830 static void llPutFReg ( UInt fregNo, IRExpr* e )
    831 {
    832    vassert(fregNo < 32);
    833    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
    834    stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
    835 }
    836 
    837 /* Architected write to a VFP Freg.  Handles conditional writes to the
    838    register: if guardT == IRTemp_INVALID then the write is
    839    unconditional. */
    840 static void putFReg ( UInt    fregNo,
    841                       IRExpr* e,
    842                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    843 {
    844    /* So, generate either an unconditional or a conditional write to
    845       the reg. */
    846    if (guardT == IRTemp_INVALID) {
    847       /* unconditional write */
    848       llPutFReg( fregNo, e );
    849    } else {
    850       llPutFReg( fregNo,
    851                  IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
    852                                llGetFReg(fregNo),
    853                                e ));
    854    }
    855 }
    856 
    857 
    858 /* ---------------- Misc registers ---------------- */
    859 
    860 static void putMiscReg32 ( UInt    gsoffset,
    861                            IRExpr* e, /* :: Ity_I32 */
    862                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    863 {
    864    switch (gsoffset) {
    865       case OFFB_FPSCR:   break;
    866       case OFFB_QFLAG32: break;
    867       case OFFB_GEFLAG0: break;
    868       case OFFB_GEFLAG1: break;
    869       case OFFB_GEFLAG2: break;
    870       case OFFB_GEFLAG3: break;
    871       default: vassert(0); /* awaiting more cases */
    872    }
    873    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
    874 
    875    if (guardT == IRTemp_INVALID) {
    876       /* unconditional write */
    877       stmt(IRStmt_Put(gsoffset, e));
    878    } else {
    879       stmt(IRStmt_Put(
    880          gsoffset,
    881          IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
    882                        IRExpr_Get(gsoffset, Ity_I32),
    883                        e
    884          )
    885       ));
    886    }
    887 }
    888 
    889 static IRTemp get_ITSTATE ( void )
    890 {
    891    ASSERT_IS_THUMB;
    892    IRTemp t = newTemp(Ity_I32);
    893    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
    894    return t;
    895 }
    896 
/* Write 't' to the guest ITSTATE word.  Thumb mode only. */
static void put_ITSTATE ( IRTemp t )
{
   ASSERT_IS_THUMB;
   stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
}
    902 
    903 static IRTemp get_QFLAG32 ( void )
    904 {
    905    IRTemp t = newTemp(Ity_I32);
    906    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
    907    return t;
    908 }
    909 
/* Write 't' to the Q-flag shadow word, conditionally on condT
   (unconditionally if condT == IRTemp_INVALID).  Zero denotes Q
   clear; any nonzero value denotes Q set. */
static void put_QFLAG32 ( IRTemp t, IRTemp condT )
{
   putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
}
    914 
    915 /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
    916    Status Register) to indicate that overflow or saturation occurred.
    917    Nb: t must be zero to denote no saturation, and any nonzero
    918    value to indicate saturation. */
    919 static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
    920 {
    921    IRTemp old = get_QFLAG32();
    922    IRTemp nyu = newTemp(Ity_I32);
    923    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
    924    put_QFLAG32(nyu, condT);
    925 }
    926 
    927 /* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
    928    flagNo: which flag bit to set [3...0]
    929    lowbits_to_ignore:  0 = look at all 32 bits
    930                        8 = look at top 24 bits only
    931                       16 = look at top 16 bits only
    932                       31 = look at the top bit only
    933    e: input value to be evaluated.
    934    The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
    935    masked out.  If the resulting value is zero then the GE flag is
    936    set to 0; any other value sets the flag to 1. */
    937 static void put_GEFLAG32 ( Int flagNo,            /* 0, 1, 2 or 3 */
    938                            Int lowbits_to_ignore, /* 0, 8, 16 or 31   */
    939                            IRExpr* e,             /* Ity_I32 */
    940                            IRTemp condT )
    941 {
    942    vassert( flagNo >= 0 && flagNo <= 3 );
    943    vassert( lowbits_to_ignore == 0  ||
    944             lowbits_to_ignore == 8  ||
    945             lowbits_to_ignore == 16 ||
    946             lowbits_to_ignore == 31 );
    947    IRTemp masked = newTemp(Ity_I32);
    948    assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
    949 
    950    switch (flagNo) {
    951       case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
    952       case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
    953       case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
    954       case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
    955       default: vassert(0);
    956    }
    957 }
    958 
    959 /* Return the (32-bit, zero-or-nonzero representation scheme) of
    960    the specified GE flag. */
    961 static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
    962 {
    963    switch (flagNo) {
    964       case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
    965       case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
    966       case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
    967       case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
    968       default: vassert(0);
    969    }
    970 }
    971 
    972 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
    973    2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
    974    15 of the value.  All other bits are ignored. */
    975 static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
    976 {
    977    IRTemp ge10 = newTemp(Ity_I32);
    978    IRTemp ge32 = newTemp(Ity_I32);
    979    assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
    980    assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
    981    put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
    982    put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
    983    put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
    984    put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
    985 }
    986 
    987 
    988 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3
    989    from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
    990    bit 7.  All other bits are ignored. */
    991 static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
    992 {
    993    IRTemp ge0 = newTemp(Ity_I32);
    994    IRTemp ge1 = newTemp(Ity_I32);
    995    IRTemp ge2 = newTemp(Ity_I32);
    996    IRTemp ge3 = newTemp(Ity_I32);
    997    assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
    998    assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
    999    assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
   1000    assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
   1001    put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
   1002    put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
   1003    put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
   1004    put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
   1005 }
   1006 
   1007 
   1008 /* ---------------- FPSCR stuff ---------------- */
   1009 
   1010 /* Generate IR to get hold of the rounding mode bits in FPSCR, and
   1011    convert them to IR format.  Bind the final result to the
   1012    returned temp. */
   1013 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
   1014 {
   1015    /* The ARMvfp encoding for rounding mode bits is:
   1016          00  to nearest
   1017          01  to +infinity
   1018          10  to -infinity
   1019          11  to zero
   1020       We need to convert that to the IR encoding:
   1021          00  to nearest (the default)
   1022          10  to +infinity
   1023          01  to -infinity
   1024          11  to zero
   1025       Which can be done by swapping bits 0 and 1.
   1026       The rmode bits are at 23:22 in FPSCR.
   1027    */
   1028    IRTemp armEncd = newTemp(Ity_I32);
   1029    IRTemp swapped = newTemp(Ity_I32);
   1030    /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
   1031       we don't zero out bits 24 and above, since the assignment to
   1032       'swapped' will mask them out anyway. */
   1033    assign(armEncd,
   1034           binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
   1035    /* Now swap them. */
   1036    assign(swapped,
   1037           binop(Iop_Or32,
   1038                 binop(Iop_And32,
   1039                       binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
   1040                       mkU32(2)),
   1041                 binop(Iop_And32,
   1042                       binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
   1043                       mkU32(1))
   1044          ));
   1045    return swapped;
   1046 }
   1047 
   1048 
   1049 /*------------------------------------------------------------*/
   1050 /*--- Helpers for flag handling and conditional insns      ---*/
   1051 /*------------------------------------------------------------*/
   1052 
/* Return the customary assembly-syntax suffix for an ARM condition
   code, wrapped in braces for disassembly printing.  AL (always) is
   rendered as the empty string since it is the default. */
static HChar* name_ARMCondcode ( ARMCondcode cond )
{
   switch (cond) {
      case ARMCondEQ:  return "{eq}";
      case ARMCondNE:  return "{ne}";
      case ARMCondHS:  return "{hs}";  // or 'cs'
      case ARMCondLO:  return "{lo}";  // or 'cc'
      case ARMCondMI:  return "{mi}";
      case ARMCondPL:  return "{pl}";
      case ARMCondVS:  return "{vs}";
      case ARMCondVC:  return "{vc}";
      case ARMCondHI:  return "{hi}";
      case ARMCondLS:  return "{ls}";
      case ARMCondGE:  return "{ge}";
      case ARMCondLT:  return "{lt}";
      case ARMCondGT:  return "{gt}";
      case ARMCondLE:  return "{le}";
      case ARMCondAL:  return ""; // {al}: is the default
      case ARMCondNV:  return "{nv}";
      default: vpanic("name_ARMCondcode");
   }
}
/* A handy shorthand for name_ARMCondcode. */
static HChar* nCC ( ARMCondcode cond ) {
   return name_ARMCondcode(cond);
}
   1079 
   1080 
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I32, suitable for narrowing.  Although the return type is
   Ity_I32, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I32 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_condition", &armg_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2.  The mask
      bits select args[0] and args[3] for exclusion. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
   1122 
   1123 
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I32, suitable for narrowing.  Although the return type is
   Ity_I32, the returned value is either 0 or 1.
*/
static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
{
  /* First arg is "(cond << 4) | stored-operation".  This requires
     that the ARM_CC_OP_ values all fit in 4 bits.  Hence we are
     passing a (COND, OP) pair in the lowest 8 bits of the first
     argument. */
   vassert(cond >= 0 && cond <= 15);
   return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
}
   1137 
   1138 
   1139 /* Build IR to calculate just the carry flag from stored
   1140    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1141    Ity_I32. */
   1142 static IRExpr* mk_armg_calculate_flag_c ( void )
   1143 {
   1144    IRExpr** args
   1145       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1146                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1147                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1148                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1149    IRExpr* call
   1150       = mkIRExprCCall(
   1151            Ity_I32,
   1152            0/*regparm*/,
   1153            "armg_calculate_flag_c", &armg_calculate_flag_c,
   1154            args
   1155         );
   1156    /* Exclude OP and NDEP from definedness checking.  We're only
   1157       interested in DEP1 and DEP2. */
   1158    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1159    return call;
   1160 }
   1161 
   1162 
   1163 /* Build IR to calculate just the overflow flag from stored
   1164    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1165    Ity_I32. */
   1166 static IRExpr* mk_armg_calculate_flag_v ( void )
   1167 {
   1168    IRExpr** args
   1169       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1170                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1171                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1172                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1173    IRExpr* call
   1174       = mkIRExprCCall(
   1175            Ity_I32,
   1176            0/*regparm*/,
   1177            "armg_calculate_flag_v", &armg_calculate_flag_v,
   1178            args
   1179         );
   1180    /* Exclude OP and NDEP from definedness checking.  We're only
   1181       interested in DEP1 and DEP2. */
   1182    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1183    return call;
   1184 }
   1185 
   1186 
   1187 /* Build IR to calculate N Z C V in bits 31:28 of the
   1188    returned word. */
   1189 static IRExpr* mk_armg_calculate_flags_nzcv ( void )
   1190 {
   1191    IRExpr** args
   1192       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1193                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1194                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1195                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1196    IRExpr* call
   1197       = mkIRExprCCall(
   1198            Ity_I32,
   1199            0/*regparm*/,
   1200            "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
   1201            args
   1202         );
   1203    /* Exclude OP and NDEP from definedness checking.  We're only
   1204       interested in DEP1 and DEP2. */
   1205    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1206    return call;
   1207 }
   1208 
/* Build IR to compute the QC (cumulative saturation) indication by
   comparing 'resL' against 'resR' 32 bits at a time: the result is
   nonzero iff any corresponding lanes differ.  'Q' selects whether
   the operands are 128-bit (V128) or 64-bit values; in the 128-bit
   case the comparison is done as two helper calls whose results are
   OR'd.  The #else branch is an unused inline-IR alternative to the
   helper-call scheme. */
static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
{
   IRExpr** args1;
   IRExpr** args2;
   IRExpr *call1, *call2, *res;

   if (Q) {
      /* 128-bit case: split each operand into two (lo,hi) 32-bit
         pairs per helper call. */
      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
                              binop(Iop_GetElem32x4, resL, mkU8(1)),
                              binop(Iop_GetElem32x4, resR, mkU8(0)),
                              binop(Iop_GetElem32x4, resR, mkU8(1)) );
      args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
                              binop(Iop_GetElem32x4, resL, mkU8(3)),
                              binop(Iop_GetElem32x4, resR, mkU8(2)),
                              binop(Iop_GetElem32x4, resR, mkU8(3)) );
   } else {
      /* 64-bit case: one helper call covers both 32-bit lanes. */
      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
                              binop(Iop_GetElem32x2, resL, mkU8(1)),
                              binop(Iop_GetElem32x2, resR, mkU8(0)),
                              binop(Iop_GetElem32x2, resR, mkU8(1)) );
   }

#if 1
   call1 = mkIRExprCCall(
             Ity_I32,
             0/*regparm*/,
             "armg_calculate_flag_qc", &armg_calculate_flag_qc,
             args1
          );
   if (Q) {
      call2 = mkIRExprCCall(
                Ity_I32,
                0/*regparm*/,
                "armg_calculate_flag_qc", &armg_calculate_flag_qc,
                args2
             );
   }
   if (Q) {
      res = binop(Iop_Or32, call1, call2);
   } else {
      res = call1;
   }
#else
   /* Unused alternative: compute the lane-difference test directly
      in IR rather than via a clean-helper call. */
   if (Q) {
      res = unop(Iop_1Uto32,
                 binop(Iop_CmpNE32,
                       binop(Iop_Or32,
                             binop(Iop_Or32,
                                   binop(Iop_Xor32,
                                         args1[0],
                                         args1[2]),
                                   binop(Iop_Xor32,
                                         args1[1],
                                         args1[3])),
                             binop(Iop_Or32,
                                   binop(Iop_Xor32,
                                         args2[0],
                                         args2[2]),
                                   binop(Iop_Xor32,
                                         args2[1],
                                         args2[3]))),
                       mkU32(0)));
   } else {
      res = unop(Iop_1Uto32,
                 binop(Iop_CmpNE32,
                       binop(Iop_Or32,
                             binop(Iop_Xor32,
                                   args1[0],
                                   args1[2]),
                             binop(Iop_Xor32,
                                   args1[1],
                                   args1[3])),
                       mkU32(0)));
   }
#endif
   return res;
}
   1286 
   1287 // FIXME: this is named wrongly .. looks like a sticky set of
   1288 // QC, not a write to it.
   1289 static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
   1290                          IRTemp condT )
   1291 {
   1292    putMiscReg32 (OFFB_FPSCR,
   1293                  binop(Iop_Or32,
   1294                        IRExpr_Get(OFFB_FPSCR, Ity_I32),
   1295                        binop(Iop_Shl32,
   1296                              mk_armg_calculate_flag_qc(resL, resR, Q),
   1297                              mkU8(27))),
   1298                  condT);
   1299 }
   1300 
   1301 /* Build IR to conditionally set the flags thunk.  As with putIReg, if
   1302    guard is IRTemp_INVALID then it's unconditional, else it holds a
   1303    condition :: Ity_I32. */
   1304 static
   1305 void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
   1306                          IRTemp t_dep2, IRTemp t_ndep,
   1307                          IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1308 {
   1309    IRTemp c8;
   1310    vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
   1311    vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
   1312    vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
   1313    vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
   1314    if (guardT == IRTemp_INVALID) {
   1315       /* unconditional */
   1316       stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(cc_op) ));
   1317       stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   1318       stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   1319       stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
   1320    } else {
   1321       /* conditional */
   1322       c8 = newTemp(Ity_I8);
   1323       assign( c8, unop(Iop_32to8, mkexpr(guardT)) );
   1324       stmt( IRStmt_Put(
   1325                OFFB_CC_OP,
   1326                IRExpr_Mux0X( mkexpr(c8),
   1327                              IRExpr_Get(OFFB_CC_OP, Ity_I32),
   1328                              mkU32(cc_op) )));
   1329       stmt( IRStmt_Put(
   1330                OFFB_CC_DEP1,
   1331                IRExpr_Mux0X( mkexpr(c8),
   1332                              IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1333                              mkexpr(t_dep1) )));
   1334       stmt( IRStmt_Put(
   1335                OFFB_CC_DEP2,
   1336                IRExpr_Mux0X( mkexpr(c8),
   1337                              IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1338                              mkexpr(t_dep2) )));
   1339       stmt( IRStmt_Put(
   1340                OFFB_CC_NDEP,
   1341                IRExpr_Mux0X( mkexpr(c8),
   1342                              IRExpr_Get(OFFB_CC_NDEP, Ity_I32),
   1343                              mkexpr(t_ndep) )));
   1344    }
   1345 }
   1346 
   1347 
   1348 /* Minor variant of the above that sets NDEP to zero (if it
   1349    sets it at all) */
   1350 static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
   1351                              IRTemp t_dep2,
   1352                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1353 {
   1354    IRTemp z32 = newTemp(Ity_I32);
   1355    assign( z32, mkU32(0) );
   1356    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
   1357 }
   1358 
   1359 
   1360 /* Minor variant of the above that sets DEP2 to zero (if it
   1361    sets it at all) */
   1362 static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
   1363                              IRTemp t_ndep,
   1364                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1365 {
   1366    IRTemp z32 = newTemp(Ity_I32);
   1367    assign( z32, mkU32(0) );
   1368    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
   1369 }
   1370 
   1371 
   1372 /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
   1373    sets them at all) */
   1374 static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
   1375                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1376 {
   1377    IRTemp z32 = newTemp(Ity_I32);
   1378    assign( z32, mkU32(0) );
   1379    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
   1380 }
   1381 
   1382 
   1383 /* ARM only */
   1384 /* Generate a side-exit to the next instruction, if the given guard
   1385    expression :: Ity_I32 is 0 (note!  the side exit is taken if the
   1386    condition is false!)  This is used to skip over conditional
   1387    instructions which we can't generate straight-line code for, either
   1388    because they are too complex or (more likely) they potentially
   1389    generate exceptions.
   1390 */
   1391 static void mk_skip_over_A32_if_cond_is_false (
   1392                IRTemp guardT /* :: Ity_I32, 0 or 1 */
   1393             )
   1394 {
   1395    ASSERT_IS_ARM;
   1396    vassert(guardT != IRTemp_INVALID);
   1397    vassert(0 == (guest_R15_curr_instr_notENC & 3));
   1398    stmt( IRStmt_Exit(
   1399             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
   1400             Ijk_Boring,
   1401             IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
   1402             OFFB_R15T
   1403        ));
   1404 }
   1405 
   1406 /* Thumb16 only */
   1407 /* ditto, but jump over a 16-bit thumb insn */
   1408 static void mk_skip_over_T16_if_cond_is_false (
   1409                IRTemp guardT /* :: Ity_I32, 0 or 1 */
   1410             )
   1411 {
   1412    ASSERT_IS_THUMB;
   1413    vassert(guardT != IRTemp_INVALID);
   1414    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   1415    stmt( IRStmt_Exit(
   1416             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
   1417             Ijk_Boring,
   1418             IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
   1419             OFFB_R15T
   1420        ));
   1421 }
   1422 
   1423 
   1424 /* Thumb32 only */
   1425 /* ditto, but jump over a 32-bit thumb insn */
   1426 static void mk_skip_over_T32_if_cond_is_false (
   1427                IRTemp guardT /* :: Ity_I32, 0 or 1 */
   1428             )
   1429 {
   1430    ASSERT_IS_THUMB;
   1431    vassert(guardT != IRTemp_INVALID);
   1432    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   1433    stmt( IRStmt_Exit(
   1434             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
   1435             Ijk_Boring,
   1436             IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
   1437             OFFB_R15T
   1438        ));
   1439 }
   1440 
   1441 
   1442 /* Thumb16 and Thumb32 only
   1443    Generate a SIGILL followed by a restart of the current instruction
   1444    if the given temp is nonzero. */
   1445 static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
   1446 {
   1447    ASSERT_IS_THUMB;
   1448    vassert(t != IRTemp_INVALID);
   1449    vassert(0 == (guest_R15_curr_instr_notENC & 1));
   1450    stmt(
   1451       IRStmt_Exit(
   1452          binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
   1453          Ijk_NoDecode,
   1454          IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
   1455          OFFB_R15T
   1456       )
   1457    );
   1458 }
   1459 
   1460 
   1461 /* Inspect the old_itstate, and generate a SIGILL if it indicates that
   1462    we are currently in an IT block and are not the last in the block.
   1463    This also rolls back guest_ITSTATE to its old value before the exit
   1464    and restores it to its new value afterwards.  This is so that if
   1465    the exit is taken, we have an up to date version of ITSTATE
   1466    available.  Without doing that, we have no hope of making precise
   1467    exceptions work. */
   1468 static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
   1469                IRTemp old_itstate /* :: Ity_I32 */,
   1470                IRTemp new_itstate /* :: Ity_I32 */
   1471             )
   1472 {
   1473    ASSERT_IS_THUMB;
   1474    put_ITSTATE(old_itstate); // backout
   1475    IRTemp guards_for_next3 = newTemp(Ity_I32);
   1476    assign(guards_for_next3,
   1477           binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   1478    gen_SIGILL_T_if_nonzero(guards_for_next3);
   1479    put_ITSTATE(new_itstate); //restore
   1480 }
   1481 
   1482 
/* Simpler version of the above, which generates a SIGILL if
   we're anywhere within an IT block.  Uses the same
   backout/restore-of-ITSTATE dance so that a taken exit sees the
   old (pre-instruction) ITSTATE. */
static void gen_SIGILL_T_if_in_ITBlock (
               IRTemp old_itstate /* :: Ity_I32 */,
               IRTemp new_itstate /* :: Ity_I32 */
            )
{
   put_ITSTATE(old_itstate); // backout
   gen_SIGILL_T_if_nonzero(old_itstate);
   put_ITSTATE(new_itstate); //restore
}
   1494 
   1495 
   1496 /* Generate an APSR value, from the NZCV thunk, and
   1497    from QFLAG32 and GEFLAG0 .. GEFLAG3. */
   1498 static IRTemp synthesise_APSR ( void )
   1499 {
   1500    IRTemp res1 = newTemp(Ity_I32);
   1501    // Get NZCV
   1502    assign( res1, mk_armg_calculate_flags_nzcv() );
   1503    // OR in the Q value
   1504    IRTemp res2 = newTemp(Ity_I32);
   1505    assign(
   1506       res2,
   1507       binop(Iop_Or32,
   1508             mkexpr(res1),
   1509             binop(Iop_Shl32,
   1510                   unop(Iop_1Uto32,
   1511                        binop(Iop_CmpNE32,
   1512                              mkexpr(get_QFLAG32()),
   1513                              mkU32(0))),
   1514                   mkU8(ARMG_CC_SHIFT_Q)))
   1515    );
   1516    // OR in GE0 .. GE3
   1517    IRExpr* ge0
   1518       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
   1519    IRExpr* ge1
   1520       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
   1521    IRExpr* ge2
   1522       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
   1523    IRExpr* ge3
   1524       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
   1525    IRTemp res3 = newTemp(Ity_I32);
   1526    assign(res3,
   1527           binop(Iop_Or32,
   1528                 mkexpr(res2),
   1529                 binop(Iop_Or32,
   1530                       binop(Iop_Or32,
   1531                             binop(Iop_Shl32, ge0, mkU8(16)),
   1532                             binop(Iop_Shl32, ge1, mkU8(17))),
   1533                       binop(Iop_Or32,
   1534                             binop(Iop_Shl32, ge2, mkU8(18)),
   1535                             binop(Iop_Shl32, ge3, mkU8(19))) )));
   1536    return res3;
   1537 }
   1538 
   1539 
   1540 /* and the inverse transformation: given an APSR value,
   1541    set the NZCV thunk, the Q flag, and the GE flags. */
   1542 static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
   1543                                 IRTemp apsrT, IRTemp condT )
   1544 {
   1545    vassert(write_nzcvq || write_ge);
   1546    if (write_nzcvq) {
   1547       // Do NZCV
   1548       IRTemp immT = newTemp(Ity_I32);
   1549       assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
   1550       setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
   1551       // Do Q
   1552       IRTemp qnewT = newTemp(Ity_I32);
   1553       assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
   1554       put_QFLAG32(qnewT, condT);
   1555    }
   1556    if (write_ge) {
   1557       // Do GE3..0
   1558       put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
   1559                    condT);
   1560       put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
   1561                    condT);
   1562       put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
   1563                    condT);
   1564       put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
   1565                    condT);
   1566    }
   1567 }
   1568 
   1569 
   1570 /*------------------------------------------------------------*/
   1571 /*--- Helpers for saturation                               ---*/
   1572 /*------------------------------------------------------------*/
   1573 
/* FIXME: absolutely the only diff. between (a) armUnsignedSatQ and
   (b) armSignedSatQ is that in (a) the floor is set to 0, whereas in
   (b) the floor is computed from the value of imm5.  these two
   functions should be commoned up. */
   1578 
   1579 /* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
   1580    Optionally return flag resQ saying whether saturation occurred.
   1581    See definition in manual, section A2.2.1, page 41
   1582    (bits(N), boolean) UnsignedSatQ( integer i, integer N )
   1583    {
   1584      if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
   1585      elsif ( i < 0 )    { result = 0; saturated = TRUE; }
   1586      else               { result = i; saturated = FALSE; }
   1587      return ( result<N-1:0>, saturated );
   1588    }
   1589 */
   1590 static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
   1591                              IRTemp* resQ, /* OUT - Ity_I32  */
   1592                              IRTemp regT,  /* value to clamp - Ity_I32 */
   1593                              UInt imm5 )   /* saturation ceiling */
   1594 {
   1595    UInt ceil  = (1 << imm5) - 1;    // (2^imm5)-1
   1596    UInt floor = 0;
   1597 
   1598    IRTemp node0 = newTemp(Ity_I32);
   1599    IRTemp node1 = newTemp(Ity_I32);
   1600    IRTemp node2 = newTemp(Ity_I1);
   1601    IRTemp node3 = newTemp(Ity_I32);
   1602    IRTemp node4 = newTemp(Ity_I32);
   1603    IRTemp node5 = newTemp(Ity_I1);
   1604    IRTemp node6 = newTemp(Ity_I32);
   1605 
   1606    assign( node0, mkexpr(regT) );
   1607    assign( node1, mkU32(ceil) );
   1608    assign( node2, binop( Iop_CmpLT32S, mkexpr(node1), mkexpr(node0) ) );
   1609    assign( node3, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node2)),
   1610                                 mkexpr(node0),
   1611                                 mkexpr(node1) ) );
   1612    assign( node4, mkU32(floor) );
   1613    assign( node5, binop( Iop_CmpLT32S, mkexpr(node3), mkexpr(node4) ) );
   1614    assign( node6, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node5)),
   1615                                 mkexpr(node3),
   1616                                 mkexpr(node4) ) );
   1617    assign( *res, mkexpr(node6) );
   1618 
   1619    /* if saturation occurred, then resQ is set to some nonzero value
   1620       if sat did not occur, resQ is guaranteed to be zero. */
   1621    if (resQ) {
   1622       assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   1623    }
   1624 }
   1625 
   1626 
   1627 /* SignedSatQ(): 'clamp' each value so it lies between  -2^N <= x <= (2^N) - 1
   1628    Optionally return flag resQ saying whether saturation occurred.
   1629    - see definition in manual, section A2.2.1, page 41
   1630    (bits(N), boolean ) SignedSatQ( integer i, integer N )
   1631    {
   1632      if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
   1633      elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = FALSE; }
   1634      else                      { result = i;           saturated = FALSE; }
   1635      return ( result[N-1:0], saturated );
   1636    }
   1637 */
   1638 static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
   1639                            UInt imm5,      /* saturation ceiling */
   1640                            IRTemp* res,    /* OUT - Ity_I32 */
   1641                            IRTemp* resQ )  /* OUT - Ity_I32  */
   1642 {
   1643    Int ceil  =  (1 << (imm5-1)) - 1;  //  (2^(imm5-1))-1
   1644    Int floor = -(1 << (imm5-1));      // -(2^(imm5-1))
   1645 
   1646    IRTemp node0 = newTemp(Ity_I32);
   1647    IRTemp node1 = newTemp(Ity_I32);
   1648    IRTemp node2 = newTemp(Ity_I1);
   1649    IRTemp node3 = newTemp(Ity_I32);
   1650    IRTemp node4 = newTemp(Ity_I32);
   1651    IRTemp node5 = newTemp(Ity_I1);
   1652    IRTemp node6 = newTemp(Ity_I32);
   1653 
   1654    assign( node0, mkexpr(regT) );
   1655    assign( node1, mkU32(ceil) );
   1656    assign( node2, binop( Iop_CmpLT32S, mkexpr(node1), mkexpr(node0) ) );
   1657    assign( node3, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node2)),
   1658                                 mkexpr(node0),  mkexpr(node1) ) );
   1659    assign( node4, mkU32(floor) );
   1660    assign( node5, binop( Iop_CmpLT32S, mkexpr(node3), mkexpr(node4) ) );
   1661    assign( node6, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node5)),
   1662                                 mkexpr(node3),  mkexpr(node4) ) );
   1663    assign( *res, mkexpr(node6) );
   1664 
   1665    /* if saturation occurred, then resQ is set to some nonzero value
   1666       if sat did not occur, resQ is guaranteed to be zero. */
   1667    if (resQ) {
   1668      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   1669    }
   1670 }
   1671 
   1672 
   1673 /* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
   1674    overflow occurred for 32-bit addition.  Needs both args and the
   1675    result.  HD p27. */
   1676 static
   1677 IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
   1678                                       IRTemp argL, IRTemp argR )
   1679 {
   1680    IRTemp res = newTemp(Ity_I32);
   1681    assign(res, resE);
   1682    return
   1683       binop( Iop_Shr32,
   1684              binop( Iop_And32,
   1685                     binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
   1686                     binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
   1687              mkU8(31) );
   1688 }
   1689 
   1690 /* Similarly .. also from HD p27 .. */
   1691 static
   1692 IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
   1693                                       IRTemp argL, IRTemp argR )
   1694 {
   1695    IRTemp res = newTemp(Ity_I32);
   1696    assign(res, resE);
   1697    return
   1698       binop( Iop_Shr32,
   1699              binop( Iop_And32,
   1700                     binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
   1701                     binop( Iop_Xor32, mkexpr(res),  mkexpr(argL) )),
   1702              mkU8(31) );
   1703 }
   1704 
   1705 
   1706 /*------------------------------------------------------------*/
   1707 /*--- Larger helpers                                       ---*/
   1708 /*------------------------------------------------------------*/
   1709 
   1710 /* Compute both the result and new C flag value for a LSL by an imm5
   1711    or by a register operand.  May generate reads of the old C value
   1712    (hence only safe to use before any writes to guest state happen).
   1713    Are factored out so can be used by both ARM and Thumb.
   1714 
   1715    Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
   1716    "res" (the result)  is a.k.a. "shop", shifter operand
   1717    "newC" (the new C)  is a.k.a. "shco", shifter carry out
   1718 
   1719    The calling convention for res and newC is a bit funny.  They could
   1720    be passed by value, but instead are passed by ref.
   1721 
   1722    The C (shco) value computed must be zero in bits 31:1, as the IR
   1723    optimisations for flag handling (guest_arm_spechelper) rely on
   1724    that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
   1725    for it.  Same applies to all these functions that compute shco
   1726    after a shift or rotate, not just this one.
   1727 */
   1728 
   1729 static void compute_result_and_C_after_LSL_by_imm5 (
   1730                /*OUT*/HChar* buf,
   1731                IRTemp* res,
   1732                IRTemp* newC,
   1733                IRTemp rMt, UInt shift_amt, /* operands */
   1734                UInt rM      /* only for debug printing */
   1735             )
   1736 {
   1737    if (shift_amt == 0) {
   1738       if (newC) {
   1739          assign( *newC, mk_armg_calculate_flag_c() );
   1740       }
   1741       assign( *res, mkexpr(rMt) );
   1742       DIS(buf, "r%u", rM);
   1743    } else {
   1744       vassert(shift_amt >= 1 && shift_amt <= 31);
   1745       if (newC) {
   1746          assign( *newC,
   1747                  binop(Iop_And32,
   1748                        binop(Iop_Shr32, mkexpr(rMt),
   1749                                         mkU8(32 - shift_amt)),
   1750                        mkU32(1)));
   1751       }
   1752       assign( *res,
   1753               binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
   1754       DIS(buf, "r%u, LSL #%u", rM, shift_amt);
   1755    }
   1756 }
   1757 
   1758 
/* Compute the result (shop) and, if newC is non-NULL, the shifter
   carry out (shco, zero in bits 31:1) for "Rm LSL Rs".  Reads the
   old C flag via mk_armg_calculate_flag_c, so is only safe to use
   before any writes to the guest state for this instruction. */
static void compute_result_and_C_after_LSL_by_reg (
               /*OUT*/HChar* buf,
               IRTemp* res,
               IRTemp* newC,
               IRTemp rMt, IRTemp rSt,  /* operands */
               UInt rM,    UInt rS      /* only for debug printing */
            )
{
   // shift left in range 0 .. 255
   // amt  = rS & 255
   // res  = amt < 32 ?  Rm << amt  : 0
   // newC = amt == 0     ? oldC  :
   //        amt in 1..32 ?  Rm[32-amt]  : 0
   IRTemp amtT = newTemp(Ity_I32);
   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   if (newC) {
      /* mux0X(amt == 0,
               mux0X(amt <= 32,
                     0,
                     Rm[(32-amt) & 31]),
               oldC)
      */
      /* About the best you can do is pray that iropt is able
         to nuke most or all of the following junk. */
      IRTemp oldC = newTemp(Ity_I32);
      assign(oldC, mk_armg_calculate_flag_c() );
      assign(
         *newC,
         IRExpr_Mux0X(
            unop(Iop_1Uto8,
                 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))),
            IRExpr_Mux0X(
               unop(Iop_1Uto8,
                    binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
               mkU32(0),
               // amt in 1..32: extract bit (32-amt); the "& 31" keeps
               // the IR shift amount in the defined range 0..31
               binop(Iop_And32,
                     binop(Iop_Shr32,
                           mkexpr(rMt),
                           unop(Iop_32to8,
                                binop(Iop_And32,
                                      binop(Iop_Sub32,
                                            mkU32(32),
                                            mkexpr(amtT)),
                                      mkU32(31)
                                )
                           )
                     ),
                     mkU32(1)
               )
            ),
            mkexpr(oldC)
         )
      );
   }
   // (Rm << (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
   // Lhs of the & limits the shift to 31 bits, so as to
   // give known IR semantics.  Rhs of the & is all 1s for
   // Rs <= 31 and all 0s for Rs >= 32.
   assign(
      *res,
      binop(
         Iop_And32,
         binop(Iop_Shl32,
               mkexpr(rMt),
               unop(Iop_32to8,
                    binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
         binop(Iop_Sar32,
               binop(Iop_Sub32,
                     mkexpr(amtT),
                     mkU32(32)),
               mkU8(31))));
    DIS(buf, "r%u, LSL r%u", rM, rS);
}
   1832 
   1833 
   1834 static void compute_result_and_C_after_LSR_by_imm5 (
   1835                /*OUT*/HChar* buf,
   1836                IRTemp* res,
   1837                IRTemp* newC,
   1838                IRTemp rMt, UInt shift_amt, /* operands */
   1839                UInt rM      /* only for debug printing */
   1840             )
   1841 {
   1842    if (shift_amt == 0) {
   1843       // conceptually a 32-bit shift, however:
   1844       // res  = 0
   1845       // newC = Rm[31]
   1846       if (newC) {
   1847          assign( *newC,
   1848                  binop(Iop_And32,
   1849                        binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
   1850                        mkU32(1)));
   1851       }
   1852       assign( *res, mkU32(0) );
   1853       DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
   1854    } else {
   1855       // shift in range 1..31
   1856       // res  = Rm >>u shift_amt
   1857       // newC = Rm[shift_amt - 1]
   1858       vassert(shift_amt >= 1 && shift_amt <= 31);
   1859       if (newC) {
   1860          assign( *newC,
   1861                  binop(Iop_And32,
   1862                        binop(Iop_Shr32, mkexpr(rMt),
   1863                                         mkU8(shift_amt - 1)),
   1864                        mkU32(1)));
   1865       }
   1866       assign( *res,
   1867               binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
   1868       DIS(buf, "r%u, LSR #%u", rM, shift_amt);
   1869    }
   1870 }
   1871 
   1872 
/* Compute the result (shop) and, if newC is non-NULL, the shifter
   carry out (shco, zero in bits 31:1) for "Rm LSR Rs".  Reads the
   old C flag via mk_armg_calculate_flag_c, so is only safe to use
   before any writes to the guest state for this instruction. */
static void compute_result_and_C_after_LSR_by_reg (
               /*OUT*/HChar* buf,
               IRTemp* res,
               IRTemp* newC,
               IRTemp rMt, IRTemp rSt,  /* operands */
               UInt rM,    UInt rS      /* only for debug printing */
            )
{
   // shift right in range 0 .. 255
   // amt = rS & 255
   // res  = amt < 32 ?  Rm >>u amt  : 0
   // newC = amt == 0     ? oldC  :
   //        amt in 1..32 ?  Rm[amt-1]  : 0
   IRTemp amtT = newTemp(Ity_I32);
   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   if (newC) {
      /* mux0X(amt == 0,
               mux0X(amt <= 32,
                     0,
                     Rm[(amt-1) & 31]),
               oldC)
      */
      IRTemp oldC = newTemp(Ity_I32);
      assign(oldC, mk_armg_calculate_flag_c() );
      assign(
         *newC,
         IRExpr_Mux0X(
            unop(Iop_1Uto8,
                 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))),
            IRExpr_Mux0X(
               unop(Iop_1Uto8,
                    binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
               mkU32(0),
               // amt in 1..32: extract bit (amt-1); the "& 31" keeps
               // the IR shift amount in the defined range 0..31
               binop(Iop_And32,
                     binop(Iop_Shr32,
                           mkexpr(rMt),
                           unop(Iop_32to8,
                                binop(Iop_And32,
                                      binop(Iop_Sub32,
                                            mkexpr(amtT),
                                            mkU32(1)),
                                      mkU32(31)
                                )
                           )
                     ),
                     mkU32(1)
               )
            ),
            mkexpr(oldC)
         )
      );
   }
   // (Rm >>u (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
   // Lhs of the & limits the shift to 31 bits, so as to
   // give known IR semantics.  Rhs of the & is all 1s for
   // Rs <= 31 and all 0s for Rs >= 32.
   assign(
      *res,
      binop(
         Iop_And32,
         binop(Iop_Shr32,
               mkexpr(rMt),
               unop(Iop_32to8,
                    binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
         binop(Iop_Sar32,
               binop(Iop_Sub32,
                     mkexpr(amtT),
                     mkU32(32)),
               mkU8(31))));
    DIS(buf, "r%u, LSR r%u", rM, rS);
}
   1944 
   1945 
   1946 static void compute_result_and_C_after_ASR_by_imm5 (
   1947                /*OUT*/HChar* buf,
   1948                IRTemp* res,
   1949                IRTemp* newC,
   1950                IRTemp rMt, UInt shift_amt, /* operands */
   1951                UInt rM      /* only for debug printing */
   1952             )
   1953 {
   1954    if (shift_amt == 0) {
   1955       // conceptually a 32-bit shift, however:
   1956       // res  = Rm >>s 31
   1957       // newC = Rm[31]
   1958       if (newC) {
   1959          assign( *newC,
   1960                  binop(Iop_And32,
   1961                        binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
   1962                        mkU32(1)));
   1963       }
   1964       assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
   1965       DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
   1966    } else {
   1967       // shift in range 1..31
   1968       // res = Rm >>s shift_amt
   1969       // newC = Rm[shift_amt - 1]
   1970       vassert(shift_amt >= 1 && shift_amt <= 31);
   1971       if (newC) {
   1972          assign( *newC,
   1973                  binop(Iop_And32,
   1974                        binop(Iop_Shr32, mkexpr(rMt),
   1975                                         mkU8(shift_amt - 1)),
   1976                        mkU32(1)));
   1977       }
   1978       assign( *res,
   1979               binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
   1980       DIS(buf, "r%u, ASR #%u", rM, shift_amt);
   1981    }
   1982 }
   1983 
   1984 
/* Compute the result (shop) and, if newC is non-NULL, the shifter
   carry out (shco, zero in bits 31:1) for "Rm ASR Rs".  Reads the
   old C flag via mk_armg_calculate_flag_c, so is only safe to use
   before any writes to the guest state for this instruction. */
static void compute_result_and_C_after_ASR_by_reg (
               /*OUT*/HChar* buf,
               IRTemp* res,
               IRTemp* newC,
               IRTemp rMt, IRTemp rSt,  /* operands */
               UInt rM,    UInt rS      /* only for debug printing */
            )
{
   // arithmetic shift right in range 0 .. 255
   // amt = rS & 255
   // res  = amt < 32 ?  Rm >>s amt  : Rm >>s 31
   // newC = amt == 0     ? oldC  :
   //        amt in 1..32 ?  Rm[amt-1]  : Rm[31]
   IRTemp amtT = newTemp(Ity_I32);
   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   if (newC) {
      /* mux0X(amt == 0,
               mux0X(amt <= 32,
                     Rm[31],
                     Rm[(amt-1) & 31]),
               oldC)
      */
      IRTemp oldC = newTemp(Ity_I32);
      assign(oldC, mk_armg_calculate_flag_c() );
      assign(
         *newC,
         IRExpr_Mux0X(
            unop(Iop_1Uto8,
                 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))),
            IRExpr_Mux0X(
               unop(Iop_1Uto8,
                    binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
               // amt > 32: carry out is the sign bit, Rm[31]
               binop(Iop_And32,
                     binop(Iop_Shr32,
                           mkexpr(rMt),
                           mkU8(31)
                     ),
                     mkU32(1)
               ),
               // amt in 1..32: extract bit (amt-1); the "& 31" keeps
               // the IR shift amount in the defined range 0..31
               binop(Iop_And32,
                     binop(Iop_Shr32,
                           mkexpr(rMt),
                           unop(Iop_32to8,
                                binop(Iop_And32,
                                      binop(Iop_Sub32,
                                            mkexpr(amtT),
                                            mkU32(1)),
                                      mkU32(31)
                                )
                           )
                     ),
                     mkU32(1)
               )
            ),
            mkexpr(oldC)
         )
      );
   }
   // (Rm >>s (amt <u 32 ? amt : 31))
   assign(
      *res,
      binop(
         Iop_Sar32,
         mkexpr(rMt),
         unop(
            Iop_32to8,
            IRExpr_Mux0X(
               unop(
                 Iop_1Uto8,
                 binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32))),
               mkU32(31),
               mkexpr(amtT)))));
    DIS(buf, "r%u, ASR r%u", rM, rS);
}
   2059 
   2060 
   2061 static void compute_result_and_C_after_ROR_by_reg (
   2062                /*OUT*/HChar* buf,
   2063                IRTemp* res,
   2064                IRTemp* newC,
   2065                IRTemp rMt, IRTemp rSt,  /* operands */
   2066                UInt rM,    UInt rS      /* only for debug printing */
   2067             )
   2068 {
   2069    // rotate right in range 0 .. 255
   2070    // amt = rS & 255
   2071    // shop =  Rm `ror` (amt & 31)
   2072    // shco =  amt == 0 ? oldC : Rm[(amt-1) & 31]
   2073    IRTemp amtT = newTemp(Ity_I32);
   2074    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   2075    IRTemp amt5T = newTemp(Ity_I32);
   2076    assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
   2077    IRTemp oldC = newTemp(Ity_I32);
   2078    assign(oldC, mk_armg_calculate_flag_c() );
   2079    if (newC) {
   2080       assign(
   2081          *newC,
   2082          IRExpr_Mux0X(
   2083             unop(Iop_32to8, mkexpr(amtT)),
   2084             mkexpr(oldC),
   2085             binop(Iop_And32,
   2086                   binop(Iop_Shr32,
   2087                         mkexpr(rMt),
   2088                         unop(Iop_32to8,
   2089                              binop(Iop_And32,
   2090                                    binop(Iop_Sub32,
   2091                                          mkexpr(amtT),
   2092                                          mkU32(1)
   2093                                    ),
   2094                                    mkU32(31)
   2095                              )
   2096                         )
   2097                   ),
   2098                   mkU32(1)
   2099             )
   2100          )
   2101       );
   2102    }
   2103    assign(
   2104       *res,
   2105       IRExpr_Mux0X(
   2106          unop(Iop_32to8, mkexpr(amt5T)), mkexpr(rMt),
   2107          binop(Iop_Or32,
   2108                binop(Iop_Shr32,
   2109                      mkexpr(rMt),
   2110                      unop(Iop_32to8, mkexpr(amt5T))
   2111                ),
   2112                binop(Iop_Shl32,
   2113                      mkexpr(rMt),
   2114                      unop(Iop_32to8,
   2115                           binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
   2116                      )
   2117                )
   2118          )
   2119       )
   2120    );
   2121    DIS(buf, "r%u, ROR r#%u", rM, rS);
   2122 }
   2123 
   2124 
   2125 /* Generate an expression corresponding to the immediate-shift case of
   2126    a shifter operand.  This is used both for ARM and Thumb2.
   2127 
   2128    Bind it to a temporary, and return that via *res.  If newC is
   2129    non-NULL, also compute a value for the shifter's carry out (in the
   2130    LSB of a word), bind it to a temporary, and return that via *shco.
   2131 
   2132    Generates GETs from the guest state and is therefore not safe to
   2133    use once we start doing PUTs to it, for any given instruction.
   2134 
   2135    'how' is encoded thusly:
   2136       00b LSL,  01b LSR,  10b ASR,  11b ROR
   2137    Most but not all ARM and Thumb integer insns use this encoding.
   2138    Be careful to ensure the right value is passed here.
   2139 */
   2140 static void compute_result_and_C_after_shift_by_imm5 (
   2141                /*OUT*/HChar* buf,
   2142                /*OUT*/IRTemp* res,
   2143                /*OUT*/IRTemp* newC,
   2144                IRTemp  rMt,       /* reg to shift */
   2145                UInt    how,       /* what kind of shift */
   2146                UInt    shift_amt, /* shift amount (0..31) */
   2147                UInt    rM         /* only for debug printing */
   2148             )
   2149 {
   2150    vassert(shift_amt < 32);
   2151    vassert(how < 4);
   2152 
   2153    switch (how) {
   2154 
   2155       case 0:
   2156          compute_result_and_C_after_LSL_by_imm5(
   2157             buf, res, newC, rMt, shift_amt, rM
   2158          );
   2159          break;
   2160 
   2161       case 1:
   2162          compute_result_and_C_after_LSR_by_imm5(
   2163             buf, res, newC, rMt, shift_amt, rM
   2164          );
   2165          break;
   2166 
   2167       case 2:
   2168          compute_result_and_C_after_ASR_by_imm5(
   2169             buf, res, newC, rMt, shift_amt, rM
   2170          );
   2171          break;
   2172 
   2173       case 3:
   2174          if (shift_amt == 0) {
   2175             IRTemp oldcT = newTemp(Ity_I32);
   2176             // rotate right 1 bit through carry (?)
   2177             // RRX -- described at ARM ARM A5-17
   2178             // res  = (oldC << 31) | (Rm >>u 1)
   2179             // newC = Rm[0]
   2180             if (newC) {
   2181                assign( *newC,
   2182                        binop(Iop_And32, mkexpr(rMt), mkU32(1)));
   2183             }
   2184             assign( oldcT, mk_armg_calculate_flag_c() );
   2185             assign( *res,
   2186                     binop(Iop_Or32,
   2187                           binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
   2188                           binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
   2189             DIS(buf, "r%u, RRX", rM);
   2190          } else {
   2191             // rotate right in range 1..31
   2192             // res  = Rm `ror` shift_amt
   2193             // newC = Rm[shift_amt - 1]
   2194             vassert(shift_amt >= 1 && shift_amt <= 31);
   2195             if (newC) {
   2196                assign( *newC,
   2197                        binop(Iop_And32,
   2198                              binop(Iop_Shr32, mkexpr(rMt),
   2199                                               mkU8(shift_amt - 1)),
   2200                              mkU32(1)));
   2201             }
   2202             assign( *res,
   2203                     binop(Iop_Or32,
   2204                           binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
   2205                           binop(Iop_Shl32, mkexpr(rMt),
   2206                                            mkU8(32-shift_amt))));
   2207             DIS(buf, "r%u, ROR #%u", rM, shift_amt);
   2208          }
   2209          break;
   2210 
   2211       default:
   2212          /*NOTREACHED*/
   2213          vassert(0);
   2214    }
   2215 }
   2216 
   2217 
   2218 /* Generate an expression corresponding to the register-shift case of
   2219    a shifter operand.  This is used both for ARM and Thumb2.
   2220 
   2221    Bind it to a temporary, and return that via *res.  If newC is
   2222    non-NULL, also compute a value for the shifter's carry out (in the
   2223    LSB of a word), bind it to a temporary, and return that via *shco.
   2224 
   2225    Generates GETs from the guest state and is therefore not safe to
   2226    use once we start doing PUTs to it, for any given instruction.
   2227 
   2228    'how' is encoded thusly:
   2229       00b LSL,  01b LSR,  10b ASR,  11b ROR
   2230    Most but not all ARM and Thumb integer insns use this encoding.
   2231    Be careful to ensure the right value is passed here.
   2232 */
   2233 static void compute_result_and_C_after_shift_by_reg (
   2234                /*OUT*/HChar*  buf,
   2235                /*OUT*/IRTemp* res,
   2236                /*OUT*/IRTemp* newC,
   2237                IRTemp  rMt,       /* reg to shift */
   2238                UInt    how,       /* what kind of shift */
   2239                IRTemp  rSt,       /* shift amount */
   2240                UInt    rM,        /* only for debug printing */
   2241                UInt    rS         /* only for debug printing */
   2242             )
   2243 {
   2244    vassert(how < 4);
   2245    switch (how) {
   2246       case 0: { /* LSL */
   2247          compute_result_and_C_after_LSL_by_reg(
   2248             buf, res, newC, rMt, rSt, rM, rS
   2249          );
   2250          break;
   2251       }
   2252       case 1: { /* LSR */
   2253          compute_result_and_C_after_LSR_by_reg(
   2254             buf, res, newC, rMt, rSt, rM, rS
   2255          );
   2256          break;
   2257       }
   2258       case 2: { /* ASR */
   2259          compute_result_and_C_after_ASR_by_reg(
   2260             buf, res, newC, rMt, rSt, rM, rS
   2261          );
   2262          break;
   2263       }
   2264       case 3: { /* ROR */
   2265          compute_result_and_C_after_ROR_by_reg(
   2266              buf, res, newC, rMt, rSt, rM, rS
   2267          );
   2268          break;
   2269       }
   2270       default:
   2271          /*NOTREACHED*/
   2272          vassert(0);
   2273    }
   2274 }
   2275 
   2276 
   2277 /* Generate an expression corresponding to a shifter_operand, bind it
   2278    to a temporary, and return that via *shop.  If shco is non-NULL,
   2279    also compute a value for the shifter's carry out (in the LSB of a
   2280    word), bind it to a temporary, and return that via *shco.
   2281 
   2282    If for some reason we can't come up with a shifter operand (missing
   2283    case?  not really a shifter operand?) return False.
   2284 
   2285    Generates GETs from the guest state and is therefore not safe to
   2286    use once we start doing PUTs to it, for any given instruction.
   2287 
   2288    For ARM insns only; not for Thumb.
   2289 */
static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
                                 /*OUT*/IRTemp* shop,
                                 /*OUT*/IRTemp* shco,
                                 /*OUT*/HChar* buf )
{
   /* Decode bits 4 and 7 of the low 12 bits; together with insn_25
      they select which of the three shifter-operand forms applies. */
   UInt insn_4 = (insn_11_0 >> 4) & 1;
   UInt insn_7 = (insn_11_0 >> 7) & 1;
   vassert(insn_25 <= 0x1);
   vassert(insn_11_0 <= 0xFFF);

   /* Caller must pass fresh (INVALID) temps; we bind them here. */
   vassert(shop && *shop == IRTemp_INVALID);
   *shop = newTemp(Ity_I32);

   if (shco) {
      vassert(*shco == IRTemp_INVALID);
      *shco = newTemp(Ity_I32);
   }

   /* 32-bit immediate */

   if (insn_25 == 1) {
      /* immediate: (7:0) rotated right by 2 * (11:8) */
      UInt imm = (insn_11_0 >> 0) & 0xFF;
      UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
      vassert(rot <= 30);
      imm = ROR32(imm, rot);
      if (shco) {
         /* With zero rotation the carry-out is the existing C flag;
            otherwise it is bit 31 of the rotated immediate. */
         if (rot == 0) {
            assign( *shco, mk_armg_calculate_flag_c() );
         } else {
            assign( *shco, mkU32( (imm >> 31) & 1 ) );
         }
      }
      DIS(buf, "#0x%x", imm);
      assign( *shop, mkU32(imm) );
      return True;
   }

   /* Shift/rotate by immediate */

   if (insn_25 == 0 && insn_4 == 0) {
      /* Rm (3:0) shifted (6:5) by immediate (11:7) */
      UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
      UInt rM        = (insn_11_0 >> 0) & 0xF;
      UInt how       = (insn_11_0 >> 5) & 3;
      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
      IRTemp rMt = newTemp(Ity_I32);
      assign(rMt, getIRegA(rM));

      vassert(shift_amt <= 31);

      /* Helper computes both the shifted result and (if shco is
         non-NULL) the shifter carry-out. */
      compute_result_and_C_after_shift_by_imm5(
         buf, shop, shco, rMt, how, shift_amt, rM
      );
      return True;
   }

   /* Shift/rotate by register */
   if (insn_25 == 0 && insn_4 == 1) {
      /* Rm (3:0) shifted (6:5) by Rs (11:8) */
      UInt rM  = (insn_11_0 >> 0) & 0xF;
      UInt rS  = (insn_11_0 >> 8) & 0xF;
      UInt how = (insn_11_0 >> 5) & 3;
      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
      IRTemp rMt = newTemp(Ity_I32);
      IRTemp rSt = newTemp(Ity_I32);

      /* Bit 7 must be zero in this form; otherwise this encoding is
         not a shifter operand at all. */
      if (insn_7 == 1)
         return False; /* not really a shifter operand */

      assign(rMt, getIRegA(rM));
      assign(rSt, getIRegA(rS));

      compute_result_and_C_after_shift_by_reg(
         buf, shop, shco, rMt, how, rSt, rM, rS
      );
      return True;
   }

   /* Undecodable combination -- complain and let the caller give up. */
   vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
   return False;
}
   2372 
   2373 
   2374 /* ARM only */
   2375 static
   2376 IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
   2377                                     /*OUT*/HChar* buf )
   2378 {
   2379    vassert(rN < 16);
   2380    vassert(bU < 2);
   2381    vassert(imm12 < 0x1000);
   2382    UChar opChar = bU == 1 ? '+' : '-';
   2383    DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
   2384    return
   2385       binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
   2386              getIRegA(rN),
   2387              mkU32(imm12) );
   2388 }
   2389 
   2390 
   2391 /* ARM only.
   2392    NB: This is "DecodeImmShift" in newer versions of the the ARM ARM.
   2393 */
static
IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
                                          UInt sh2, UInt imm5,
                                          /*OUT*/HChar* buf )
{
   /* Compute EA = rN +/- (rM shifted by sh2/imm5), where sh2 selects
      LSL/LSR/ASR/ROR(RRX) and bU selects add (1) or subtract (0). */
   vassert(rN < 16);
   vassert(bU < 2);
   vassert(rM < 16);
   vassert(sh2 < 4);
   vassert(imm5 < 32);
   UChar   opChar = bU == 1 ? '+' : '-';
   IRExpr* index  = NULL;
   switch (sh2) {
      case 0: /* LSL */
         /* imm5 can be in the range 0 .. 31 inclusive. */
         index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
         DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
         break;
      case 1: /* LSR */
         /* imm5 == 0 denotes a shift of 32, which yields zero.  The
            vassert(0) marks this path as not yet exercised (ATC). */
         if (imm5 == 0) {
            index = mkU32(0);
            vassert(0); // ATC
         } else {
            index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
         }
         DIS(buf, "[r%u, %cr%u, LSR #%u]",
                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
         break;
      case 2: /* ASR */
         /* Doesn't this just mean that the behaviour with imm5 == 0
            is the same as if it had been 31 ? */
         if (imm5 == 0) {
            index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
            vassert(0); // ATC
         } else {
            index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
         }
         DIS(buf, "[r%u, %cr%u, ASR #%u]",
                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
         break;
      case 3: /* ROR or RRX */
         if (imm5 == 0) {
            /* RRX: rotate right by one through the carry flag --
               C goes into bit 31, rM shifts right by one. */
            IRTemp rmT    = newTemp(Ity_I32);
            IRTemp cflagT = newTemp(Ity_I32);
            assign(rmT, getIRegA(rM));
            assign(cflagT, mk_armg_calculate_flag_c());
            index = binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
                          binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
            DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
         } else {
            /* ROR by imm5, built as (rM << (32-imm5)) | (rM >> imm5). */
            IRTemp rmT = newTemp(Ity_I32);
            assign(rmT, getIRegA(rM));
            vassert(imm5 >= 1 && imm5 <= 31);
            index = binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
                          binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
            DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
         }
         break;
      default:
         vassert(0);
   }
   vassert(index);
   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
                getIRegA(rN), index);
}
   2461 
   2462 
   2463 /* ARM only */
   2464 static
   2465 IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
   2466                                    /*OUT*/HChar* buf )
   2467 {
   2468    vassert(rN < 16);
   2469    vassert(bU < 2);
   2470    vassert(imm8 < 0x100);
   2471    UChar opChar = bU == 1 ? '+' : '-';
   2472    DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
   2473    return
   2474       binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
   2475              getIRegA(rN),
   2476              mkU32(imm8) );
   2477 }
   2478 
   2479 
   2480 /* ARM only */
   2481 static
   2482 IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
   2483                                   /*OUT*/HChar* buf )
   2484 {
   2485    vassert(rN < 16);
   2486    vassert(bU < 2);
   2487    vassert(rM < 16);
   2488    UChar   opChar = bU == 1 ? '+' : '-';
   2489    IRExpr* index  = getIRegA(rM);
   2490    DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
   2491    return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   2492                 getIRegA(rN), index);
   2493 }
   2494 
   2495 
   2496 /* irRes :: Ity_I32 holds a floating point comparison result encoded
   2497    as an IRCmpF64Result.  Generate code to convert it to an
   2498    ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
   2499    Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
{
   IRTemp ix       = newTemp(Ity_I32);
   IRTemp termL    = newTemp(Ity_I32);
   IRTemp termR    = newTemp(Ity_I32);
   IRTemp nzcv     = newTemp(Ity_I32);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct value NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and UN.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of this file, to try this out with.
   */
   /* ix = ((irRes >> 5) & 3) | (irRes & 1) -- packs bits 6 and 0 of
      the IR result side by side into a 2-bit index. */
   assign(
      ix,
      binop(Iop_Or32,
            binop(Iop_And32,
                  binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
                  mkU32(3)),
            binop(Iop_And32, mkexpr(irRes), mkU32(1))));

   /* termL = (((ix ^ 1) << 30) - 1) >> 29) + 1 -- the superopt-found
      near-conversion described in the comment above. */
   assign(
      termL,
      binop(Iop_Add32,
            binop(Iop_Shr32,
                  binop(Iop_Sub32,
                        binop(Iop_Shl32,
                              binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
                              mkU8(30)),
                        mkU32(1)),
                  mkU8(29)),
            mkU32(1)));

   /* termR = ix & (ix >> 1) & 1 -- nonzero only when ix == 3 (UN). */
   assign(
      termR,
      binop(Iop_And32,
            binop(Iop_And32,
                  mkexpr(ix),
                  binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
            mkU32(1)));

   assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
   2569 
   2570 
   2571 /* Thumb32 only.  This is "ThumbExpandImm" in the ARM ARM.  If
   2572    updatesC is non-NULL, a boolean is written to it indicating whether
   2573    or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
   2574 */
   2575 static UInt thumbExpandImm ( Bool* updatesC,
   2576                              UInt imm1, UInt imm3, UInt imm8 )
   2577 {
   2578    vassert(imm1 < (1<<1));
   2579    vassert(imm3 < (1<<3));
   2580    vassert(imm8 < (1<<8));
   2581    UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
   2582    UInt abcdefgh = imm8;
   2583    UInt lbcdefgh = imm8 | 0x80;
   2584    if (updatesC) {
   2585       *updatesC = i_imm3_a >= 8;
   2586    }
   2587    switch (i_imm3_a) {
   2588       case 0: case 1:
   2589          return abcdefgh;
   2590       case 2: case 3:
   2591          return (abcdefgh << 16) | abcdefgh;
   2592       case 4: case 5:
   2593          return (abcdefgh << 24) | (abcdefgh << 8);
   2594       case 6: case 7:
   2595          return (abcdefgh << 24) | (abcdefgh << 16)
   2596                 | (abcdefgh << 8) | abcdefgh;
   2597       case 8 ... 31:
   2598          return lbcdefgh << (32 - i_imm3_a);
   2599       default:
   2600          break;
   2601    }
   2602    /*NOTREACHED*/vassert(0);
   2603 }
   2604 
   2605 
   2606 /* Version of thumbExpandImm where we simply feed it the
   2607    instruction halfwords (the lowest addressed one is I0). */
   2608 static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
   2609                                         UShort i0s, UShort i1s )
   2610 {
   2611    UInt i0    = (UInt)i0s;
   2612    UInt i1    = (UInt)i1s;
   2613    UInt imm1  = SLICE_UInt(i0,10,10);
   2614    UInt imm3  = SLICE_UInt(i1,14,12);
   2615    UInt imm8  = SLICE_UInt(i1,7,0);
   2616    return thumbExpandImm(updatesC, imm1, imm3, imm8);
   2617 }
   2618 
   2619 
   2620 /* Thumb16 only.  Given the firstcond and mask fields from an IT
   2621    instruction, compute the 32-bit ITSTATE value implied, as described
   2622    in libvex_guest_arm.h.  This is not the ARM ARM representation.
   2623    Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
   2624    disassembly printing.  Returns False if firstcond or mask
   2625    denote something invalid.
   2626 
   2627    The number and conditions for the instructions to be
   2628    conditionalised depend on firstcond and mask:
   2629 
   2630    mask      cond 1    cond 2      cond 3      cond 4
   2631 
   2632    1000      fc[3:0]
   2633    x100      fc[3:0]   fc[3:1]:x
   2634    xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
   2635    xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z
   2636 
   2637    The condition fields are assembled in *itstate backwards (cond 4 at
   2638    the top, cond 1 at the bottom).  Conditions are << 4'd and then
   2639    ^0xE'd, and those fields that correspond to instructions in the IT
   2640    block are tagged with a 1 bit.
   2641 */
   2642 static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
   2643                               /*OUT*/UChar* ch1,
   2644                               /*OUT*/UChar* ch2,
   2645                               /*OUT*/UChar* ch3,
   2646                               UInt firstcond, UInt mask )
   2647 {
   2648    vassert(firstcond <= 0xF);
   2649    vassert(mask <= 0xF);
   2650    *itstate = 0;
   2651    *ch1 = *ch2 = *ch3 = '.';
   2652    if (mask == 0)
   2653       return False; /* the logic below actually ensures this anyway,
   2654                        but clearer to make it explicit. */
   2655    if (firstcond == 0xF)
   2656       return False; /* NV is not allowed */
   2657    if (firstcond == 0xE && popcount32(mask) != 1)
   2658       return False; /* if firstcond is AL then all the rest must be too */
   2659 
   2660    UInt m3 = (mask >> 3) & 1;
   2661    UInt m2 = (mask >> 2) & 1;
   2662    UInt m1 = (mask >> 1) & 1;
   2663    UInt m0 = (mask >> 0) & 1;
   2664 
   2665    UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
   2666    UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;
   2667 
   2668    if (m3 == 1 && (m2|m1|m0) == 0) {
   2669       *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
   2670       *itstate ^= 0xE0E0E0E0;
   2671       return True;
   2672    }
   2673 
   2674    if (m2 == 1 && (m1|m0) == 0) {
   2675       *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
   2676       *itstate ^= 0xE0E0E0E0;
   2677       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
   2678       return True;
   2679    }
   2680 
   2681    if (m1 == 1 && m0 == 0) {
   2682       *itstate = (ni << 24)
   2683                  | (setbit32(fc, 4, m2) << 16)
   2684                  | (setbit32(fc, 4, m3) << 8) | fc;
   2685       *itstate ^= 0xE0E0E0E0;
   2686       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
   2687       *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
   2688       return True;
   2689    }
   2690 
   2691    if (m0 == 1) {
   2692       *itstate = (setbit32(fc, 4, m1) << 24)
   2693                  | (setbit32(fc, 4, m2) << 16)
   2694                  | (setbit32(fc, 4, m3) << 8) | fc;
   2695       *itstate ^= 0xE0E0E0E0;
   2696       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
   2697       *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
   2698       *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
   2699       return True;
   2700    }
   2701 
   2702    return False;
   2703 }
   2704 
   2705 
   2706 /* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
   2707    Chapter 7 Section 1. */
   2708 static IRTemp gen_BITREV ( IRTemp x0 )
   2709 {
   2710    IRTemp x1 = newTemp(Ity_I32);
   2711    IRTemp x2 = newTemp(Ity_I32);
   2712    IRTemp x3 = newTemp(Ity_I32);
   2713    IRTemp x4 = newTemp(Ity_I32);
   2714    IRTemp x5 = newTemp(Ity_I32);
   2715    UInt   c1 = 0x55555555;
   2716    UInt   c2 = 0x33333333;
   2717    UInt   c3 = 0x0F0F0F0F;
   2718    UInt   c4 = 0x00FF00FF;
   2719    UInt   c5 = 0x0000FFFF;
   2720    assign(x1,
   2721           binop(Iop_Or32,
   2722                 binop(Iop_Shl32,
   2723                       binop(Iop_And32, mkexpr(x0), mkU32(c1)),
   2724                       mkU8(1)),
   2725                 binop(Iop_Shr32,
   2726                       binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
   2727                       mkU8(1))
   2728    ));
   2729    assign(x2,
   2730           binop(Iop_Or32,
   2731                 binop(Iop_Shl32,
   2732                       binop(Iop_And32, mkexpr(x1), mkU32(c2)),
   2733                       mkU8(2)),
   2734                 binop(Iop_Shr32,
   2735                       binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
   2736                       mkU8(2))
   2737    ));
   2738    assign(x3,
   2739           binop(Iop_Or32,
   2740                 binop(Iop_Shl32,
   2741                       binop(Iop_And32, mkexpr(x2), mkU32(c3)),
   2742                       mkU8(4)),
   2743                 binop(Iop_Shr32,
   2744                       binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
   2745                       mkU8(4))
   2746    ));
   2747    assign(x4,
   2748           binop(Iop_Or32,
   2749                 binop(Iop_Shl32,
   2750                       binop(Iop_And32, mkexpr(x3), mkU32(c4)),
   2751                       mkU8(8)),
   2752                 binop(Iop_Shr32,
   2753                       binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
   2754                       mkU8(8))
   2755    ));
   2756    assign(x5,
   2757           binop(Iop_Or32,
   2758                 binop(Iop_Shl32,
   2759                       binop(Iop_And32, mkexpr(x4), mkU32(c5)),
   2760                       mkU8(16)),
   2761                 binop(Iop_Shr32,
   2762                       binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
   2763                       mkU8(16))
   2764    ));
   2765    return x5;
   2766 }
   2767 
   2768 
   2769 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   2770    0:1:2:3 (aka byte-swap). */
   2771 static IRTemp gen_REV ( IRTemp arg )
   2772 {
   2773    IRTemp res = newTemp(Ity_I32);
   2774    assign(res,
   2775           binop(Iop_Or32,
   2776                 binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
   2777           binop(Iop_Or32,
   2778                 binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
   2779                                  mkU32(0x00FF0000)),
   2780           binop(Iop_Or32,
   2781                 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
   2782                                        mkU32(0x0000FF00)),
   2783                 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
   2784                                        mkU32(0x000000FF) )
   2785    ))));
   2786    return res;
   2787 }
   2788 
   2789 
   2790 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   2791    2:3:0:1 (swap within lo and hi halves). */
   2792 static IRTemp gen_REV16 ( IRTemp arg )
   2793 {
   2794    IRTemp res = newTemp(Ity_I32);
   2795    assign(res,
   2796           binop(Iop_Or32,
   2797                 binop(Iop_And32,
   2798                       binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
   2799                       mkU32(0xFF00FF00)),
   2800                 binop(Iop_And32,
   2801                       binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
   2802                       mkU32(0x00FF00FF))));
   2803    return res;
   2804 }
   2805 
   2806 
   2807 /*------------------------------------------------------------*/
   2808 /*--- Advanced SIMD (NEON) instructions                    ---*/
   2809 /*------------------------------------------------------------*/
   2810 
   2811 /*------------------------------------------------------------*/
   2812 /*--- NEON data processing                                 ---*/
   2813 /*------------------------------------------------------------*/
   2814 
   2815 /* For all NEON DP ops, we use the normal scheme to handle conditional
   2816    writes to registers -- pass in condT and hand that on to the
   2817    put*Reg functions.  In ARM mode condT is always IRTemp_INVALID
   2818    since NEON is unconditional for ARM.  In Thumb mode condT is
   2819    derived from the ITSTATE shift register in the normal way. */
   2820 
   2821 static
   2822 UInt get_neon_d_regno(UInt theInstr)
   2823 {
   2824    UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   2825    if (theInstr & 0x40) {
   2826       if (x & 1) {
   2827          x = x + 0x100;
   2828       } else {
   2829          x = x >> 1;
   2830       }
   2831    }
   2832    return x;
   2833 }
   2834 
   2835 static
   2836 UInt get_neon_n_regno(UInt theInstr)
   2837 {
   2838    UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
   2839    if (theInstr & 0x40) {
   2840       if (x & 1) {
   2841          x = x + 0x100;
   2842       } else {
   2843          x = x >> 1;
   2844       }
   2845    }
   2846    return x;
   2847 }
   2848 
   2849 static
   2850 UInt get_neon_m_regno(UInt theInstr)
   2851 {
   2852    UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   2853    if (theInstr & 0x40) {
   2854       if (x & 1) {
   2855          x = x + 0x100;
   2856       } else {
   2857          x = x >> 1;
   2858       }
   2859    }
   2860    return x;
   2861 }
   2862 
   2863 static
   2864 Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
   2865 {
   2866    UInt dreg = get_neon_d_regno(theInstr);
   2867    UInt mreg = get_neon_m_regno(theInstr);
   2868    UInt nreg = get_neon_n_regno(theInstr);
   2869    UInt imm4 = (theInstr >> 8) & 0xf;
   2870    UInt Q = (theInstr >> 6) & 1;
   2871    HChar reg_t = Q ? 'q' : 'd';
   2872 
   2873    if (Q) {
   2874       putQReg(dreg, triop(Iop_ExtractV128, getQReg(nreg),
   2875                getQReg(mreg), mkU8(imm4)), condT);
   2876    } else {
   2877       putDRegI64(dreg, triop(Iop_Extract64, getDRegI64(nreg),
   2878                  getDRegI64(mreg), mkU8(imm4)), condT);
   2879    }
   2880    DIP("vext.8 %c%d, %c%d, %c%d, #%d\n", reg_t, dreg, reg_t, nreg,
   2881                                          reg_t, mreg, imm4);
   2882    return True;
   2883 }
   2884 
   2885 /* VTBL, VTBX */
static
Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
{
   /* Table lookup: each byte of Dm indexes into the table formed by
      registers Dn .. Dn+len.  op == 0 is VTBL (out-of-range indexes
      give zero); op == 1 is VTBX (out-of-range bytes of Dd are left
      unchanged). */
   UInt op = (theInstr >> 6) & 1;
   /* Bit 6 is the op bit here, not Q, so mask it off before the
      regno helpers (which would treat it as the Q flag). */
   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
   UInt len = (theInstr >> 8) & 3;
   Int i;
   IROp cmp;
   ULong imm;
   IRTemp arg_l;
   IRTemp old_mask, new_mask, cur_mask;
   IRTemp old_res, new_res;
   IRTemp old_arg, new_arg;

   /* >= 0x100 marks an invalid register encoding. */
   if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
      return False;
   /* Table must not run off the end of the D register bank. */
   if (nreg + len > 31)
      return False;

   cmp = Iop_CmpGT8Ux8;

   old_mask = newTemp(Ity_I64);
   old_res = newTemp(Ity_I64);
   old_arg = newTemp(Ity_I64);
   assign(old_mask, mkU64(0));
   assign(old_res, mkU64(0));
   assign(old_arg, getDRegI64(mreg));
   /* imm = 0x0808080808080808: the constant 8 replicated into every
      byte lane. */
   imm = 8;
   imm = (imm <<  8) | imm;
   imm = (imm << 16) | imm;
   imm = (imm << 32) | imm;

   /* For each table register: select the lanes of the (progressively
      rebased) index vector that fall in 0..7, permute this table
      register by them, and accumulate the selected bytes into the
      running result.  Rebinding old_* to new_* threads the values
      through the loop iterations. */
   for (i = 0; i <= len; i++) {
      arg_l = newTemp(Ity_I64);
      new_mask = newTemp(Ity_I64);
      cur_mask = newTemp(Ity_I64);
      new_res = newTemp(Ity_I64);
      new_arg = newTemp(Ity_I64);
      assign(arg_l, getDRegI64(nreg+i));
      /* Rebase indexes by 8 for the next table register. */
      assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
      /* cur_mask lanes are all-ones where 8 > index, i.e. index < 8. */
      assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
      assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
      assign(new_res, binop(Iop_Or64,
                            mkexpr(old_res),
                            binop(Iop_And64,
                                  binop(Iop_Perm8x8,
                                        mkexpr(arg_l),
                                        binop(Iop_And64,
                                              mkexpr(old_arg),
                                              mkexpr(cur_mask))),
                                  mkexpr(cur_mask))));

      old_arg = new_arg;
      old_mask = new_mask;
      old_res = new_res;
   }
   if (op) {
      /* VTBX: keep the original Dd bytes in lanes never matched by
         any table register. */
      new_res = newTemp(Ity_I64);
      assign(new_res, binop(Iop_Or64,
                            binop(Iop_And64,
                                  getDRegI64(dreg),
                                  unop(Iop_Not64, mkexpr(old_mask))),
                            mkexpr(old_res)));
      old_res = new_res;
   }

   putDRegI64(dreg, mkexpr(old_res), condT);
   DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
   if (len > 0) {
      DIP("d%u-d%u", nreg, nreg + len);
   } else {
      DIP("d%u", nreg);
   }
   DIP("}, d%u\n", mreg);
   return True;
}
   2964 
   2965 /* VDUP (scalar)  */
   2966 static
   2967 Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
   2968 {
   2969    UInt Q = (theInstr >> 6) & 1;
   2970    UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   2971    UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   2972    UInt imm4 = (theInstr >> 16) & 0xF;
   2973    UInt index;
   2974    UInt size;
   2975    IRTemp arg_m;
   2976    IRTemp res;
   2977    IROp op, op2;
   2978 
   2979    if ((imm4 == 0) || (imm4 == 8))
   2980       return False;
   2981    if ((Q == 1) && ((dreg & 1) == 1))
   2982       return False;
   2983    if (Q)
   2984       dreg >>= 1;
   2985    arg_m = newTemp(Ity_I64);
   2986    assign(arg_m, getDRegI64(mreg));
   2987    if (Q)
   2988       res = newTemp(Ity_V128);
   2989    else
   2990       res = newTemp(Ity_I64);
   2991    if ((imm4 & 1) == 1) {
   2992       op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
   2993       op2 = Iop_GetElem8x8;
   2994       index = imm4 >> 1;
   2995       size = 8;
   2996    } else if ((imm4 & 3) == 2) {
   2997       op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
   2998       op2 = Iop_GetElem16x4;
   2999       index = imm4 >> 2;
   3000       size = 16;
   3001    } else if ((imm4 & 7) == 4) {
   3002       op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
   3003       op2 = Iop_GetElem32x2;
   3004       index = imm4 >> 3;
   3005       size = 32;
   3006    } else {
   3007       return False; // can this ever happen?
   3008    }
   3009    assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
   3010    if (Q) {
   3011       putQReg(dreg, mkexpr(res), condT);
   3012    } else {
   3013       putDRegI64(dreg, mkexpr(res), condT);
   3014    }
   3015    DIP("vdup.%d %c%d, d%d[%d]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
   3016    return True;
   3017 }
   3018 
   3019 /* A7.4.1 Three registers of the same length */
   3020 static
   3021 Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
   3022 {
   3023    UInt Q = (theInstr >> 6) & 1;
   3024    UInt dreg = get_neon_d_regno(theInstr);
   3025    UInt nreg = get_neon_n_regno(theInstr);
   3026    UInt mreg = get_neon_m_regno(theInstr);
   3027    UInt A = (theInstr >> 8) & 0xF;
   3028    UInt B = (theInstr >> 4) & 1;
   3029    UInt C = (theInstr >> 20) & 0x3;
   3030    UInt U = (theInstr >> 24) & 1;
   3031    UInt size = C;
   3032 
   3033    IRTemp arg_n;
   3034    IRTemp arg_m;
   3035    IRTemp res;
   3036 
   3037    if (Q) {
   3038       arg_n = newTemp(Ity_V128);
   3039       arg_m = newTemp(Ity_V128);
   3040       res = newTemp(Ity_V128);
   3041       assign(arg_n, getQReg(nreg));
   3042       assign(arg_m, getQReg(mreg));
   3043    } else {
   3044       arg_n = newTemp(Ity_I64);
   3045       arg_m = newTemp(Ity_I64);
   3046       res = newTemp(Ity_I64);
   3047       assign(arg_n, getDRegI64(nreg));
   3048       assign(arg_m, getDRegI64(mreg));
   3049    }
   3050 
   3051    switch(A) {
   3052       case 0:
   3053          if (B == 0) {
   3054             /* VHADD */
   3055             ULong imm = 0;
   3056             IRExpr *imm_val;
   3057             IROp addOp;
   3058             IROp andOp;
   3059             IROp shOp;
   3060             char regType = Q ? 'q' : 'd';
   3061 
   3062             if (size == 3)
   3063                return False;
   3064             switch(size) {
   3065                case 0: imm = 0x101010101010101LL; break;
   3066                case 1: imm = 0x1000100010001LL; break;
   3067                case 2: imm = 0x100000001LL; break;
   3068                default: vassert(0);
   3069             }
   3070             if (Q) {
   3071                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   3072                andOp = Iop_AndV128;
   3073             } else {
   3074                imm_val = mkU64(imm);
   3075                andOp = Iop_And64;
   3076             }
   3077             if (U) {
   3078                switch(size) {
   3079                   case 0:
   3080                      addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
   3081                      shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3082                      break;
   3083                   case 1:
   3084                      addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
   3085                      shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3086                      break;
   3087                   case 2:
   3088                      addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
   3089                      shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3090                      break;
   3091                   default:
   3092                      vassert(0);
   3093                }
   3094             } else {
   3095                switch(size) {
   3096                   case 0:
   3097                      addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
   3098                      shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
   3099                      break;
   3100                   case 1:
   3101                      addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
   3102                      shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
   3103                      break;
   3104                   case 2:
   3105                      addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
   3106                      shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
   3107                      break;
   3108                   default:
   3109                      vassert(0);
   3110                }
   3111             }
   3112             assign(res,
   3113                    binop(addOp,
   3114                          binop(addOp,
   3115                                binop(shOp, mkexpr(arg_m), mkU8(1)),
   3116                                binop(shOp, mkexpr(arg_n), mkU8(1))),
   3117                          binop(shOp,
   3118                                binop(addOp,
   3119                                      binop(andOp, mkexpr(arg_m), imm_val),
   3120                                      binop(andOp, mkexpr(arg_n), imm_val)),
   3121                                mkU8(1))));
   3122             DIP("vhadd.%c%d %c%d, %c%d, %c%d\n",
   3123                 U ? 'u' : 's', 8 << size, regType,
   3124                 dreg, regType, nreg, regType, mreg);
   3125          } else {
            /* VQADD */
            /* Saturating add.  'op' is the per-lane saturating add that
               produces the result; 'op2' is the plain wrapping add, computed
               only so setFlag_QC can compare the two results and set the
               QC (cumulative saturation) flag when they differ. */
            IROp op, op2;
            IRTemp tmp;
            char reg_t = Q ? 'q' : 'd';
            if (Q) {
               switch (size) {
                  case 0:
                     op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
                     op2 = Iop_Add8x16;
                     break;
                  case 1:
                     op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
                     op2 = Iop_Add16x8;
                     break;
                  case 2:
                     op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
                     op2 = Iop_Add32x4;
                     break;
                  case 3:
                     op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
                     op2 = Iop_Add64x2;
                     break;
                  default:
                     vassert(0);
               }
            } else {
               switch (size) {
                  case 0:
                     op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
                     op2 = Iop_Add8x8;
                     break;
                  case 1:
                     op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
                     op2 = Iop_Add16x4;
                     break;
                  case 2:
                     op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
                     op2 = Iop_Add32x2;
                     break;
                  case 3:
                     op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
                     op2 = Iop_Add64;
                     break;
                  default:
                     vassert(0);
               }
            }
            if (Q) {
               tmp = newTemp(Ity_V128);
            } else {
               tmp = newTemp(Ity_I64);
            }
            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
#ifndef DISABLE_QC_FLAG
            /* QC is set iff the saturating and wrapping results differ. */
            assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
            setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
#endif
            DIP("vqadd.%c%d %c%d, %c%d, %c%d\n",
                U ? 'u' : 's',
                8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3186          }
   3187          break;
   3188       case 1:
   3189          if (B == 0) {
            /* VRHADD */
            /* Rounding halving add:
               VRHADD C, A, B ::=
                 C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
               i.e. a halving add whose discarded-LSB carry term gets an
               extra +1, which rounds the result upwards. */
            IROp shift_op, add_op;
            IRTemp cc;
            ULong one = 1;
            HChar reg_t = Q ? 'q' : 'd';
            /* Replicate the constant 1 into every lane of width 8 << size.
               The case fall-throughs are deliberate: narrower lanes need
               more replication steps. */
            switch (size) {
               case 0: one = (one <<  8) | one; /* fall through */
               case 1: one = (one << 16) | one; /* fall through */
               case 2: one = (one << 32) | one; break;
               case 3: return False;
               default: vassert(0);
            }
            /* shift_op: logical shift for unsigned, arithmetic for signed. */
            if (Q) {
               switch (size) {
                  case 0:
                     shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
                     add_op = Iop_Add8x16;
                     break;
                  case 1:
                     shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
                     add_op = Iop_Add16x8;
                     break;
                  case 2:
                     shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
                     add_op = Iop_Add32x4;
                     break;
                  case 3:
                     return False;
                  default:
                     vassert(0);
               }
            } else {
               switch (size) {
                  case 0:
                     shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
                     add_op = Iop_Add8x8;
                     break;
                  case 1:
                     shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
                     add_op = Iop_Add16x4;
                     break;
                  case 2:
                     shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
                     add_op = Iop_Add32x2;
                     break;
                  case 3:
                     return False;
                  default:
                     vassert(0);
               }
            }
            if (Q) {
               /* cc = ((A & 1) + (B & 1) + 1) >> 1 per lane: the rounded
                  carry of the two low bits dropped by the halving shifts. */
               cc = newTemp(Ity_V128);
               assign(cc, binop(shift_op,
                                binop(add_op,
                                      binop(add_op,
                                            binop(Iop_AndV128,
                                                  mkexpr(arg_n),
                                                  binop(Iop_64HLtoV128,
                                                        mkU64(one),
                                                        mkU64(one))),
                                            binop(Iop_AndV128,
                                                  mkexpr(arg_m),
                                                  binop(Iop_64HLtoV128,
                                                        mkU64(one),
                                                        mkU64(one)))),
                                      binop(Iop_64HLtoV128,
                                            mkU64(one),
                                            mkU64(one))),
                                mkU8(1)));
               assign(res, binop(add_op,
                                 binop(add_op,
                                       binop(shift_op,
                                             mkexpr(arg_n),
                                             mkU8(1)),
                                       binop(shift_op,
                                             mkexpr(arg_m),
                                             mkU8(1))),
                                 mkexpr(cc)));
            } else {
               /* 64-bit (D-register) version of the same computation. */
               cc = newTemp(Ity_I64);
               assign(cc, binop(shift_op,
                                binop(add_op,
                                      binop(add_op,
                                            binop(Iop_And64,
                                                  mkexpr(arg_n),
                                                  mkU64(one)),
                                            binop(Iop_And64,
                                                  mkexpr(arg_m),
                                                  mkU64(one))),
                                      mkU64(one)),
                                mkU8(1)));
               assign(res, binop(add_op,
                                 binop(add_op,
                                       binop(shift_op,
                                             mkexpr(arg_n),
                                             mkU8(1)),
                                       binop(shift_op,
                                             mkexpr(arg_m),
                                             mkU8(1))),
                                 mkexpr(cc)));
            }
            DIP("vrhadd.%c%d %c%d, %c%d, %c%d\n",
                U ? 'u' : 's',
                8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3297          } else {
   3298             if (U == 0)  {
               /* U == 0: bitwise ops selected by C (the 'size' bits). */
               switch(C) {
                  case 0: {
                     /* VAND: res = n & m */
                     HChar reg_t = Q ? 'q' : 'd';
                     if (Q) {
                        assign(res, binop(Iop_AndV128, mkexpr(arg_n),
                                                       mkexpr(arg_m)));
                     } else {
                        assign(res, binop(Iop_And64, mkexpr(arg_n),
                                                     mkexpr(arg_m)));
                     }
                     DIP("vand %c%d, %c%d, %c%d\n",
                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
                     break;
                  }
                  case 1: {
                     /* VBIC: bit clear, res = n & ~m */
                     HChar reg_t = Q ? 'q' : 'd';
                     if (Q) {
                        assign(res, binop(Iop_AndV128,mkexpr(arg_n),
                               unop(Iop_NotV128, mkexpr(arg_m))));
                     } else {
                        assign(res, binop(Iop_And64, mkexpr(arg_n),
                               unop(Iop_Not64, mkexpr(arg_m))));
                     }
                     DIP("vbic %c%d, %c%d, %c%d\n",
                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
                     break;
                  }
                  case 2:
                     /* VORR with identical source registers is the
                        canonical encoding of VMOV (register). */
                     if ( nreg != mreg) {
                        /* VORR: res = n | m */
                        HChar reg_t = Q ? 'q' : 'd';
                        if (Q) {
                           assign(res, binop(Iop_OrV128, mkexpr(arg_n),
                                                         mkexpr(arg_m)));
                        } else {
                           assign(res, binop(Iop_Or64, mkexpr(arg_n),
                                                       mkexpr(arg_m)));
                        }
                        DIP("vorr %c%d, %c%d, %c%d\n",
                            reg_t, dreg, reg_t, nreg, reg_t, mreg);
                     } else {
                        /* VMOV: res = m */
                        HChar reg_t = Q ? 'q' : 'd';
                        assign(res, mkexpr(arg_m));
                        DIP("vmov %c%d, %c%d\n", reg_t, dreg, reg_t, mreg);
                     }
                     break;
                  case 3:{
                     /* VORN: res = n | ~m */
                     HChar reg_t = Q ? 'q' : 'd';
                     if (Q) {
                        assign(res, binop(Iop_OrV128,mkexpr(arg_n),
                               unop(Iop_NotV128, mkexpr(arg_m))));
                     } else {
                        assign(res, binop(Iop_Or64, mkexpr(arg_n),
                               unop(Iop_Not64, mkexpr(arg_m))));
                     }
                     DIP("vorn %c%d, %c%d, %c%d\n",
                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
                     break;
                  }
               }
   3362                }
               /* U == 1: XOR and the three bitwise-select variants, which
                  differ only in which operand acts as the select mask and
                  which pair is muxed. */
               switch(C) {
                  case 0:
                     /* VEOR (XOR): res = n ^ m */
                     if (Q) {
                        assign(res, binop(Iop_XorV128, mkexpr(arg_n),
                                                       mkexpr(arg_m)));
                     } else {
                        assign(res, binop(Iop_Xor64, mkexpr(arg_n),
                                                     mkexpr(arg_m)));
                     }
                     DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
                           Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
                     break;
                  case 1:
                     /* VBSL: bitwise select, d is the mask:
                        res = (n & d) | (m & ~d) */
                     if (Q) {
                        IRTemp reg_d = newTemp(Ity_V128);
                        assign(reg_d, getQReg(dreg));
                        assign(res,
                               binop(Iop_OrV128,
                                     binop(Iop_AndV128, mkexpr(arg_n),
                                                        mkexpr(reg_d)),
                                     binop(Iop_AndV128,
                                           mkexpr(arg_m),
                                           unop(Iop_NotV128,
                                                 mkexpr(reg_d)) ) ) );
                     } else {
                        IRTemp reg_d = newTemp(Ity_I64);
                        assign(reg_d, getDRegI64(dreg));
                        assign(res,
                               binop(Iop_Or64,
                                     binop(Iop_And64, mkexpr(arg_n),
                                                      mkexpr(reg_d)),
                                     binop(Iop_And64,
                                           mkexpr(arg_m),
                                           unop(Iop_Not64, mkexpr(reg_d)))));
                     }
                     DIP("vbsl %c%u, %c%u, %c%u\n",
                         Q ? 'q' : 'd', dreg,
                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
                     break;
                  case 2:
                     /* VBIT: insert if true, m is the mask:
                        res = (n & m) | (d & ~m) */
                     if (Q) {
                        IRTemp reg_d = newTemp(Ity_V128);
                        assign(reg_d, getQReg(dreg));
                        assign(res,
                               binop(Iop_OrV128,
                                     binop(Iop_AndV128, mkexpr(arg_n),
                                                        mkexpr(arg_m)),
                                     binop(Iop_AndV128,
                                           mkexpr(reg_d),
                                           unop(Iop_NotV128, mkexpr(arg_m)))));
                     } else {
                        IRTemp reg_d = newTemp(Ity_I64);
                        assign(reg_d, getDRegI64(dreg));
                        assign(res,
                               binop(Iop_Or64,
                                     binop(Iop_And64, mkexpr(arg_n),
                                                      mkexpr(arg_m)),
                                     binop(Iop_And64,
                                           mkexpr(reg_d),
                                           unop(Iop_Not64, mkexpr(arg_m)))));
                     }
                     DIP("vbit %c%u, %c%u, %c%u\n",
                         Q ? 'q' : 'd', dreg,
                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
                     break;
                  case 3:
                     /* VBIF: insert if false, m is the mask:
                        res = (d & m) | (n & ~m) */
                     if (Q) {
                        IRTemp reg_d = newTemp(Ity_V128);
                        assign(reg_d, getQReg(dreg));
                        assign(res,
                               binop(Iop_OrV128,
                                     binop(Iop_AndV128, mkexpr(reg_d),
                                                        mkexpr(arg_m)),
                                     binop(Iop_AndV128,
                                           mkexpr(arg_n),
                                           unop(Iop_NotV128, mkexpr(arg_m)))));
                     } else {
                        IRTemp reg_d = newTemp(Ity_I64);
                        assign(reg_d, getDRegI64(dreg));
                        assign(res,
                               binop(Iop_Or64,
                                     binop(Iop_And64, mkexpr(reg_d),
                                                      mkexpr(arg_m)),
                                     binop(Iop_And64,
                                           mkexpr(arg_n),
                                           unop(Iop_Not64, mkexpr(arg_m)))));
                     }
                     DIP("vbif %c%u, %c%u, %c%u\n",
                         Q ? 'q' : 'd', dreg,
                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
                     break;
               }
   3460             }
   3461          }
   3462          break;
   3463       case 2:
   3464          if (B == 0) {
            /* VHSUB */
            /* Halving subtract without widening:
                 (A >> 1) - (B >> 1) - (NOT (A) & B & 1)
               The final term is the borrow generated by the two discarded
               low bits (a borrow occurs exactly when A's LSB is 0 and B's
               LSB is 1). */
            ULong imm = 0;
            IRExpr *imm_val;
            IROp subOp;
            IROp notOp;
            IROp andOp;
            IROp shOp;
            if (size == 3)
               return False;
            /* imm: bit 0 of every lane set, used to mask lane LSBs. */
            switch(size) {
               case 0: imm = 0x101010101010101LL; break;
               case 1: imm = 0x1000100010001LL; break;
               case 2: imm = 0x100000001LL; break;
               default: vassert(0);
            }
            if (Q) {
               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
               andOp = Iop_AndV128;
               notOp = Iop_NotV128;
            } else {
               imm_val = mkU64(imm);
               andOp = Iop_And64;
               notOp = Iop_Not64;
            }
            /* shOp: logical shift for unsigned, arithmetic for signed. */
            if (U) {
               switch(size) {
                  case 0:
                     subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
                     shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
                     break;
                  case 1:
                     subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
                     shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
                     break;
                  case 2:
                     subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
                     shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
                     break;
                  default:
                     vassert(0);
               }
            } else {
               switch(size) {
                  case 0:
                     subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
                     shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
                     break;
                  case 1:
                     subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
                     shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
                     break;
                  case 2:
                     subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
                     shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
                     break;
                  default:
                     vassert(0);
               }
            }
            assign(res,
                   binop(subOp,
                         binop(subOp,
                               binop(shOp, mkexpr(arg_n), mkU8(1)),
                               binop(shOp, mkexpr(arg_m), mkU8(1))),
                         binop(andOp,
                               binop(andOp,
                                     unop(notOp, mkexpr(arg_n)),
                                     mkexpr(arg_m)),
                               imm_val)));
            DIP("vhsub.%c%u %c%u, %c%u, %c%u\n",
                U ? 'u' : 's', 8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
                mreg);
   3539          } else {
            /* VQSUB */
            /* Saturating subtract.  'op' is the per-lane saturating
               subtract that produces the result; 'op2' is the plain
               wrapping subtract, computed only so setFlag_QC can set QC
               when the two results differ, i.e. when saturation occurred. */
            IROp op, op2;
            IRTemp tmp;
            if (Q) {
               switch (size) {
                  case 0:
                     op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
                     op2 = Iop_Sub8x16;
                     break;
                  case 1:
                     op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
                     op2 = Iop_Sub16x8;
                     break;
                  case 2:
                     op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
                     op2 = Iop_Sub32x4;
                     break;
                  case 3:
                     op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
                     op2 = Iop_Sub64x2;
                     break;
                  default:
                     vassert(0);
               }
            } else {
               switch (size) {
                  case 0:
                     op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
                     op2 = Iop_Sub8x8;
                     break;
                  case 1:
                     op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
                     op2 = Iop_Sub16x4;
                     break;
                  case 2:
                     op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
                     op2 = Iop_Sub32x2;
                     break;
                  case 3:
                     op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
                     op2 = Iop_Sub64;
                     break;
                  default:
                     vassert(0);
               }
            }
            if (Q)
               tmp = newTemp(Ity_V128);
            else
               tmp = newTemp(Ity_I64);
            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
#ifndef DISABLE_QC_FLAG
            /* QC is set iff the saturating and wrapping results differ. */
            assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
            setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
#endif
            DIP("vqsub.%c%u %c%u, %c%u, %c%u\n",
                U ? 'u' : 's', 8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
                mreg);
   3599          }
   3600          break;
      case 3: {
            /* VCGT / VCGE (register): per-lane compare, all-ones on true.
               B selects between them; U selects unsigned vs signed. */
            IROp op;
            if (Q) {
               switch (size) {
                  case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
                  case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
                  case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
                  case 3: return False;
                  default: vassert(0);
               }
            } else {
               switch (size) {
                  case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
                  case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
                  case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
                  case 3: return False;
                  default: vassert(0);
               }
            }
            if (B == 0) {
               /* VCGT  */
               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
               DIP("vcgt.%c%u %c%u, %c%u, %c%u\n",
                   U ? 'u' : 's', 8 << size,
                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
                   mreg);
            } else {
               /* VCGE  */
               /* There is no CmpGE IROp, so use the identity
                  VCGE res, argn, argm
                    is equal to
                  VCGT tmp, argm, argn
                  VNOT res, tmp */
               assign(res,
                      unop(Q ? Iop_NotV128 : Iop_Not64,
                           binop(op, mkexpr(arg_m), mkexpr(arg_n))));
               DIP("vcge.%c%u %c%u, %c%u, %c%u\n",
                   U ? 'u' : 's', 8 << size,
                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
                   mreg);
            }
         }
         break;
   3643       case 4:
   3644          if (B == 0) {
            /* VSHL */
            /* Shift by a per-lane signed amount taken from arg_n.
               Unsigned case maps directly onto Shl-by-vector.  Signed case
               negates the shift amounts and uses Sar-by-vector instead;
               NOTE(review): this presumably relies on VEX's Sar*x* ops
               treating a negated count as the opposite-direction shift
               VSHL requires — confirm against the IROp definitions. */
            IROp op, sub_op;
            IRTemp tmp;
            if (U) {
               switch (size) {
                  case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
                  case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
                  case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
                  case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
                  default: vassert(0);
               }
            } else {
               tmp = newTemp(Q ? Ity_V128 : Ity_I64);
               switch (size) {
                  case 0:
                     op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
                     sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
                     break;
                  case 1:
                     op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
                     sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
                     break;
                  case 2:
                     op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
                     sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
                     break;
                  case 3:
                     op = Q ? Iop_Sar64x2 : Iop_Sar64;
                     sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
                     break;
                  default:
                     vassert(0);
               }
            }
            if (U) {
               /* The scalar Shl64 op (non-Q, size 3) takes an I8 shift
                  amount, hence the 64to8 narrowing. */
               if (!Q && (size == 3))
                  assign(res, binop(op, mkexpr(arg_m),
                                        unop(Iop_64to8, mkexpr(arg_n))));
               else
                  assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
            } else {
               /* tmp = 0 - arg_n: negate the per-lane shift amounts. */
               if (Q)
                  assign(tmp, binop(sub_op,
                                    binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
                                    mkexpr(arg_n)));
               else
                  assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
               if (!Q && (size == 3))
                  assign(res, binop(op, mkexpr(arg_m),
                                        unop(Iop_64to8, mkexpr(tmp))));
               else
                  assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
            }
            DIP("vshl.%c%u %c%u, %c%u, %c%u\n",
                U ? 'u' : 's', 8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
                nreg);
   3702          } else {
            /* VQSHL */
            /* Saturating shift by a per-lane amount from arg_n.
               op:      the saturating shift (QShl for unsigned, QSal for
                        signed) that produces the result.
               op_shrn/op_shln: immediate shifts, used below to replicate
                        the bottom byte of each shift amount across its lane.
               op_rev, cmp_neq, cmp_gt, mask, esize: used by the QC-flag
                        computation that continues after this op selection
                        (partially below). */
            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
            IRTemp tmp, shval, mask, old_shval;
            UInt i;
            ULong esize;
            cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
            if (U) {
               switch (size) {
                  case 0:
                     op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
                     op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
                     break;
                  case 1:
                     op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
                     op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
                     break;
                  case 2:
                     op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
                     op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
                     break;
                  case 3:
                     op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
                     op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
                     break;
                  default:
                     vassert(0);
               }
            } else {
               switch (size) {
                  case 0:
                     op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
                     op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
                     break;
                  case 1:
                     op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
                     op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
                     break;
                  case 2:
                     op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
                     op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
                     break;
                  case 3:
                     op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
                     op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
                     break;
                  default:
                     vassert(0);
               }
            }
            if (Q) {
               tmp = newTemp(Ity_V128);
               shval = newTemp(Ity_V128);
               mask = newTemp(Ity_V128);
            } else {
               tmp = newTemp(Ity_I64);
               shval = newTemp(Ity_I64);
               mask = newTemp(Ity_I64);
            }
            assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
   3779 #ifndef DISABLE_QC_FLAG
   3780             /* Only least significant byte from second argument is used.
   3781                Copy this byte to the whole vector element. */
   3782             assign(shval, binop(op_shrn,
   3783                                 binop(op_shln,
   3784                                        mkexpr(arg_n),
   3785                                        mkU8((8 << size) - 8)),
   3786                                 mkU8((8 << size) - 8)));
   3787             for(i = 0; i < size; i++) {
   3788                old_shval = shval;
   3789                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   3790                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   3791                                    mkexpr(old_shval),
   3792                                    binop(op_shln,
   3793                                          mkexpr(old_shval),
   3794                                          mkU8(8 << i))));
   3795             }
    3796             /* If the shift is greater than or equal to the element size and
    3797                the element is non-zero, then the QC flag should be set. */
   3798             esize = (8 << size) - 1;
   3799             esize = (esize <<  8) | esize;
   3800             esize = (esize << 16) | esize;
   3801             esize = (esize << 32) | esize;
   3802             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   3803                              binop(cmp_gt, mkexpr(shval),
   3804                                            Q ? mkU128(esize) : mkU64(esize)),
   3805                              unop(cmp_neq, mkexpr(arg_m))),
   3806                        Q ? mkU128(0) : mkU64(0),
   3807                        Q, condT);
    3808             /* Otherwise the QC flag should be set if the shift value is
    3809                positive and the result, being right-shifted by the same value,
    3810                is not equal to the left argument. */
   3811             assign(mask, binop(cmp_gt, mkexpr(shval),
   3812                                        Q ? mkU128(0) : mkU64(0)));
   3813             if (!Q && size == 3)
   3814                assign(tmp, binop(op_rev, mkexpr(res),
   3815                                          unop(Iop_64to8, mkexpr(arg_n))));
   3816             else
   3817                assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
   3818             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   3819                              mkexpr(tmp), mkexpr(mask)),
   3820                        binop(Q ? Iop_AndV128 : Iop_And64,
   3821                              mkexpr(arg_m), mkexpr(mask)),
   3822                        Q, condT);
   3823 #endif
   3824             DIP("vqshl.%c%u %c%u, %c%u, %c%u\n",
   3825                 U ? 'u' : 's', 8 << size,
   3826                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3827                 nreg);
   3828          }
   3829          break;
   3830       case 5:
   3831          if (B == 0) {
   3832             /* VRSHL */
   3833             IROp op, op_shrn, op_shln, cmp_gt, op_add;
   3834             IRTemp shval, old_shval, imm_val, round;
   3835             UInt i;
   3836             ULong imm;
   3837             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3838             imm = 1L;
   3839             switch (size) {
   3840                case 0: imm = (imm <<  8) | imm; /* fall through */
   3841                case 1: imm = (imm << 16) | imm; /* fall through */
   3842                case 2: imm = (imm << 32) | imm; /* fall through */
   3843                case 3: break;
   3844                default: vassert(0);
   3845             }
   3846             imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
   3847             round = newTemp(Q ? Ity_V128 : Ity_I64);
   3848             assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
   3849             if (U) {
   3850                switch (size) {
   3851                   case 0:
   3852                      op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
   3853                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   3854                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3855                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3856                      break;
   3857                   case 1:
   3858                      op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
   3859                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   3860                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3861                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3862                      break;
   3863                   case 2:
   3864                      op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
   3865                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   3866                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3867                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3868                      break;
   3869                   case 3:
   3870                      op = Q ? Iop_Shl64x2 : Iop_Shl64;
   3871                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   3872                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3873                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3874                      break;
   3875                   default:
   3876                      vassert(0);
   3877                }
   3878             } else {
   3879                switch (size) {
   3880                   case 0:
   3881                      op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
   3882                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   3883                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3884                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3885                      break;
   3886                   case 1:
   3887                      op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
   3888                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   3889                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3890                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3891                      break;
   3892                   case 2:
   3893                      op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
   3894                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   3895                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3896                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3897                      break;
   3898                   case 3:
   3899                      op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
   3900                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   3901                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3902                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3903                      break;
   3904                   default:
   3905                      vassert(0);
   3906                }
   3907             }
   3908             if (Q) {
   3909                shval = newTemp(Ity_V128);
   3910             } else {
   3911                shval = newTemp(Ity_I64);
   3912             }
   3913             /* Only least significant byte from second argument is used.
   3914                Copy this byte to the whole vector element. */
   3915             assign(shval, binop(op_shrn,
   3916                                 binop(op_shln,
   3917                                        mkexpr(arg_n),
   3918                                        mkU8((8 << size) - 8)),
   3919                                 mkU8((8 << size) - 8)));
   3920             for (i = 0; i < size; i++) {
   3921                old_shval = shval;
   3922                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   3923                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   3924                                    mkexpr(old_shval),
   3925                                    binop(op_shln,
   3926                                          mkexpr(old_shval),
   3927                                          mkU8(8 << i))));
   3928             }
   3929             /* Compute the result */
   3930             if (!Q && size == 3 && U) {
   3931                assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   3932                                    binop(op,
   3933                                          mkexpr(arg_m),
   3934                                          unop(Iop_64to8,
   3935                                               binop(op_add,
   3936                                                     mkexpr(arg_n),
   3937                                                     mkexpr(imm_val)))),
   3938                                    binop(Q ? Iop_AndV128 : Iop_And64,
   3939                                          mkexpr(imm_val),
   3940                                          binop(cmp_gt,
   3941                                                Q ? mkU128(0) : mkU64(0),
   3942                                                mkexpr(arg_n)))));
   3943                assign(res, binop(op_add,
   3944                                  binop(op,
   3945                                        mkexpr(arg_m),
   3946                                        unop(Iop_64to8, mkexpr(arg_n))),
   3947                                  mkexpr(round)));
   3948             } else {
   3949                assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   3950                                    binop(op,
   3951                                          mkexpr(arg_m),
   3952                                          binop(op_add,
   3953                                                mkexpr(arg_n),
   3954                                                mkexpr(imm_val))),
   3955                                    binop(Q ? Iop_AndV128 : Iop_And64,
   3956                                          mkexpr(imm_val),
   3957                                          binop(cmp_gt,
   3958                                                Q ? mkU128(0) : mkU64(0),
   3959                                                mkexpr(arg_n)))));
   3960                assign(res, binop(op_add,
   3961                                  binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   3962                                  mkexpr(round)));
   3963             }
   3964             DIP("vrshl.%c%u %c%u, %c%u, %c%u\n",
   3965                 U ? 'u' : 's', 8 << size,
   3966                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3967                 nreg);
   3968          } else {
   3969             /* VQRSHL */
   3970             IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
   3971             IRTemp tmp, shval, mask, old_shval, imm_val, round;
   3972             UInt i;
   3973             ULong esize, imm;
   3974             cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
   3975             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3976             imm = 1L;
   3977             switch (size) {
   3978                case 0: imm = (imm <<  8) | imm; /* fall through */
   3979                case 1: imm = (imm << 16) | imm; /* fall through */
   3980                case 2: imm = (imm << 32) | imm; /* fall through */
   3981                case 3: break;
   3982                default: vassert(0);
   3983             }
   3984             imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
   3985             round = newTemp(Q ? Ity_V128 : Ity_I64);
   3986             assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
   3987             if (U) {
   3988                switch (size) {
   3989                   case 0:
   3990                      op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
   3991                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   3992                      op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
   3993                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3994                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3995                      break;
   3996                   case 1:
   3997                      op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
   3998                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   3999                      op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
   4000                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   4001                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   4002                      break;
   4003                   case 2:
   4004                      op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
   4005                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   4006                      op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
   4007                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   4008                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   4009                      break;
   4010                   case 3:
   4011                      op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
   4012                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   4013                      op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
   4014                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   4015                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   4016                      break;
   4017                   default:
   4018                      vassert(0);
   4019                }
   4020             } else {
   4021                switch (size) {
   4022                   case 0:
   4023                      op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
   4024                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   4025                      op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   4026                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   4027                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   4028                      break;
   4029                   case 1:
   4030                      op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
   4031                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   4032                      op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   4033                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   4034                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   4035                      break;
   4036                   case 2:
   4037                      op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
   4038                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   4039                      op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   4040                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   4041                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   4042                      break;
   4043                   case 3:
   4044                      op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
   4045                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   4046                      op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
   4047                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   4048                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   4049                      break;
   4050                   default:
   4051                      vassert(0);
   4052                }
   4053             }
   4054             if (Q) {
   4055                tmp = newTemp(Ity_V128);
   4056                shval = newTemp(Ity_V128);
   4057                mask = newTemp(Ity_V128);
   4058             } else {
   4059                tmp = newTemp(Ity_I64);
   4060                shval = newTemp(Ity_I64);
   4061                mask = newTemp(Ity_I64);
   4062             }
   4063             /* Only least significant byte from second argument is used.
   4064                Copy this byte to the whole vector element. */
   4065             assign(shval, binop(op_shrn,
   4066                                 binop(op_shln,
   4067                                        mkexpr(arg_n),
   4068                                        mkU8((8 << size) - 8)),
   4069                                 mkU8((8 << size) - 8)));
   4070             for (i = 0; i < size; i++) {
   4071                old_shval = shval;
   4072                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   4073                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   4074                                    mkexpr(old_shval),
   4075                                    binop(op_shln,
   4076                                          mkexpr(old_shval),
   4077                                          mkU8(8 << i))));
   4078             }
   4079             /* Compute the result */
   4080             assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   4081                                 binop(op,
   4082                                       mkexpr(arg_m),
   4083                                       binop(op_add,
   4084                                             mkexpr(arg_n),
   4085                                             mkexpr(imm_val))),
   4086                                 binop(Q ? Iop_AndV128 : Iop_And64,
   4087                                       mkexpr(imm_val),
   4088                                       binop(cmp_gt,
   4089                                             Q ? mkU128(0) : mkU64(0),
   4090                                             mkexpr(arg_n)))));
   4091             assign(res, binop(op_add,
   4092                               binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   4093                               mkexpr(round)));
   4094 #ifndef DISABLE_QC_FLAG
    4095             /* If the shift is greater than or equal to the element size and
    4096                the element is non-zero, then the QC flag should be set. */
   4097             esize = (8 << size) - 1;
   4098             esize = (esize <<  8) | esize;
   4099             esize = (esize << 16) | esize;
   4100             esize = (esize << 32) | esize;
   4101             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4102                              binop(cmp_gt, mkexpr(shval),
   4103                                            Q ? mkU128(esize) : mkU64(esize)),
   4104                              unop(cmp_neq, mkexpr(arg_m))),
   4105                        Q ? mkU128(0) : mkU64(0),
   4106                        Q, condT);
    4107             /* Otherwise the QC flag should be set if the shift value is
    4108                positive and the result, being right-shifted by the same value,
    4109                is not equal to the left argument. */
   4110             assign(mask, binop(cmp_gt, mkexpr(shval),
   4111                                Q ? mkU128(0) : mkU64(0)));
   4112             if (!Q && size == 3)
   4113                assign(tmp, binop(op_rev, mkexpr(res),
   4114                                          unop(Iop_64to8, mkexpr(arg_n))));
   4115             else
   4116                assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
   4117             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4118                              mkexpr(tmp), mkexpr(mask)),
   4119                        binop(Q ? Iop_AndV128 : Iop_And64,
   4120                              mkexpr(arg_m), mkexpr(mask)),
   4121                        Q, condT);
   4122 #endif
   4123             DIP("vqrshl.%c%u %c%u, %c%u, %c%u\n",
   4124                 U ? 'u' : 's', 8 << size,
   4125                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   4126                 nreg);
   4127          }
   4128          break;
   4129       case 6:
   4130          /* VMAX, VMIN  */
   4131          if (B == 0) {
   4132             /* VMAX */
   4133             IROp op;
   4134             if (U == 0) {
   4135                switch (size) {
   4136                   case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
   4137                   case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
   4138                   case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
   4139                   case 3: return False;
   4140                   default: vassert(0);
   4141                }
   4142             } else {
   4143                switch (size) {
   4144                   case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
   4145                   case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
   4146                   case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
   4147                   case 3: return False;
   4148                   default: vassert(0);
   4149                }
   4150             }
   4151             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4152             DIP("vmax.%c%u %c%u, %c%u, %c%u\n",
   4153                 U ? 'u' : 's', 8 << size,
   4154                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4155                 mreg);
   4156          } else {
   4157             /* VMIN */
   4158             IROp op;
   4159             if (U == 0) {
   4160                switch (size) {
   4161                   case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
   4162                   case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
   4163                   case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
   4164                   case 3: return False;
   4165                   default: vassert(0);
   4166                }
   4167             } else {
   4168                switch (size) {
   4169                   case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
   4170                   case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
   4171                   case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
   4172                   case 3: return False;
   4173                   default: vassert(0);
   4174                }
   4175             }
   4176             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4177             DIP("vmin.%c%u %c%u, %c%u, %c%u\n",
   4178                 U ? 'u' : 's', 8 << size,
   4179                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4180                 mreg);
   4181          }
   4182          break;
   4183       case 7:
   4184          if (B == 0) {
   4185             /* VABD */
   4186             IROp op_cmp, op_sub;
   4187             IRTemp cond;
   4188             if ((theInstr >> 23) & 1) {
   4189                vpanic("VABDL should not be in dis_neon_data_3same\n");
   4190             }
   4191             if (Q) {
   4192                switch (size) {
   4193                   case 0:
   4194                      op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
   4195                      op_sub = Iop_Sub8x16;
   4196                      break;
   4197                   case 1:
   4198                      op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
   4199                      op_sub = Iop_Sub16x8;
   4200                      break;
   4201                   case 2:
   4202                      op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
   4203                      op_sub = Iop_Sub32x4;
   4204                      break;
   4205                   case 3:
   4206                      return False;
   4207                   default:
   4208                      vassert(0);
   4209                }
   4210             } else {
   4211                switch (size) {
   4212                   case 0:
   4213                      op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4214                      op_sub = Iop_Sub8x8;
   4215                      break;
   4216                   case 1:
   4217                      op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   4218                      op_sub = Iop_Sub16x4;
   4219                      break;
   4220                   case 2:
   4221                      op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   4222                      op_sub = Iop_Sub32x2;
   4223                      break;
   4224                   case 3:
   4225                      return False;
   4226                   default:
   4227                      vassert(0);
   4228                }
   4229             }
   4230             if (Q) {
   4231                cond = newTemp(Ity_V128);
   4232             } else {
   4233                cond = newTemp(Ity_I64);
   4234             }
   4235             assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
   4236             assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
   4237                               binop(Q ? Iop_AndV128 : Iop_And64,
   4238                                     binop(op_sub, mkexpr(arg_n),
   4239                                                   mkexpr(arg_m)),
   4240                                     mkexpr(cond)),
   4241                               binop(Q ? Iop_AndV128 : Iop_And64,
   4242                                     binop(op_sub, mkexpr(arg_m),
   4243                                                   mkexpr(arg_n)),
   4244                                     unop(Q ? Iop_NotV128 : Iop_Not64,
   4245                                          mkexpr(cond)))));
   4246             DIP("vabd.%c%u %c%u, %c%u, %c%u\n",
   4247                 U ? 'u' : 's', 8 << size,
   4248                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4249                 mreg);
   4250          } else {
   4251             /* VABA */
   4252             IROp op_cmp, op_sub, op_add;
   4253             IRTemp cond, acc, tmp;
   4254             if ((theInstr >> 23) & 1) {
   4255                vpanic("VABAL should not be in dis_neon_data_3same");
   4256             }
   4257             if (Q) {
   4258                switch (size) {
   4259                   case 0:
   4260                      op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
   4261                      op_sub = Iop_Sub8x16;
   4262                      op_add = Iop_Add8x16;
   4263                      break;
   4264                   case 1:
   4265                      op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
   4266                      op_sub = Iop_Sub16x8;
   4267                      op_add = Iop_Add16x8;
   4268                      break;
   4269                   case 2:
   4270                      op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
   4271                      op_sub = Iop_Sub32x4;
   4272                      op_add = Iop_Add32x4;
   4273                      break;
   4274                   case 3:
   4275                      return False;
   4276                   default:
   4277                      vassert(0);
   4278                }
   4279             } else {
   4280                switch (size) {
   4281                   case 0:
   4282                      op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4283                      op_sub = Iop_Sub8x8;
   4284                      op_add = Iop_Add8x8;
   4285                      break;
   4286                   case 1:
   4287                      op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   4288                      op_sub = Iop_Sub16x4;
   4289                      op_add = Iop_Add16x4;
   4290                      break;
   4291                   case 2:
   4292                      op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   4293                      op_sub = Iop_Sub32x2;
   4294                      op_add = Iop_Add32x2;
   4295                      break;
   4296                   case 3:
   4297                      return False;
   4298                   default:
   4299                      vassert(0);
   4300                }
   4301             }
   4302             if (Q) {
   4303                cond = newTemp(Ity_V128);
   4304                acc = newTemp(Ity_V128);
   4305                tmp = newTemp(Ity_V128);
   4306                assign(acc, getQReg(dreg));
   4307             } else {
   4308                cond = newTemp(Ity_I64);
   4309                acc = newTemp(Ity_I64);
   4310                tmp = newTemp(Ity_I64);
   4311                assign(acc, getDRegI64(dreg));
   4312             }
   4313             assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
   4314             assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
   4315                               binop(Q ? Iop_AndV128 : Iop_And64,
   4316                                     binop(op_sub, mkexpr(arg_n),
   4317                                                   mkexpr(arg_m)),
   4318                                     mkexpr(cond)),
   4319                               binop(Q ? Iop_AndV128 : Iop_And64,
   4320                                     binop(op_sub, mkexpr(arg_m),
   4321                                                   mkexpr(arg_n)),
   4322                                     unop(Q ? Iop_NotV128 : Iop_Not64,
   4323                                          mkexpr(cond)))));
   4324             assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
   4325             DIP("vaba.%c%u %c%u, %c%u, %c%u\n",
   4326                 U ? 'u' : 's', 8 << size,
   4327                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4328                 mreg);
   4329          }
   4330          break;
   4331       case 8:
   4332          if (B == 0) {
   4333             IROp op;
   4334             if (U == 0) {
   4335                /* VADD  */
   4336                switch (size) {
   4337                   case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
   4338                   case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
                  case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
                  case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
                  default: vassert(0);
               }
               DIP("vadd.i%u %c%u, %c%u, %c%u\n",
                   8 << size, Q ? 'q' : 'd',
                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
            } else {
               /* VSUB: lanewise integer subtract; lane width is
                  8 << size bits, Q selects the 128-bit (Q-reg) form. */
               switch (size) {
                  case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
                  case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
                  case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
                  case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
                  default: vassert(0);
               }
               DIP("vsub.i%u %c%u, %c%u, %c%u\n",
                   8 << size, Q ? 'q' : 'd',
                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
            }
            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
         } else {
            IROp op;
            /* Per-lane "compare non-zero" op, shared by VTST and VCEQ
               below. */
            switch (size) {
               case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
               case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
               case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
               case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
               default: vassert(0);
            }
            if (U == 0) {
               /* VTST: lane := all-ones iff (n & m) has any bit set,
                  computed as CmpNEZ(n & m). */
               assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
                                          mkexpr(arg_n),
                                          mkexpr(arg_m))));
               DIP("vtst.%u %c%u, %c%u, %c%u\n",
                   8 << size, Q ? 'q' : 'd',
                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
            } else {
               /* VCEQ: lane := all-ones iff n == m, computed as
                  ~CmpNEZ(n ^ m). */
               assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
                                unop(op,
                                     binop(Q ? Iop_XorV128 : Iop_Xor64,
                                           mkexpr(arg_n),
                                           mkexpr(arg_m)))));
               DIP("vceq.i%u %c%u, %c%u, %c%u\n",
                   8 << size, Q ? 'q' : 'd',
                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
            }
         }
         break;
   4390       case 9:
   4391          if (B == 0) {
   4392             /* VMLA, VMLS (integer) */
   4393             IROp op, op2;
   4394             UInt P = (theInstr >> 24) & 1;
   4395             if (P) {
   4396                switch (size) {
   4397                   case 0:
   4398                      op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
   4399                      op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   4400                      break;
   4401                   case 1:
   4402                      op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   4403                      op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   4404                      break;
   4405                   case 2:
   4406                      op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   4407                      op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   4408                      break;
   4409                   case 3:
   4410                      return False;
   4411                   default:
   4412                      vassert(0);
   4413                }
   4414             } else {
   4415                switch (size) {
   4416                   case 0:
   4417                      op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
   4418                      op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
   4419                      break;
   4420                   case 1:
   4421                      op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
   4422                      op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
   4423                      break;
   4424                   case 2:
   4425                      op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
   4426                      op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
   4427                      break;
   4428                   case 3:
   4429                      return False;
   4430                   default:
   4431                      vassert(0);
   4432                }
   4433             }
   4434             assign(res, binop(op2,
   4435                               Q ? getQReg(dreg) : getDRegI64(dreg),
   4436                               binop(op, mkexpr(arg_n), mkexpr(arg_m))));
   4437             DIP("vml%c.i%u %c%u, %c%u, %c%u\n",
   4438                 P ? 's' : 'a', 8 << size,
   4439                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4440                 mreg);
   4441          } else {
   4442             /* VMUL */
   4443             IROp op;
   4444             UInt P = (theInstr >> 24) & 1;
   4445             if (P) {
   4446                switch (size) {
   4447                   case 0:
   4448                      op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
   4449                      break;
   4450                   case 1: case 2: case 3: return False;
   4451                   default: vassert(0);
   4452                }
   4453             } else {
   4454                switch (size) {
   4455                   case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
   4456                   case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
   4457                   case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
   4458                   case 3: return False;
   4459                   default: vassert(0);
   4460                }
   4461             }
   4462             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4463             DIP("vmul.%c%u %c%u, %c%u, %c%u\n",
   4464                 P ? 'p' : 'i', 8 << size,
   4465                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4466                 mreg);
   4467          }
   4468          break;
   4469       case 10: {
   4470          /* VPMAX, VPMIN  */
   4471          UInt P = (theInstr >> 4) & 1;
   4472          IROp op;
   4473          if (Q)
   4474             return False;
   4475          if (P) {
   4476             switch (size) {
   4477                case 0: op = U ? Iop_PwMin8Ux8  : Iop_PwMin8Sx8; break;
   4478                case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
   4479                case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
   4480                case 3: return False;
   4481                default: vassert(0);
   4482             }
   4483          } else {
   4484             switch (size) {
   4485                case 0: op = U ? Iop_PwMax8Ux8  : Iop_PwMax8Sx8; break;
   4486                case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
   4487                case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
   4488                case 3: return False;
   4489                default: vassert(0);
   4490             }
   4491          }
   4492          assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4493          DIP("vp%s.%c%u %c%u, %c%u, %c%u\n",
   4494              P ? "min" : "max", U ? 'u' : 's',
   4495              8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
   4496              Q ? 'q' : 'd', mreg);
   4497          break;
   4498       }
   4499       case 11:
   4500          if (B == 0) {
   4501             if (U == 0) {
   4502                /* VQDMULH  */
   4503                IROp op ,op2;
   4504                ULong imm;
   4505                switch (size) {
   4506                   case 0: case 3:
   4507                      return False;
   4508                   case 1:
   4509                      op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
   4510                      op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   4511                      imm = 1LL << 15;
   4512                      imm = (imm << 16) | imm;
   4513                      imm = (imm << 32) | imm;
   4514                      break;
   4515                   case 2:
   4516                      op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
   4517                      op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   4518                      imm = 1LL << 31;
   4519                      imm = (imm << 32) | imm;
   4520                      break;
   4521                   default:
   4522                      vassert(0);
   4523                }
   4524                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4525 #ifndef DISABLE_QC_FLAG
   4526                setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4527                                 binop(op2, mkexpr(arg_n),
   4528                                            Q ? mkU128(imm) : mkU64(imm)),
   4529                                 binop(op2, mkexpr(arg_m),
   4530                                            Q ? mkU128(imm) : mkU64(imm))),
   4531                           Q ? mkU128(0) : mkU64(0),
   4532                           Q, condT);
   4533 #endif
   4534                DIP("vqdmulh.s%u %c%u, %c%u, %c%u\n",
   4535                    8 << size, Q ? 'q' : 'd',
   4536                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4537             } else {
   4538                /* VQRDMULH */
   4539                IROp op ,op2;
   4540                ULong imm;
   4541                switch(size) {
   4542                   case 0: case 3:
   4543                      return False;
   4544                   case 1:
   4545                      imm = 1LL << 15;
   4546                      imm = (imm << 16) | imm;
   4547                      imm = (imm << 32) | imm;
   4548                      op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
   4549                      op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
   4550                      break;
   4551                   case 2:
   4552                      imm = 1LL << 31;
   4553                      imm = (imm << 32) | imm;
   4554                      op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
   4555                      op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
   4556                      break;
   4557                   default:
   4558                      vassert(0);
   4559                }
   4560                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4561 #ifndef DISABLE_QC_FLAG
   4562                setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4563                                 binop(op2, mkexpr(arg_n),
   4564                                            Q ? mkU128(imm) : mkU64(imm)),
   4565                                 binop(op2, mkexpr(arg_m),
   4566                                            Q ? mkU128(imm) : mkU64(imm))),
   4567                           Q ? mkU128(0) : mkU64(0),
   4568                           Q, condT);
   4569 #endif
   4570                DIP("vqrdmulh.s%u %c%u, %c%u, %c%u\n",
   4571                    8 << size, Q ? 'q' : 'd',
   4572                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4573             }
   4574          } else {
   4575             if (U == 0) {
   4576                /* VPADD */
   4577                IROp op;
   4578                if (Q)
   4579                   return False;
   4580                switch (size) {
   4581                   case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8;  break;
   4582                   case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
   4583                   case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
   4584                   case 3: return False;
   4585                   default: vassert(0);
   4586                }
   4587                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4588                DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
   4589                    8 << size, Q ? 'q' : 'd',
   4590                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4591             }
   4592          }
   4593          break;
      /* Starting from here these are FP SIMD cases */
      case 13:
         if (B == 0) {
            IROp op;
            if (U == 0) {
               if ((C >> 1) == 0) {
                  /* VADD: lanewise F32 add; Q selects the 128-bit form. */
                  op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2 ;
                  DIP("vadd.f32 %c%u, %c%u, %c%u\n",
                      Q ? 'q' : 'd', dreg,
                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
               } else {
                  /* VSUB: lanewise F32 subtract. */
                  op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2 ;
                  DIP("vsub.f32 %c%u, %c%u, %c%u\n",
                      Q ? 'q' : 'd', dreg,
                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
               }
            } else {
               if ((C >> 1) == 0) {
                  /* VPADD: pairwise F32 add; D-register form only. */
                  if (Q)
                     return False;
                  op = Iop_PwAdd32Fx2;
                  DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
               } else {
                  /* VABD: res := |n - m| per F32 lane.  This branch
                     assigns res itself and breaks out early, skipping
                     the shared assign(res, ...) below. */
                  if (Q) {
                     assign(res, unop(Iop_Abs32Fx4,
                                      binop(Iop_Sub32Fx4,
                                            mkexpr(arg_n),
                                            mkexpr(arg_m))));
                  } else {
                     assign(res, unop(Iop_Abs32Fx2,
                                      binop(Iop_Sub32Fx2,
                                            mkexpr(arg_n),
                                            mkexpr(arg_m))));
                  }
                  DIP("vabd.f32 %c%u, %c%u, %c%u\n",
                      Q ? 'q' : 'd', dreg,
                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
                  break;
               }
            }
            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
         } else {
            if (U == 0) {
               /* VMLA (P==0), VMLS (P==1): F32 multiply-accumulate.
                  Only size bit 0 == 0 (single precision) is accepted;
                  otherwise return False. */
               IROp op, op2;
               UInt P = (theInstr >> 21) & 1;
               if (P) {
                  switch (size & 1) {
                     case 0:
                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
                        op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
                        break;
                     case 1: return False;
                     default: vassert(0);
                  }
               } else {
                  switch (size & 1) {
                     case 0:
                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
                        op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
                        break;
                     case 1: return False;
                     default: vassert(0);
                  }
               }
               /* res = Dd/Qd +/- (n * m) */
               assign(res, binop(op2,
                                 Q ? getQReg(dreg) : getDRegI64(dreg),
                                 binop(op, mkexpr(arg_n), mkexpr(arg_m))));

               DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
                   P ? 's' : 'a', Q ? 'q' : 'd',
                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   4670             } else {
   4671                /* VMUL  */
   4672                IROp op;
   4673                if ((C >> 1) != 0)
   4674                   return False;
   4675                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ;
   4676                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4677                DIP("vmul.f32 %c%u, %c%u, %c%u\n",
   4678                    Q ? 'q' : 'd'<