Home | History | Annotate | Download | only in priv
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- begin                                       guest_arm_toIR.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2004-2010 OpenWorks LLP
     11       info (at) open-works.net
     12 
     13    NEON support is
     14    Copyright (C) 2010-2010 Samsung Electronics
     15    contributed by Dmitry Zhurikhin <zhur (at) ispras.ru>
     16               and Kirill Batuzov <batuzovk (at) ispras.ru>
     17 
     18    This program is free software; you can redistribute it and/or
     19    modify it under the terms of the GNU General Public License as
     20    published by the Free Software Foundation; either version 2 of the
     21    License, or (at your option) any later version.
     22 
     23    This program is distributed in the hope that it will be useful, but
     24    WITHOUT ANY WARRANTY; without even the implied warranty of
     25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     26    General Public License for more details.
     27 
     28    You should have received a copy of the GNU General Public License
     29    along with this program; if not, write to the Free Software
     30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     31    02110-1301, USA.
     32 
     33    The GNU General Public License is contained in the file COPYING.
     34 */
     35 
     36 /* XXXX thumb to check:
     37    that all cases where putIRegT writes r15, we generate a jump.
     38 
     39    All uses of newTemp assign to an IRTemp and not a UInt
     40 
     41    For all thumb loads and stores, including VFP ones, new-ITSTATE is
     42    backed out before the memory op, and restored afterwards.  This
     43    needs to happen even after we go uncond.  (and for sure it doesn't
     44    happen for VFP loads/stores right now).
     45 
     46    VFP on thumb: check that we exclude all r13/r15 cases that we
     47    should.
     48 
     49    XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
     50    taking into account the number of insns guarded by an IT.
     51 
     52    remove the nasty hack, in the spechelper, of looking for Or32(...,
     53    0xE0) in as the first arg to armg_calculate_condition, and instead
     54    use Slice44 as specified in comments in the spechelper.
     55 
     56    add specialisations for armg_calculate_flag_c and _v, as they
     57    are moderately often needed in Thumb code.
     58 
     59    Correctness: ITSTATE handling in Thumb SVCs is wrong.
     60 
     61    Correctness (obscure): in m_transtab, when invalidating code
     62    address ranges, invalidate up to 18 bytes after the end of the
     63    range.  This is because the ITSTATE optimisation at the top of
     64    _THUMB_WRK below analyses up to 18 bytes before the start of any
     65    given instruction, and so might depend on the invalidated area.
     66 */
     67 
     68 /* Limitations, etc
     69 
     70    - pretty dodgy exception semantics for {LD,ST}Mxx, no doubt
     71 
     72    - SWP: the restart jump back is Ijk_Boring; it should be
     73      Ijk_NoRedir but that's expensive.  See comments on casLE() in
     74      guest_x86_toIR.c.
     75 */
     76 
     77 /* "Special" instructions.
     78 
     79    This instruction decoder can decode four special instructions
     80    which mean nothing natively (are no-ops as far as regs/mem are
     81    concerned) but have meaning for supporting Valgrind.  A special
     82    instruction is flagged by a 16-byte preamble:
     83 
     84       E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
     85       (mov r12, r12, ROR #3;   mov r12, r12, ROR #13;
     86        mov r12, r12, ROR #29;  mov r12, r12, ROR #19)
     87 
     88    Following that, one of the following 3 are allowed
     89    (standard interpretation in parentheses):
     90 
     91       E18AA00A (orr r10,r10,r10)   R3 = client_request ( R4 )
     92       E18BB00B (orr r11,r11,r11)   R3 = guest_NRADDR
     93       E18CC00C (orr r12,r12,r12)   branch-and-link-to-noredir R4
     94 
     95    Any other bytes following the 16-byte preamble are illegal and
     96    constitute a failure in instruction decoding.  This all assumes
     97    that the preamble will never occur except in specific code
     98    fragments designed for Valgrind to catch.
     99 */
    100 
    101 /* Translates ARM(v5) code to IR. */
    102 
    103 #include "libvex_basictypes.h"
    104 #include "libvex_ir.h"
    105 #include "libvex.h"
    106 #include "libvex_guest_arm.h"
    107 
    108 #include "main_util.h"
    109 #include "main_globals.h"
    110 #include "guest_generic_bb_to_IR.h"
    111 #include "guest_arm_defs.h"
    112 
    113 
    114 /*------------------------------------------------------------*/
    115 /*--- Globals                                              ---*/
    116 /*------------------------------------------------------------*/
    117 
    118 /* These are set at the start of the translation of a instruction, so
    119    that we don't have to pass them around endlessly.  CONST means does
    120    not change during translation of the instruction.
    121 */
    122 
    123 /* CONST: is the host bigendian?  This has to do with float vs double
    124    register accesses on VFP, but it's complex and not properly thought
    125    out. */
    126 static Bool host_is_bigendian;
    127 
    128 /* CONST: The guest address for the instruction currently being
    129    translated.  This is the real, "decoded" address (not subject
    130    to the CPSR.T kludge). */
    131 static Addr32 guest_R15_curr_instr_notENC;
    132 
    133 /* CONST, FOR ASSERTIONS ONLY.  Indicates whether currently processed
    134    insn is Thumb (True) or ARM (False). */
    135 static Bool __curr_is_Thumb;
    136 
    137 /* MOD: The IRSB* into which we're generating code. */
    138 static IRSB* irsb;
    139 
    140 /* These are to do with handling writes to r15.  They are initially
    141    set at the start of disInstr_ARM_WRK to indicate no update,
    142    possibly updated during the routine, and examined again at the end.
    143    If they have been set to indicate a r15 update then a jump is
    144    generated.  Note, "explicit" jumps (b, bx, etc) are generated
    145    directly, not using this mechanism -- this is intended to handle
    146    the implicit-style jumps resulting from (eg) assigning to r15 as
    147    the result of insns we wouldn't normally consider branchy. */
    148 
    149 /* MOD.  Initially False; set to True iff abovementioned handling is
    150    required. */
    151 static Bool r15written;
    152 
    153 /* MOD.  Initially IRTemp_INVALID.  If the r15 branch to be generated
    154    is conditional, this holds the gating IRTemp :: Ity_I32.  If the
    155    branch to be generated is unconditional, this remains
    156    IRTemp_INVALID. */
    157 static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
    158 
    159 /* MOD.  Initially Ijk_Boring.  If an r15 branch is to be generated,
    160    this holds the jump kind. */
    161 static IRTemp r15kind;
    162 
    163 
    164 /*------------------------------------------------------------*/
    165 /*--- Debugging output                                     ---*/
    166 /*------------------------------------------------------------*/
    167 
    168 #define DIP(format, args...)           \
    169    if (vex_traceflags & VEX_TRACE_FE)  \
    170       vex_printf(format, ## args)
    171 
    172 #define DIS(buf, format, args...)      \
    173    if (vex_traceflags & VEX_TRACE_FE)  \
    174       vex_sprintf(buf, format, ## args)
    175 
    176 #define ASSERT_IS_THUMB \
    177    do { vassert(__curr_is_Thumb); } while (0)
    178 
    179 #define ASSERT_IS_ARM \
    180    do { vassert(! __curr_is_Thumb); } while (0)
    181 
    182 
    183 /*------------------------------------------------------------*/
    184 /*--- Helper bits and pieces for deconstructing the        ---*/
    185 /*--- arm insn stream.                                     ---*/
    186 /*------------------------------------------------------------*/
    187 
    188 /* Do a little-endian load of a 32-bit word, regardless of the
    189    endianness of the underlying host. */
    190 static inline UInt getUIntLittleEndianly ( UChar* p )
    191 {
    192    UInt w = 0;
    193    w = (w << 8) | p[3];
    194    w = (w << 8) | p[2];
    195    w = (w << 8) | p[1];
    196    w = (w << 8) | p[0];
    197    return w;
    198 }
    199 
    200 /* Do a little-endian load of a 16-bit word, regardless of the
    201    endianness of the underlying host. */
    202 static inline UShort getUShortLittleEndianly ( UChar* p )
    203 {
    204    UShort w = 0;
    205    w = (w << 8) | p[1];
    206    w = (w << 8) | p[0];
    207    return w;
    208 }
    209 
    210 static UInt ROR32 ( UInt x, UInt sh ) {
    211    vassert(sh >= 0 && sh < 32);
    212    if (sh == 0)
    213       return x;
    214    else
    215       return (x << (32-sh)) | (x >> sh);
    216 }
    217 
    218 static Int popcount32 ( UInt x )
    219 {
    220    Int res = 0, i;
    221    for (i = 0; i < 32; i++) {
    222       res += (x & 1);
    223       x >>= 1;
    224    }
    225    return res;
    226 }
    227 
    228 static UInt setbit32 ( UInt x, Int ix, UInt b )
    229 {
    230    UInt mask = 1 << ix;
    231    x &= ~mask;
    232    x |= ((b << ix) & mask);
    233    return x;
    234 }
    235 
/* Concatenate individual bits (each argument must be 0 or 1) into a
   small integer constant, most-significant bit first.  These are used
   to write instruction-encoding bit patterns legibly in the
   decoders below. */
#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0)                      \
  (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

/* 5/6/7-bit variants, expressed as BITS8 with leading zero bits. */
#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)      \
   (((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)                                \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

/* produces _uint[_bMax:_bMin], i.e. the (inclusive) bit-slice of the
   argument; the 1ULL arithmetic avoids overflow when the slice is
   the full 32 bits wide. */
#define SLICE_UInt(_uint,_bMax,_bMin) \
   (( ((UInt)(_uint)) >> (_bMin)) \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
    268 
    269 
    270 /*------------------------------------------------------------*/
    271 /*--- Helper bits and pieces for creating IR fragments.    ---*/
    272 /*------------------------------------------------------------*/
    273 
/* Make a 64-bit integer constant expression. */
static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

/* Make a 32-bit integer constant expression. */
static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

/* Make an 8-bit integer constant expression; i must fit in 8 bits. */
static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

/* Make an expression that reads the given temporary. */
static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

/* Build a unary-operator expression. */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

/* Build a binary-operator expression. */
static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

/* Build a ternary-operator expression. */
static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

/* Build a little-endian load of the given type from ADDR. */
static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}
    314 
/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Generate the statement "dst = e" into irsb. */
static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

/* Generate a little-endian store of DATA at ADDR into irsb. */
static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}
    337 
/* Produces a value in 0 .. 3, which is encoded as per the type
   IRRoundingMode.  "FAKE" because it ignores the guest FPSCR
   rounding-mode bits and unconditionally yields round-to-nearest. */
static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
{
   return mkU32(Irrm_NEAREST);
}
    344 
    345 /* Generate an expression for SRC rotated right by ROT. */
    346 static IRExpr* genROR32( IRTemp src, Int rot )
    347 {
    348    vassert(rot >= 0 && rot < 32);
    349    if (rot == 0)
    350       return mkexpr(src);
    351    return
    352       binop(Iop_Or32,
    353             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
    354             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
    355 }
    356 
/* Make a 128-bit vector constant expression whose upper and lower
   64-bit halves are both I. */
static IRExpr* mkU128 ( ULong i )
{
   return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
}
    361 
    362 /* Generate a 4-aligned version of the given expression if
    363    the given condition is true.  Else return it unchanged. */
    364 static IRExpr* align4if ( IRExpr* e, Bool b )
    365 {
    366    if (b)
    367       return binop(Iop_And32, e, mkU32(~3));
    368    else
    369       return e;
    370 }
    371 
    372 
    373 /*------------------------------------------------------------*/
    374 /*--- Helpers for accessing guest registers.               ---*/
    375 /*------------------------------------------------------------*/
    376 
    377 #define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
    378 #define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
    379 #define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
    380 #define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
    381 #define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
    382 #define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
    383 #define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
    384 #define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
    385 #define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
    386 #define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
    387 #define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
    388 #define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
    389 #define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
    390 #define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
    391 #define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
    392 #define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)
    393 
    394 #define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
    395 #define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
    396 #define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
    397 #define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
    398 #define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)
    399 
    400 #define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
    401 #define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
    402 #define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
    403 #define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
    404 #define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
    405 #define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
    406 #define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
    407 #define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
    408 #define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
    409 #define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
    410 #define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
    411 #define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
    412 #define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
    413 #define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
    414 #define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
    415 #define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
    416 #define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
    417 #define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
    418 #define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
    419 #define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
    420 #define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
    421 #define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
    422 #define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
    423 #define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
    424 #define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
    425 #define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
    426 #define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
    427 #define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
    428 #define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
    429 #define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
    430 #define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
    431 #define OFFB_D31      offsetof(VexGuestARMState,guest_D31)
    432 
    433 #define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
    434 #define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
    435 #define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
    436 #define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
    437 #define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
    438 #define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
    439 #define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
    440 #define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)
    441 
    442 
    443 /* ---------------- Integer registers ---------------- */
    444 
    445 static Int integerGuestRegOffset ( UInt iregNo )
    446 {
    447    /* Do we care about endianness here?  We do if sub-parts of integer
    448       registers are accessed, but I don't think that ever happens on
    449       ARM. */
    450    switch (iregNo) {
    451       case 0:  return OFFB_R0;
    452       case 1:  return OFFB_R1;
    453       case 2:  return OFFB_R2;
    454       case 3:  return OFFB_R3;
    455       case 4:  return OFFB_R4;
    456       case 5:  return OFFB_R5;
    457       case 6:  return OFFB_R6;
    458       case 7:  return OFFB_R7;
    459       case 8:  return OFFB_R8;
    460       case 9:  return OFFB_R9;
    461       case 10: return OFFB_R10;
    462       case 11: return OFFB_R11;
    463       case 12: return OFFB_R12;
    464       case 13: return OFFB_R13;
    465       case 14: return OFFB_R14;
    466       case 15: return OFFB_R15T;
    467       default: vassert(0);
    468    }
    469 }
    470 
/* Plain ("low level") read from a reg; no +8 offset magic for r15. */
static IRExpr* llGetIReg ( UInt iregNo )
{
   vassert(iregNo < 16);
   return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
}

/* Architected read from a reg in ARM mode.  This automagically adds 8
   to all reads of r15 (ARM pipeline semantics: reading the PC yields
   the address of the current instruction plus 8). */
static IRExpr* getIRegA ( UInt iregNo )
{
   IRExpr* e;
   ASSERT_IS_ARM;
   vassert(iregNo < 16);
   if (iregNo == 15) {
      /* If asked for r15, don't read the guest state value, as that
         may not be up to date in the case where loop unrolling has
         happened, because the first insn's write to the block is
         omitted; hence in the 2nd and subsequent unrollings we don't
         have a correct value in guest r15.  Instead produce the
         constant that we know would be produced at this point. */
      vassert(0 == (guest_R15_curr_instr_notENC & 3));
      e = mkU32(guest_R15_curr_instr_notENC + 8);
   } else {
      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
   }
   return e;
}

/* Architected read from a reg in Thumb mode.  This automagically adds
   4 to all reads of r15 (Thumb pipeline semantics). */
static IRExpr* getIRegT ( UInt iregNo )
{
   IRExpr* e;
   ASSERT_IS_THUMB;
   vassert(iregNo < 16);
   if (iregNo == 15) {
      /* Ditto comment in getIRegA. */
      vassert(0 == (guest_R15_curr_instr_notENC & 1));
      e = mkU32(guest_R15_curr_instr_notENC + 4);
   } else {
      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
   }
   return e;
}
    516 
/* Plain ("low level") write to a reg; no jump or alignment magic for
   r15.  e must be :: Ity_I32. */
static void llPutIReg ( UInt iregNo, IRExpr* e )
{
   vassert(iregNo < 16);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
}

/* Architected write to an integer register in ARM mode.  If it is to
   r15, record info so at the end of this insn's translation, a branch
   to it can be made.  Also handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional.  (NB: the claimed 4-aligning of r15 writes is
   currently disabled -- see the FIXME below.) */
static void putIRegA ( UInt       iregNo,
                       IRExpr*    e,
                       IRTemp     guardT /* :: Ity_I32, 0 or 1 */,
                       IRJumpKind jk /* if a jump is generated */ )
{
   /* if writing r15, force e to be 4-aligned. */
   // INTERWORKING FIXME.  this needs to be relaxed so that
   // puts caused by LDMxx which load r15 interwork right.
   // but is no aligned too relaxed?
   //if (iregNo == 15)
   //   e = binop(Iop_And32, e, mkU32(~3));
   ASSERT_IS_ARM;
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutIReg( iregNo, e );
   } else {
      /* Conditional: Mux0X keeps the old register value when the
         guard is zero, and writes e otherwise. */
      llPutIReg( iregNo,
                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                               llGetIReg(iregNo),
                               e ));
   }
   if (iregNo == 15) {
      // assert against competing r15 updates.  Shouldn't
      // happen; should be ruled out by the instr matching
      // logic.
      vassert(r15written == False);
      vassert(r15guard   == IRTemp_INVALID);
      vassert(r15kind    == Ijk_Boring);
      /* Record the pending branch; it is materialised at the end of
         the instruction's translation. */
      r15written = True;
      r15guard   = guardT;
      r15kind    = jk;
   }
}
    566 
    567 
    568 /* Architected write to an integer register in Thumb mode.  Writes to
    569    r15 are not allowed.  Handles conditional writes to the register:
    570    if guardT == IRTemp_INVALID then the write is unconditional. */
    571 static void putIRegT ( UInt       iregNo,
    572                        IRExpr*    e,
    573                        IRTemp     guardT /* :: Ity_I32, 0 or 1 */ )
    574 {
    575    /* So, generate either an unconditional or a conditional write to
    576       the reg. */
    577    ASSERT_IS_THUMB;
    578    vassert(iregNo >= 0 && iregNo <= 14);
    579    if (guardT == IRTemp_INVALID) {
    580       /* unconditional write */
    581       llPutIReg( iregNo, e );
    582    } else {
    583       llPutIReg( iregNo,
    584                  IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
    585                                llGetIReg(iregNo),
    586                                e ));
    587    }
    588 }
    589 
    590 
    591 /* Thumb16 and Thumb32 only.
    592    Returns true if reg is 13 or 15.  Implements the BadReg
    593    predicate in the ARM ARM. */
    594 static Bool isBadRegT ( UInt r )
    595 {
    596    vassert(r <= 15);
    597    ASSERT_IS_THUMB;
    598    return r == 13 || r == 15;
    599 }
    600 
    601 
    602 /* ---------------- Double registers ---------------- */
    603 
    604 static Int doubleGuestRegOffset ( UInt dregNo )
    605 {
    606    /* Do we care about endianness here?  Probably do if we ever get
    607       into the situation of dealing with the single-precision VFP
    608       registers. */
    609    switch (dregNo) {
    610       case 0:  return OFFB_D0;
    611       case 1:  return OFFB_D1;
    612       case 2:  return OFFB_D2;
    613       case 3:  return OFFB_D3;
    614       case 4:  return OFFB_D4;
    615       case 5:  return OFFB_D5;
    616       case 6:  return OFFB_D6;
    617       case 7:  return OFFB_D7;
    618       case 8:  return OFFB_D8;
    619       case 9:  return OFFB_D9;
    620       case 10: return OFFB_D10;
    621       case 11: return OFFB_D11;
    622       case 12: return OFFB_D12;
    623       case 13: return OFFB_D13;
    624       case 14: return OFFB_D14;
    625       case 15: return OFFB_D15;
    626       case 16: return OFFB_D16;
    627       case 17: return OFFB_D17;
    628       case 18: return OFFB_D18;
    629       case 19: return OFFB_D19;
    630       case 20: return OFFB_D20;
    631       case 21: return OFFB_D21;
    632       case 22: return OFFB_D22;
    633       case 23: return OFFB_D23;
    634       case 24: return OFFB_D24;
    635       case 25: return OFFB_D25;
    636       case 26: return OFFB_D26;
    637       case 27: return OFFB_D27;
    638       case 28: return OFFB_D28;
    639       case 29: return OFFB_D29;
    640       case 30: return OFFB_D30;
    641       case 31: return OFFB_D31;
    642       default: vassert(0);
    643    }
    644 }
    645 
/* Plain ("low level") read from a VFP Dreg. */
static IRExpr* llGetDReg ( UInt dregNo )
{
   vassert(dregNo < 32);
   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
}

/* Architected read from a VFP Dreg.  Currently identical to the
   low-level read; kept separate for symmetry with the integer
   register accessors. */
static IRExpr* getDReg ( UInt dregNo ) {
   return llGetDReg( dregNo );
}

/* Plain ("low level") write to a VFP Dreg.  e must be :: Ity_F64. */
static void llPutDReg ( UInt dregNo, IRExpr* e )
{
   vassert(dregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
}

/* Architected write to a VFP Dreg.  Handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional. */
static void putDReg ( UInt    dregNo,
                      IRExpr* e,
                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutDReg( dregNo, e );
   } else {
      /* Conditional: keep the old register value when the guard is
         zero, else write e. */
      llPutDReg( dregNo,
                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                               llGetDReg(dregNo),
                               e ));
   }
}
    685 
/* And now exactly the same stuff all over again, but this time
   taking/returning I64 rather than F64, to support 64-bit Neon
   ops. */

/* Plain ("low level") read from a Neon Integer Dreg. */
static IRExpr* llGetDRegI64 ( UInt dregNo )
{
   vassert(dregNo < 32);
   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
}

/* Architected read from a Neon Integer Dreg.  Currently identical to
   the low-level read. */
static IRExpr* getDRegI64 ( UInt dregNo ) {
   return llGetDRegI64( dregNo );
}

/* Plain ("low level") write to a Neon Integer Dreg.  e must be
   :: Ity_I64. */
static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
{
   vassert(dregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
}

/* Architected write to a Neon Integer Dreg.  Handles conditional
   writes to the register: if guardT == IRTemp_INVALID then the write
   is unconditional. */
static void putDRegI64 ( UInt    dregNo,
                         IRExpr* e,
                         IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutDRegI64( dregNo, e );
   } else {
      /* Conditional: keep the old register value when the guard is
         zero, else write e. */
      llPutDRegI64( dregNo,
                    IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                                  llGetDRegI64(dregNo),
                                  e ));
   }
}
    729 
    730 /* ---------------- Quad registers ---------------- */
    731 
    732 static Int quadGuestRegOffset ( UInt qregNo )
    733 {
    734    /* Do we care about endianness here?  Probably do if we ever get
    735       into the situation of dealing with the 64 bit Neon registers. */
    736    switch (qregNo) {
    737       case 0:  return OFFB_D0;
    738       case 1:  return OFFB_D2;
    739       case 2:  return OFFB_D4;
    740       case 3:  return OFFB_D6;
    741       case 4:  return OFFB_D8;
    742       case 5:  return OFFB_D10;
    743       case 6:  return OFFB_D12;
    744       case 7:  return OFFB_D14;
    745       case 8:  return OFFB_D16;
    746       case 9:  return OFFB_D18;
    747       case 10: return OFFB_D20;
    748       case 11: return OFFB_D22;
    749       case 12: return OFFB_D24;
    750       case 13: return OFFB_D26;
    751       case 14: return OFFB_D28;
    752       case 15: return OFFB_D30;
    753       default: vassert(0);
    754    }
    755 }
    756 
/* Plain ("low level") read from a Neon Qreg. */
static IRExpr* llGetQReg ( UInt qregNo )
{
   vassert(qregNo < 16);
   return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
}

/* Architected read from a Neon Qreg.  Currently identical to the
   low-level read. */
static IRExpr* getQReg ( UInt qregNo ) {
   return llGetQReg( qregNo );
}

/* Plain ("low level") write to a Neon Qreg.  e must be :: Ity_V128. */
static void llPutQReg ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 16);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
}

/* Architected write to a Neon Qreg.  Handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional. */
static void putQReg ( UInt    qregNo,
                      IRExpr* e,
                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutQReg( qregNo, e );
   } else {
      /* Conditional: keep the old register value when the guard is
         zero, else write e. */
      llPutQReg( qregNo,
                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                               llGetQReg(qregNo),
                               e ));
   }
}
    796 
    797 
    798 /* ---------------- Float registers ---------------- */
    799 
    800 static Int floatGuestRegOffset ( UInt fregNo )
    801 {
    802    /* Start with the offset of the containing double, and then correct
    803       for endianness.  Actually this is completely bogus and needs
    804       careful thought. */
    805    Int off;
    806    vassert(fregNo < 32);
    807    off = doubleGuestRegOffset(fregNo >> 1);
    808    if (host_is_bigendian) {
    809       vassert(0);
    810    } else {
    811       if (fregNo & 1)
    812          off += 4;
    813    }
    814    return off;
    815 }
    816 
/* Plain ("low level") read from a VFP Freg.  Returns the 32-bit
   register as an Ity_F32 expression. */
static IRExpr* llGetFReg ( UInt fregNo )
{
   vassert(fregNo < 32);
   return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
}
    823 
/* Architected read from a VFP Freg.  Reads need no guarding, so this
   is currently identical to the low-level read. */
static IRExpr* getFReg ( UInt fregNo ) {
   return llGetFReg( fregNo );
}
    828 
/* Plain ("low level") write to a VFP Freg.  'e' must be an Ity_F32
   expression; the write is unconditional. */
static void llPutFReg ( UInt fregNo, IRExpr* e )
{
   vassert(fregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
   stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
}
    836 
    837 /* Architected write to a VFP Freg.  Handles conditional writes to the
    838    register: if guardT == IRTemp_INVALID then the write is
    839    unconditional. */
    840 static void putFReg ( UInt    fregNo,
    841                       IRExpr* e,
    842                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
    843 {
    844    /* So, generate either an unconditional or a conditional write to
    845       the reg. */
    846    if (guardT == IRTemp_INVALID) {
    847       /* unconditional write */
    848       llPutFReg( fregNo, e );
    849    } else {
    850       llPutFReg( fregNo,
    851                  IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
    852                                llGetFReg(fregNo),
    853                                e ));
    854    }
    855 }
    856 
    857 
    858 /* ---------------- Misc registers ---------------- */
    859 
/* Write the Ity_I32 value 'e' to the guest-state field at byte offset
   'gsoffset', possibly conditionally.  Only the whitelisted offsets
   below may be written through this function; anything else asserts.
   If guardT == IRTemp_INVALID the write is unconditional; otherwise
   the old value is retained when the guard evaluates to zero. */
static void putMiscReg32 ( UInt    gsoffset,
                           IRExpr* e, /* :: Ity_I32 */
                           IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* Whitelist of writable miscellaneous state fields. */
   switch (gsoffset) {
      case OFFB_FPSCR:   break;
      case OFFB_QFLAG32: break;
      case OFFB_GEFLAG0: break;
      case OFFB_GEFLAG1: break;
      case OFFB_GEFLAG2: break;
      case OFFB_GEFLAG3: break;
      default: vassert(0); /* awaiting more cases */
   }
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);

   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      stmt(IRStmt_Put(gsoffset, e));
   } else {
      /* conditional write: re-store the old value if the guard is 0 */
      stmt(IRStmt_Put(
         gsoffset,
         IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                       IRExpr_Get(gsoffset, Ity_I32),
                       e
         )
      ));
   }
}
    888 
/* Read the guest ITSTATE word into a fresh temp.  Thumb mode only. */
static IRTemp get_ITSTATE ( void )
{
   ASSERT_IS_THUMB;
   IRTemp t = newTemp(Ity_I32);
   assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
   return t;
}
    896 
/* Write temp 't' (:: Ity_I32) back to the guest ITSTATE word.
   Thumb mode only; the write is unconditional. */
static void put_ITSTATE ( IRTemp t )
{
   ASSERT_IS_THUMB;
   stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
}
    902 
/* Read the 32-bit sticky Q-flag representation into a fresh temp.
   Zero means Q is clear; any nonzero value means Q is set. */
static IRTemp get_QFLAG32 ( void )
{
   IRTemp t = newTemp(Ity_I32);
   assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
   return t;
}
    909 
/* Write temp 't' (:: Ity_I32, zero-or-nonzero scheme) to the Q-flag
   state, conditionally on condT (see putMiscReg32). */
static void put_QFLAG32 ( IRTemp t, IRTemp condT )
{
   putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
}
    914 
/* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
   Status Register) to indicate that overflow or saturation occurred.
   Nb: t must be zero to denote no saturation, and any nonzero
   value to indicate saturation.  ORing into the existing value is
   what makes the flag sticky: once set it stays set. */
static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
{
   IRTemp old = get_QFLAG32();
   IRTemp nyu = newTemp(Ity_I32);
   assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
   put_QFLAG32(nyu, condT);
}
    926 
    927 /* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
    928    flagNo: which flag bit to set [3...0]
    929    lowbits_to_ignore:  0 = look at all 32 bits
    930                        8 = look at top 24 bits only
    931                       16 = look at top 16 bits only
    932                       31 = look at the top bit only
    933    e: input value to be evaluated.
    934    The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
    935    masked out.  If the resulting value is zero then the GE flag is
    936    set to 0; any other value sets the flag to 1. */
    937 static void put_GEFLAG32 ( Int flagNo,            /* 0, 1, 2 or 3 */
    938                            Int lowbits_to_ignore, /* 0, 8, 16 or 31   */
    939                            IRExpr* e,             /* Ity_I32 */
    940                            IRTemp condT )
    941 {
    942    vassert( flagNo >= 0 && flagNo <= 3 );
    943    vassert( lowbits_to_ignore == 0  ||
    944             lowbits_to_ignore == 8  ||
    945             lowbits_to_ignore == 16 ||
    946             lowbits_to_ignore == 31 );
    947    IRTemp masked = newTemp(Ity_I32);
    948    assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
    949 
    950    switch (flagNo) {
    951       case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
    952       case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
    953       case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
    954       case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
    955       default: vassert(0);
    956    }
    957 }
    958 
/* Return the (32-bit, zero-or-nonzero representation scheme) of
   the specified GE flag: the result is zero iff the flag is clear,
   and any nonzero value means the flag is set. */
static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
{
   switch (flagNo) {
      case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
      case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
      case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
      case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
      default: vassert(0);
   }
}
    971 
    972 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
    973    2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
    974    15 of the value.  All other bits are ignored. */
    975 static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
    976 {
    977    IRTemp ge10 = newTemp(Ity_I32);
    978    IRTemp ge32 = newTemp(Ity_I32);
    979    assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
    980    assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
    981    put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
    982    put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
    983    put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
    984    put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
    985 }
    986 
    987 
    988 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3
    989    from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
    990    bit 7.  All other bits are ignored. */
    991 static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
    992 {
    993    IRTemp ge0 = newTemp(Ity_I32);
    994    IRTemp ge1 = newTemp(Ity_I32);
    995    IRTemp ge2 = newTemp(Ity_I32);
    996    IRTemp ge3 = newTemp(Ity_I32);
    997    assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
    998    assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
    999    assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
   1000    assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
   1001    put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
   1002    put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
   1003    put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
   1004    put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
   1005 }
   1006 
   1007 
   1008 /* ---------------- FPSCR stuff ---------------- */
   1009 
/* Generate IR to get hold of the rounding mode bits in FPSCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPSCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
   /* Now swap them: bit 0 moves up to position 1 (masked with 2) and
      bit 1 moves down to position 0 (masked with 1); everything else
      is discarded by the masks. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
         ));
   return swapped;
}
   1047 
   1048 
   1049 /*------------------------------------------------------------*/
   1050 /*--- Helpers for flag handling and conditional insns      ---*/
   1051 /*------------------------------------------------------------*/
   1052 
/* Return a printable name for the given ARM condition code, in the
   "{cc}" form used when disassembling.  The returned strings are
   static; callers must not modify or free them. */
static HChar* name_ARMCondcode ( ARMCondcode cond )
{
   switch (cond) {
      case ARMCondEQ:  return "{eq}";
      case ARMCondNE:  return "{ne}";
      case ARMCondHS:  return "{hs}";  // or 'cs'
      case ARMCondLO:  return "{lo}";  // or 'cc'
      case ARMCondMI:  return "{mi}";
      case ARMCondPL:  return "{pl}";
      case ARMCondVS:  return "{vs}";
      case ARMCondVC:  return "{vc}";
      case ARMCondHI:  return "{hi}";
      case ARMCondLS:  return "{ls}";
      case ARMCondGE:  return "{ge}";
      case ARMCondLT:  return "{lt}";
      case ARMCondGT:  return "{gt}";
      case ARMCondLE:  return "{le}";
      case ARMCondAL:  return ""; // {al}: is the default
      case ARMCondNV:  return "{nv}";
      default: vpanic("name_ARMCondcode");
   }
}
/* and a handy shorthand for it */
static HChar* nCC ( ARMCondcode cond ) {
   return name_ARMCondcode(cond);
}
   1079 
   1080 
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I32, suitable for narrowing.  Although the return type is
   Ity_I32, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I32 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
   /* And 'cond' had better produce a value in which only bits 7:4
      bits are nonzero.  However, obviously we can't assert for
      that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation-operation".  However,
      as per comments above, must be supplied pre-shifted to this
      function.

      This pairing scheme requires that the ARM_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_condition", &armg_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2.  The mask
      bits (1<<0) and (1<<3) mark call arguments 0 and 3 as always
      defined for Memcheck's purposes. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
   1123 
   1124 
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I32, suitable for narrowing.  Although the return type is
   Ity_I32, the returned value is either 0 or 1.
*/
static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
{
  /* First arg of the helper is "(cond << 4) | cc_op"; the cc_op part
     is ORed in by mk_armg_calculate_condition_dyn, so here we just
     supply the pre-shifted condition.  This requires that the
     ARM_CC_OP_ values all fit in 4 bits.  Hence we are passing a
     (COND, OP) pair in the lowest 8 bits of the first argument. */
   vassert(cond >= 0 && cond <= 15);
   return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
}
   1138 
   1139 
/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I32 (value 0 or 1). */
static IRExpr* mk_armg_calculate_flag_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_flag_c", &armg_calculate_flag_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
   1162 
   1163 
/* Build IR to calculate just the overflow flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I32 (value 0 or 1). */
static IRExpr* mk_armg_calculate_flag_v ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_flag_v", &armg_calculate_flag_v,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
   1186 
   1187 
/* Build IR to calculate N Z C V in bits 31:28 of the
   returned word (all other bits zero, per the helper's contract). */
static IRExpr* mk_armg_calculate_flags_nzcv ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
   1209 
/* Build IR to compute the sticky QC (cumulative saturation) bit from
   a pair of Neon results: resL is the unsaturated ("true") result and
   resR the saturated one; QC is set iff they differ.  Q selects
   128-bit (V128) vs 64-bit (I64) operands.  Returns an Ity_I32
   expression that is nonzero iff saturation occurred. */
static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
{
   IRExpr** args1;
   IRExpr** args2;
   IRExpr *call1, *call2, *res;

   /* Split the operands into 32-bit lanes for the helper: two lanes
      per call for the 64-bit case, two calls of two lanes each for
      the 128-bit case. */
   if (Q) {
      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
                              binop(Iop_GetElem32x4, resL, mkU8(1)),
                              binop(Iop_GetElem32x4, resR, mkU8(0)),
                              binop(Iop_GetElem32x4, resR, mkU8(1)) );
      args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
                              binop(Iop_GetElem32x4, resL, mkU8(3)),
                              binop(Iop_GetElem32x4, resR, mkU8(2)),
                              binop(Iop_GetElem32x4, resR, mkU8(3)) );
   } else {
      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
                              binop(Iop_GetElem32x2, resL, mkU8(1)),
                              binop(Iop_GetElem32x2, resR, mkU8(0)),
                              binop(Iop_GetElem32x2, resR, mkU8(1)) );
   }

/* The '#if 1' variant calls the clean helper; the disabled alternative
   computes the same nonzero-iff-different result inline with XOR/OR. */
#if 1
   call1 = mkIRExprCCall(
             Ity_I32,
             0/*regparm*/,
             "armg_calculate_flag_qc", &armg_calculate_flag_qc,
             args1
          );
   if (Q) {
      call2 = mkIRExprCCall(
                Ity_I32,
                0/*regparm*/,
                "armg_calculate_flag_qc", &armg_calculate_flag_qc,
                args2
             );
   }
   if (Q) {
      res = binop(Iop_Or32, call1, call2);
   } else {
      res = call1;
   }
#else
   if (Q) {
      res = unop(Iop_1Uto32,
                 binop(Iop_CmpNE32,
                       binop(Iop_Or32,
                             binop(Iop_Or32,
                                   binop(Iop_Xor32,
                                         args1[0],
                                         args1[2]),
                                   binop(Iop_Xor32,
                                         args1[1],
                                         args1[3])),
                             binop(Iop_Or32,
                                   binop(Iop_Xor32,
                                         args2[0],
                                         args2[2]),
                                   binop(Iop_Xor32,
                                         args2[1],
                                         args2[3]))),
                       mkU32(0)));
   } else {
      res = unop(Iop_1Uto32,
                 binop(Iop_CmpNE32,
                       binop(Iop_Or32,
                             binop(Iop_Xor32,
                                   args1[0],
                                   args1[2]),
                             binop(Iop_Xor32,
                                   args1[1],
                                   args1[3])),
                       mkU32(0)));
   }
#endif
   return res;
}
   1287 
// FIXME: this is named wrongly .. looks like a sticky set of
// QC, not a write to it.
/* OR the saturation result of (resL, resR) into FPSCR bit 27 (the
   cumulative QC bit), conditionally on condT.  The OR with the old
   FPSCR value is what makes the bit sticky. */
static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
                         IRTemp condT )
{
   putMiscReg32 (OFFB_FPSCR,
                 binop(Iop_Or32,
                       IRExpr_Get(OFFB_FPSCR, Ity_I32),
                       binop(Iop_Shl32,
                             mk_armg_calculate_flag_qc(resL, resR, Q),
                             mkU8(27))),
                 condT);
}
   1301 
   1302 /* Build IR to conditionally set the flags thunk.  As with putIReg, if
   1303    guard is IRTemp_INVALID then it's unconditional, else it holds a
   1304    condition :: Ity_I32. */
   1305 static
   1306 void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
   1307                          IRTemp t_dep2, IRTemp t_ndep,
   1308                          IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1309 {
   1310    IRTemp c8;
   1311    vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
   1312    vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
   1313    vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
   1314    vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
   1315    if (guardT == IRTemp_INVALID) {
   1316       /* unconditional */
   1317       stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(cc_op) ));
   1318       stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   1319       stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   1320       stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
   1321    } else {
   1322       /* conditional */
   1323       c8 = newTemp(Ity_I8);
   1324       assign( c8, unop(Iop_32to8, mkexpr(guardT)) );
   1325       stmt( IRStmt_Put(
   1326                OFFB_CC_OP,
   1327                IRExpr_Mux0X( mkexpr(c8),
   1328                              IRExpr_Get(OFFB_CC_OP, Ity_I32),
   1329                              mkU32(cc_op) )));
   1330       stmt( IRStmt_Put(
   1331                OFFB_CC_DEP1,
   1332                IRExpr_Mux0X( mkexpr(c8),
   1333                              IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1334                              mkexpr(t_dep1) )));
   1335       stmt( IRStmt_Put(
   1336                OFFB_CC_DEP2,
   1337                IRExpr_Mux0X( mkexpr(c8),
   1338                              IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1339                              mkexpr(t_dep2) )));
   1340       stmt( IRStmt_Put(
   1341                OFFB_CC_NDEP,
   1342                IRExpr_Mux0X( mkexpr(c8),
   1343                              IRExpr_Get(OFFB_CC_NDEP, Ity_I32),
   1344                              mkexpr(t_ndep) )));
   1345    }
   1346 }
   1347 
   1348 
/* Minor variant of the above that sets NDEP to zero (if it
   sets it at all) */
static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
                             IRTemp t_dep2,
                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
{
   IRTemp z32 = newTemp(Ity_I32);
   assign( z32, mkU32(0) );
   setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
}
   1359 
   1360 
/* Minor variant of the above that sets DEP2 to zero (if it
   sets it at all) */
static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
                             IRTemp t_ndep,
                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
{
   IRTemp z32 = newTemp(Ity_I32);
   assign( z32, mkU32(0) );
   setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
}
   1371 
   1372 
/* Minor variant of the above that sets DEP2 and NDEP to zero (if it
   sets them at all) */
static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
                          IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
{
   IRTemp z32 = newTemp(Ity_I32);
   assign( z32, mkU32(0) );
   setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
}
   1382 
   1383 
/* ARM only */
/* Generate a side-exit to the next instruction, if the given guard
   expression :: Ity_I32 is 0 (note!  the side exit is taken if the
   condition is false!)  This is used to skip over conditional
   instructions which we can't generate straight-line code for, either
   because they are too complex or (more likely) they potentially
   generate exceptions.
*/
static void mk_skip_over_A32_if_cond_is_false (
               IRTemp guardT /* :: Ity_I32, 0 or 1 */
            )
{
   ASSERT_IS_ARM;
   vassert(guardT != IRTemp_INVALID);
   /* ARM instructions are 4-byte aligned. */
   vassert(0 == (guest_R15_curr_instr_notENC & 3));
   /* Exit to the following instruction (+4) when the guard is 0. */
   stmt( IRStmt_Exit(
            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
            Ijk_Boring,
            IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4))
       ));
}
   1405 
/* Thumb16 only */
/* ditto, but jump over a 16-bit thumb insn */
static void mk_skip_over_T16_if_cond_is_false (
               IRTemp guardT /* :: Ity_I32, 0 or 1 */
            )
{
   ASSERT_IS_THUMB;
   vassert(guardT != IRTemp_INVALID);
   vassert(0 == (guest_R15_curr_instr_notENC & 1));
   /* Exit target is +2 (next 16-bit insn) with bit 0 set to mark the
      destination as Thumb code. */
   stmt( IRStmt_Exit(
            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
            Ijk_Boring,
            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1))
       ));
}
   1421 
   1422 
/* Thumb32 only */
/* ditto, but jump over a 32-bit thumb insn */
static void mk_skip_over_T32_if_cond_is_false (
               IRTemp guardT /* :: Ity_I32, 0 or 1 */
            )
{
   ASSERT_IS_THUMB;
   vassert(guardT != IRTemp_INVALID);
   vassert(0 == (guest_R15_curr_instr_notENC & 1));
   /* Exit target is +4 (next insn after a 32-bit one) with bit 0 set
      to mark the destination as Thumb code. */
   stmt( IRStmt_Exit(
            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
            Ijk_Boring,
            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1))
       ));
}
   1438 
   1439 
/* Thumb16 and Thumb32 only
   Generate a SIGILL followed by a restart of the current instruction
   if the given temp is nonzero.  The exit uses Ijk_NoDecode, which
   the outer machinery turns into the illegal-instruction signal. */
static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
{
   ASSERT_IS_THUMB;
   vassert(t != IRTemp_INVALID);
   vassert(0 == (guest_R15_curr_instr_notENC & 1));
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
         Ijk_NoDecode,
         /* restart at this same instruction, bit 0 set for Thumb */
         IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1))
      )
   );
}
   1456 
   1457 
/* Inspect the old_itstate, and generate a SIGILL if it indicates that
   we are currently in an IT block and are not the last in the block.
   This also rolls back guest_ITSTATE to its old value before the exit
   and restores it to its new value afterwards.  This is so that if
   the exit is taken, we have an up to date version of ITSTATE
   available.  Without doing that, we have no hope of making precise
   exceptions work. */
static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
               IRTemp old_itstate /* :: Ity_I32 */,
               IRTemp new_itstate /* :: Ity_I32 */
            )
{
   ASSERT_IS_THUMB;
   put_ITSTATE(old_itstate); // backout
   /* Shifting out the lowest guard byte leaves the guards for the
      next 3 insns; nonzero means we're not the last in the block. */
   IRTemp guards_for_next3 = newTemp(Ity_I32);
   assign(guards_for_next3,
          binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   gen_SIGILL_T_if_nonzero(guards_for_next3);
   put_ITSTATE(new_itstate); //restore
}
   1478 
   1479 
/* Simpler version of the above, which generates a SIGILL if
   we're anywhere within an IT block (old_itstate nonzero at all). */
static void gen_SIGILL_T_if_in_ITBlock (
               IRTemp old_itstate /* :: Ity_I32 */,
               IRTemp new_itstate /* :: Ity_I32 */
            )
{
   put_ITSTATE(old_itstate); // backout
   gen_SIGILL_T_if_nonzero(old_itstate);
   put_ITSTATE(new_itstate); //restore
}
   1491 
   1492 
/* Generate an APSR value, from the NZCV thunk, and
   from QFLAG32 and GEFLAG0 .. GEFLAG3.  Layout assembled here:
   NZCV in bits 31:28, Q in bit ARMG_CC_SHIFT_Q, GE0..GE3 in
   bits 16..19.  Returns a temp holding the result. */
static IRTemp synthesise_APSR ( void )
{
   IRTemp res1 = newTemp(Ity_I32);
   // Get NZCV
   assign( res1, mk_armg_calculate_flags_nzcv() );
   // OR in the Q value (normalise the zero-or-nonzero scheme to 0/1
   // first, then shift into place)
   IRTemp res2 = newTemp(Ity_I32);
   assign(
      res2,
      binop(Iop_Or32,
            mkexpr(res1),
            binop(Iop_Shl32,
                  unop(Iop_1Uto32,
                       binop(Iop_CmpNE32,
                             mkexpr(get_QFLAG32()),
                             mkU32(0))),
                  mkU8(ARMG_CC_SHIFT_Q)))
   );
   // OR in GE0 .. GE3 (each also normalised to 0/1)
   IRExpr* ge0
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
   IRExpr* ge1
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
   IRExpr* ge2
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
   IRExpr* ge3
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
   IRTemp res3 = newTemp(Ity_I32);
   assign(res3,
          binop(Iop_Or32,
                mkexpr(res2),
                binop(Iop_Or32,
                      binop(Iop_Or32,
                            binop(Iop_Shl32, ge0, mkU8(16)),
                            binop(Iop_Shl32, ge1, mkU8(17))),
                      binop(Iop_Or32,
                            binop(Iop_Shl32, ge2, mkU8(18)),
                            binop(Iop_Shl32, ge3, mkU8(19))) )));
   return res3;
}
   1535 
   1536 
/* and the inverse transformation: given an APSR value,
   set the NZCV thunk, the Q flag, and the GE flags.  The two Bools
   select which groups to write; at least one must be set. */
static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
                                IRTemp apsrT, IRTemp condT )
{
   vassert(write_nzcvq || write_ge);
   if (write_nzcvq) {
      // Do NZCV: copy bits 31:28 straight into the thunk via COPY
      IRTemp immT = newTemp(Ity_I32);
      assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
      setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
      // Do Q: nonzero iff the Q bit is set in the APSR value
      IRTemp qnewT = newTemp(Ity_I32);
      assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
      put_QFLAG32(qnewT, condT);
   }
   if (write_ge) {
      // Do GE3..0: each flag is driven by its bit (16..19) of the APSR
      put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
                   condT);
      put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
                   condT);
      put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
                   condT);
      put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
                   condT);
   }
}
   1565 
   1566 
   1567 /*------------------------------------------------------------*/
   1568 /*--- Helpers for saturation                               ---*/
   1569 /*------------------------------------------------------------*/
   1570 
   1571 /* FIXME: absolutely the only diff. between (a) armUnsignedSatQ and
   1572    (b) armSignedSatQ is that in (a) the floor is set to 0, whereas in
   (b) the floor is computed from the value of imm5.  these two functions
   1574    should be commoned up. */
   1575 
   1576 /* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
   1577    Optionally return flag resQ saying whether saturation occurred.
   1578    See definition in manual, section A2.2.1, page 41
   1579    (bits(N), boolean) UnsignedSatQ( integer i, integer N )
   1580    {
   1581      if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
   1582      elsif ( i < 0 )    { result = 0; saturated = TRUE; }
   1583      else               { result = i; saturated = FALSE; }
   1584      return ( result<N-1:0>, saturated );
   1585    }
   1586 */
   1587 static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
   1588                              IRTemp* resQ, /* OUT - Ity_I32  */
   1589                              IRTemp regT,  /* value to clamp - Ity_I32 */
   1590                              UInt imm5 )   /* saturation ceiling */
   1591 {
   1592    UInt ceil  = (1 << imm5) - 1;    // (2^imm5)-1
   1593    UInt floor = 0;
   1594 
   1595    IRTemp node0 = newTemp(Ity_I32);
   1596    IRTemp node1 = newTemp(Ity_I32);
   1597    IRTemp node2 = newTemp(Ity_I1);
   1598    IRTemp node3 = newTemp(Ity_I32);
   1599    IRTemp node4 = newTemp(Ity_I32);
   1600    IRTemp node5 = newTemp(Ity_I1);
   1601    IRTemp node6 = newTemp(Ity_I32);
   1602 
   1603    assign( node0, mkexpr(regT) );
   1604    assign( node1, mkU32(ceil) );
   1605    assign( node2, binop( Iop_CmpLT32S, mkexpr(node1), mkexpr(node0) ) );
   1606    assign( node3, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node2)),
   1607                                 mkexpr(node0),
   1608                                 mkexpr(node1) ) );
   1609    assign( node4, mkU32(floor) );
   1610    assign( node5, binop( Iop_CmpLT32S, mkexpr(node3), mkexpr(node4) ) );
   1611    assign( node6, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node5)),
   1612                                 mkexpr(node3),
   1613                                 mkexpr(node4) ) );
   1614    assign( *res, mkexpr(node6) );
   1615 
   1616    /* if saturation occurred, then resQ is set to some nonzero value
   1617       if sat did not occur, resQ is guaranteed to be zero. */
   1618    if (resQ) {
   1619       assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   1620    }
   1621 }
   1622 
   1623 
   1624 /* SignedSatQ(): 'clamp' each value so it lies between  -2^N <= x <= (2^N) - 1
   1625    Optionally return flag resQ saying whether saturation occurred.
   1626    - see definition in manual, section A2.2.1, page 41
   1627    (bits(N), boolean ) SignedSatQ( integer i, integer N )
   1628    {
   1629      if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
     elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = TRUE; }
   1631      else                      { result = i;           saturated = FALSE; }
   1632      return ( result[N-1:0], saturated );
   1633    }
   1634 */
   1635 static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
   1636                            UInt imm5,      /* saturation ceiling */
   1637                            IRTemp* res,    /* OUT - Ity_I32 */
   1638                            IRTemp* resQ )  /* OUT - Ity_I32  */
   1639 {
   1640    Int ceil  =  (1 << (imm5-1)) - 1;  //  (2^(imm5-1))-1
   1641    Int floor = -(1 << (imm5-1));      // -(2^(imm5-1))
   1642 
   1643    IRTemp node0 = newTemp(Ity_I32);
   1644    IRTemp node1 = newTemp(Ity_I32);
   1645    IRTemp node2 = newTemp(Ity_I1);
   1646    IRTemp node3 = newTemp(Ity_I32);
   1647    IRTemp node4 = newTemp(Ity_I32);
   1648    IRTemp node5 = newTemp(Ity_I1);
   1649    IRTemp node6 = newTemp(Ity_I32);
   1650 
   1651    assign( node0, mkexpr(regT) );
   1652    assign( node1, mkU32(ceil) );
   1653    assign( node2, binop( Iop_CmpLT32S, mkexpr(node1), mkexpr(node0) ) );
   1654    assign( node3, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node2)),
   1655                                 mkexpr(node0),  mkexpr(node1) ) );
   1656    assign( node4, mkU32(floor) );
   1657    assign( node5, binop( Iop_CmpLT32S, mkexpr(node3), mkexpr(node4) ) );
   1658    assign( node6, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node5)),
   1659                                 mkexpr(node3),  mkexpr(node4) ) );
   1660    assign( *res, mkexpr(node6) );
   1661 
   1662    /* if saturation occurred, then resQ is set to some nonzero value
   1663       if sat did not occur, resQ is guaranteed to be zero. */
   1664    if (resQ) {
   1665      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   1666    }
   1667 }
   1668 
   1669 
   1670 /* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
   1671    overflow occurred for 32-bit addition.  Needs both args and the
   1672    result.  HD p27. */
   1673 static
   1674 IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
   1675                                       IRTemp argL, IRTemp argR )
   1676 {
   1677    IRTemp res = newTemp(Ity_I32);
   1678    assign(res, resE);
   1679    return
   1680       binop( Iop_Shr32,
   1681              binop( Iop_And32,
   1682                     binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
   1683                     binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
   1684              mkU8(31) );
   1685 }
   1686 
   1687 
   1688 /*------------------------------------------------------------*/
   1689 /*--- Larger helpers                                       ---*/
   1690 /*------------------------------------------------------------*/
   1691 
   1692 /* Compute both the result and new C flag value for a LSL by an imm5
   1693    or by a register operand.  May generate reads of the old C value
   1694    (hence only safe to use before any writes to guest state happen).
   1695    Are factored out so can be used by both ARM and Thumb.
   1696 
   1697    Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
   1698    "res" (the result)  is a.k.a. "shop", shifter operand
   1699    "newC" (the new C)  is a.k.a. "shco", shifter carry out
   1700 
   1701    The calling convention for res and newC is a bit funny.  They could
   1702    be passed by value, but instead are passed by ref.
   1703 */
   1704 
/* Compute result ("shop") and carry-out ("shco") for LSL by an
   immediate in 0..31.  In this encoding, shift_amt == 0 means "no
   shift at all", so the result is Rm unchanged and the carry-out is
   the unmodified old C flag. */
static void compute_result_and_C_after_LSL_by_imm5 (
               /*OUT*/HChar* buf,
               IRTemp* res,
               IRTemp* newC,
               IRTemp rMt, UInt shift_amt, /* operands */
               UInt rM      /* only for debug printing */
            )
{
   if (shift_amt == 0) {
      // res  = Rm
      // newC = oldC (carry-out unchanged)
      if (newC) {
         assign( *newC, mk_armg_calculate_flag_c() );
      }
      assign( *res, mkexpr(rMt) );
      DIS(buf, "r%u", rM);
   } else {
      // shift in range 1..31
      // res  = Rm << shift_amt
      // newC = Rm[32 - shift_amt], the last bit shifted out
      vassert(shift_amt >= 1 && shift_amt <= 31);
      if (newC) {
         assign( *newC,
                 binop(Iop_And32,
                       binop(Iop_Shr32, mkexpr(rMt),
                                        mkU8(32 - shift_amt)),
                       mkU32(1)));
      }
      assign( *res,
              binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
      DIS(buf, "r%u, LSL #%u", rM, shift_amt);
   }
}
   1733 
   1734 
   1735 static void compute_result_and_C_after_LSL_by_reg (
   1736                /*OUT*/HChar* buf,
   1737                IRTemp* res,
   1738                IRTemp* newC,
   1739                IRTemp rMt, IRTemp rSt,  /* operands */
   1740                UInt rM,    UInt rS      /* only for debug printing */
   1741             )
   1742 {
   1743    // shift left in range 0 .. 255
   1744    // amt  = rS & 255
   1745    // res  = amt < 32 ?  Rm << amt  : 0
   1746    // newC = amt == 0     ? oldC  :
   1747    //        amt in 1..32 ?  Rm[32-amt]  : 0
   1748    IRTemp amtT = newTemp(Ity_I32);
   1749    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   1750    if (newC) {
   1751       /* mux0X(amt == 0,
   1752                mux0X(amt < 32,
   1753                      0,
   1754                      Rm[(32-amt) & 31])
   1755                oldC)
   1756       */
   1757       /* About the best you can do is pray that iropt is able
   1758          to nuke most or all of the following junk. */
   1759       IRTemp oldC = newTemp(Ity_I32);
   1760       assign(oldC, mk_armg_calculate_flag_c() );
   1761       assign(
   1762          *newC,
   1763          IRExpr_Mux0X(
   1764             unop(Iop_1Uto8,
   1765                  binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))),
   1766             IRExpr_Mux0X(
   1767                unop(Iop_1Uto8,
   1768                     binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
   1769                mkU32(0),
   1770                binop(Iop_Shr32,
   1771                      mkexpr(rMt),
   1772                      unop(Iop_32to8,
   1773                           binop(Iop_And32,
   1774                                 binop(Iop_Sub32,
   1775                                       mkU32(32),
   1776                                       mkexpr(amtT)),
   1777                                 mkU32(31)
   1778                           )
   1779                      )
   1780                )
   1781             ),
   1782             mkexpr(oldC)
   1783          )
   1784       );
   1785    }
   1786    // (Rm << (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
   1787    // Lhs of the & limits the shift to 31 bits, so as to
   1788    // give known IR semantics.  Rhs of the & is all 1s for
   1789    // Rs <= 31 and all 0s for Rs >= 32.
   1790    assign(
   1791       *res,
   1792       binop(
   1793          Iop_And32,
   1794          binop(Iop_Shl32,
   1795                mkexpr(rMt),
   1796                unop(Iop_32to8,
   1797                     binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
   1798          binop(Iop_Sar32,
   1799                binop(Iop_Sub32,
   1800                      mkexpr(amtT),
   1801                      mkU32(32)),
   1802                mkU8(31))));
   1803     DIS(buf, "r%u, LSL r%u", rM, rS);
   1804 }
   1805 
   1806 
/* Compute result ("shop") and carry-out ("shco") for LSR by an
   immediate in 0..31.  In this encoding, shift_amt == 0 denotes a
   shift by 32, not a shift by zero. */
static void compute_result_and_C_after_LSR_by_imm5 (
               /*OUT*/HChar* buf,
               IRTemp* res,
               IRTemp* newC,
               IRTemp rMt, UInt shift_amt, /* operands */
               UInt rM      /* only for debug printing */
            )
{
   if (shift_amt == 0) {
      // conceptually a 32-bit shift, however:
      // res  = 0
      // newC = Rm[31]
      if (newC) {
         assign( *newC,
                 binop(Iop_And32,
                       binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
                       mkU32(1)));
      }
      assign( *res, mkU32(0) );
      DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
   } else {
      // shift in range 1..31
      // res  = Rm >>u shift_amt
      // newC = Rm[shift_amt - 1], the last bit shifted out
      vassert(shift_amt >= 1 && shift_amt <= 31);
      if (newC) {
         assign( *newC,
                 binop(Iop_And32,
                       binop(Iop_Shr32, mkexpr(rMt),
                                        mkU8(shift_amt - 1)),
                       mkU32(1)));
      }
      assign( *res,
              binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
      DIS(buf, "r%u, LSR #%u", rM, shift_amt);
   }
}
   1844 
   1845 
   1846 static void compute_result_and_C_after_LSR_by_reg (
   1847                /*OUT*/HChar* buf,
   1848                IRTemp* res,
   1849                IRTemp* newC,
   1850                IRTemp rMt, IRTemp rSt,  /* operands */
   1851                UInt rM,    UInt rS      /* only for debug printing */
   1852             )
   1853 {
   1854    // shift right in range 0 .. 255
   1855    // amt = rS & 255
   1856    // res  = amt < 32 ?  Rm >>u amt  : 0
   1857    // newC = amt == 0     ? oldC  :
   1858    //        amt in 1..32 ?  Rm[amt-1]  : 0
   1859    IRTemp amtT = newTemp(Ity_I32);
   1860    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   1861    if (newC) {
   1862       /* mux0X(amt == 0,
   1863                mux0X(amt < 32,
   1864                      0,
   1865                      Rm[(amt-1) & 31])
   1866                oldC)
   1867       */
   1868       IRTemp oldC = newTemp(Ity_I32);
   1869       assign(oldC, mk_armg_calculate_flag_c() );
   1870       assign(
   1871          *newC,
   1872          IRExpr_Mux0X(
   1873             unop(Iop_1Uto8,
   1874                  binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))),
   1875             IRExpr_Mux0X(
   1876                unop(Iop_1Uto8,
   1877                     binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
   1878                mkU32(0),
   1879                binop(Iop_Shr32,
   1880                      mkexpr(rMt),
   1881                      unop(Iop_32to8,
   1882                           binop(Iop_And32,
   1883                                 binop(Iop_Sub32,
   1884                                       mkexpr(amtT),
   1885                                       mkU32(1)),
   1886                                 mkU32(31)
   1887                           )
   1888                      )
   1889                )
   1890             ),
   1891             mkexpr(oldC)
   1892          )
   1893       );
   1894    }
   1895    // (Rm >>u (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
   1896    // Lhs of the & limits the shift to 31 bits, so as to
   1897    // give known IR semantics.  Rhs of the & is all 1s for
   1898    // Rs <= 31 and all 0s for Rs >= 32.
   1899    assign(
   1900       *res,
   1901       binop(
   1902          Iop_And32,
   1903          binop(Iop_Shr32,
   1904                mkexpr(rMt),
   1905                unop(Iop_32to8,
   1906                     binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
   1907          binop(Iop_Sar32,
   1908                binop(Iop_Sub32,
   1909                      mkexpr(amtT),
   1910                      mkU32(32)),
   1911                mkU8(31))));
   1912     DIS(buf, "r%u, LSR r%u", rM, rS);
   1913 }
   1914 
   1915 
/* Compute result ("shop") and carry-out ("shco") for ASR by an
   immediate in 0..31.  In this encoding, shift_amt == 0 denotes a
   shift by 32, not a shift by zero. */
static void compute_result_and_C_after_ASR_by_imm5 (
               /*OUT*/HChar* buf,
               IRTemp* res,
               IRTemp* newC,
               IRTemp rMt, UInt shift_amt, /* operands */
               UInt rM      /* only for debug printing */
            )
{
   if (shift_amt == 0) {
      // conceptually a 32-bit shift, however:
      // res  = Rm >>s 31  (all bits become the sign bit)
      // newC = Rm[31]
      if (newC) {
         assign( *newC,
                 binop(Iop_And32,
                       binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
                       mkU32(1)));
      }
      assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
      DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
   } else {
      // shift in range 1..31
      // res = Rm >>s shift_amt
      // newC = Rm[shift_amt - 1], the last bit shifted out
      vassert(shift_amt >= 1 && shift_amt <= 31);
      if (newC) {
         assign( *newC,
                 binop(Iop_And32,
                       binop(Iop_Shr32, mkexpr(rMt),
                                        mkU8(shift_amt - 1)),
                       mkU32(1)));
      }
      assign( *res,
              binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
      DIS(buf, "r%u, ASR #%u", rM, shift_amt);
   }
}
   1953 
   1954 
   1955 static void compute_result_and_C_after_ASR_by_reg (
   1956                /*OUT*/HChar* buf,
   1957                IRTemp* res,
   1958                IRTemp* newC,
   1959                IRTemp rMt, IRTemp rSt,  /* operands */
   1960                UInt rM,    UInt rS      /* only for debug printing */
   1961             )
   1962 {
   1963    // arithmetic shift right in range 0 .. 255
   1964    // amt = rS & 255
   1965    // res  = amt < 32 ?  Rm >>s amt  : Rm >>s 31
   1966    // newC = amt == 0     ? oldC  :
   1967    //        amt in 1..32 ?  Rm[amt-1]  : Rm[31]
   1968    IRTemp amtT = newTemp(Ity_I32);
   1969    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   1970    if (newC) {
   1971       /* mux0X(amt == 0,
   1972                mux0X(amt < 32,
   1973                      Rm[31],
   1974                      Rm[(amt-1) & 31])
   1975                oldC)
   1976       */
   1977       IRTemp oldC = newTemp(Ity_I32);
   1978       assign(oldC, mk_armg_calculate_flag_c() );
   1979       assign(
   1980          *newC,
   1981          IRExpr_Mux0X(
   1982             unop(Iop_1Uto8,
   1983                  binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))),
   1984             IRExpr_Mux0X(
   1985                unop(Iop_1Uto8,
   1986                     binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
   1987                binop(Iop_Shr32,
   1988                      mkexpr(rMt),
   1989                      mkU8(31)
   1990                ),
   1991                binop(Iop_Shr32,
   1992                      mkexpr(rMt),
   1993                      unop(Iop_32to8,
   1994                           binop(Iop_And32,
   1995                                 binop(Iop_Sub32,
   1996                                       mkexpr(amtT),
   1997                                       mkU32(1)),
   1998                                 mkU32(31)
   1999                           )
   2000                      )
   2001                )
   2002             ),
   2003             mkexpr(oldC)
   2004          )
   2005       );
   2006    }
   2007    // (Rm >>s (amt <u 32 ? amt : 31))
   2008    assign(
   2009       *res,
   2010       binop(
   2011          Iop_Sar32,
   2012          mkexpr(rMt),
   2013          unop(
   2014             Iop_32to8,
   2015             IRExpr_Mux0X(
   2016                unop(
   2017                  Iop_1Uto8,
   2018                  binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32))),
   2019                mkU32(31),
   2020                mkexpr(amtT)))));
   2021     DIS(buf, "r%u, ASR r%u", rM, rS);
   2022 }
   2023 
   2024 
   2025 static void compute_result_and_C_after_ROR_by_reg (
   2026                /*OUT*/HChar* buf,
   2027                IRTemp* res,
   2028                IRTemp* newC,
   2029                IRTemp rMt, IRTemp rSt,  /* operands */
   2030                UInt rM,    UInt rS      /* only for debug printing */
   2031             )
   2032 {
   2033    // rotate right in range 0 .. 255
   2034    // amt = rS & 255
   2035    // shop =  Rm `ror` (amt & 31)
   2036    // shco =  amt == 0 ? oldC : Rm[(amt-1) & 31]
   2037    IRTemp amtT = newTemp(Ity_I32);
   2038    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   2039    IRTemp amt5T = newTemp(Ity_I32);
   2040    assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
   2041    IRTemp oldC = newTemp(Ity_I32);
   2042    assign(oldC, mk_armg_calculate_flag_c() );
   2043    if (newC) {
   2044       assign(
   2045          *newC,
   2046          IRExpr_Mux0X(
   2047             unop(Iop_32to8, mkexpr(amtT)),
   2048             mkexpr(oldC),
   2049             binop(Iop_And32,
   2050                   binop(Iop_Shr32,
   2051                         mkexpr(rMt),
   2052                         unop(Iop_32to8,
   2053                              binop(Iop_And32,
   2054                                    binop(Iop_Sub32,
   2055                                          mkexpr(amtT),
   2056                                          mkU32(1)
   2057                                    ),
   2058                                    mkU32(31)
   2059                              )
   2060                         )
   2061                   ),
   2062                   mkU32(1)
   2063             )
   2064          )
   2065       );
   2066    }
   2067    assign(
   2068       *res,
   2069       IRExpr_Mux0X(
   2070          unop(Iop_32to8, mkexpr(amt5T)), mkexpr(rMt),
   2071          binop(Iop_Or32,
   2072                binop(Iop_Shr32,
   2073                      mkexpr(rMt),
   2074                      unop(Iop_32to8, mkexpr(amt5T))
   2075                ),
   2076                binop(Iop_Shl32,
   2077                      mkexpr(rMt),
   2078                      unop(Iop_32to8,
   2079                           binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
   2080                      )
   2081                )
   2082          )
   2083       )
   2084    );
   2085    DIS(buf, "r%u, ROR r#%u", rM, rS);
   2086 }
   2087 
   2088 
   2089 /* Generate an expression corresponding to the immediate-shift case of
   2090    a shifter operand.  This is used both for ARM and Thumb2.
   2091 
   2092    Bind it to a temporary, and return that via *res.  If newC is
   2093    non-NULL, also compute a value for the shifter's carry out (in the
   LSB of a word), bind it to a temporary, and return that via *newC.
   2095 
   2096    Generates GETs from the guest state and is therefore not safe to
   2097    use once we start doing PUTs to it, for any given instruction.
   2098 
   2099    'how' is encoded thusly:
   2100       00b LSL,  01b LSR,  10b ASR,  11b ROR
   2101    Most but not all ARM and Thumb integer insns use this encoding.
   2102    Be careful to ensure the right value is passed here.
   2103 */
static void compute_result_and_C_after_shift_by_imm5 (
               /*OUT*/HChar* buf,
               /*OUT*/IRTemp* res,
               /*OUT*/IRTemp* newC,
               IRTemp  rMt,       /* reg to shift */
               UInt    how,       /* what kind of shift */
               UInt    shift_amt, /* shift amount (0..31) */
               UInt    rM         /* only for debug printing */
            )
{
   vassert(shift_amt < 32);
   vassert(how < 4);

   /* Dispatch on 'how' (00=LSL, 01=LSR, 10=ASR, 11=ROR) to the
      corresponding shift-by-immediate helper.  The ROR case is
      handled inline because shift_amt == 0 there encodes RRX,
      which has no helper of its own. */
   switch (how) {

      case 0:
         compute_result_and_C_after_LSL_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         );
         break;

      case 1:
         compute_result_and_C_after_LSR_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         );
         break;

      case 2:
         compute_result_and_C_after_ASR_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         );
         break;

      case 3:
         if (shift_amt == 0) {
            IRTemp oldcT = newTemp(Ity_I32);
            // rotate right 1 bit through carry (?)
            // RRX -- described at ARM ARM A5-17
            // res  = (oldC << 31) | (Rm >>u 1)
            // newC = Rm[0]
            if (newC) {
               assign( *newC,
                       binop(Iop_And32, mkexpr(rMt), mkU32(1)));
            }
            assign( oldcT, mk_armg_calculate_flag_c() );
            assign( *res,
                    binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
                          binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
            DIS(buf, "r%u, RRX", rM);
         } else {
            // rotate right in range 1..31
            // res  = Rm `ror` shift_amt
            // newC = Rm[shift_amt - 1]
            vassert(shift_amt >= 1 && shift_amt <= 31);
            if (newC) {
               assign( *newC,
                       binop(Iop_And32,
                             binop(Iop_Shr32, mkexpr(rMt),
                                              mkU8(shift_amt - 1)),
                             mkU32(1)));
            }
            assign( *res,
                    binop(Iop_Or32,
                          binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
                          binop(Iop_Shl32, mkexpr(rMt),
                                           mkU8(32-shift_amt))));
            DIS(buf, "r%u, ROR #%u", rM, shift_amt);
         }
         break;

      default:
         /*NOTREACHED*/
         vassert(0);
   }
}
   2180 
   2181 
   2182 /* Generate an expression corresponding to the register-shift case of
   2183    a shifter operand.  This is used both for ARM and Thumb2.
   2184 
   2185    Bind it to a temporary, and return that via *res.  If newC is
   2186    non-NULL, also compute a value for the shifter's carry out (in the
   LSB of a word), bind it to a temporary, and return that via *newC.
   2188 
   2189    Generates GETs from the guest state and is therefore not safe to
   2190    use once we start doing PUTs to it, for any given instruction.
   2191 
   2192    'how' is encoded thusly:
   2193       00b LSL,  01b LSR,  10b ASR,  11b ROR
   2194    Most but not all ARM and Thumb integer insns use this encoding.
   2195    Be careful to ensure the right value is passed here.
   2196 */
   2197 static void compute_result_and_C_after_shift_by_reg (
   2198                /*OUT*/HChar*  buf,
   2199                /*OUT*/IRTemp* res,
   2200                /*OUT*/IRTemp* newC,
   2201                IRTemp  rMt,       /* reg to shift */
   2202                UInt    how,       /* what kind of shift */
   2203                IRTemp  rSt,       /* shift amount */
   2204                UInt    rM,        /* only for debug printing */
   2205                UInt    rS         /* only for debug printing */
   2206             )
   2207 {
   2208    vassert(how < 4);
   2209    switch (how) {
   2210       case 0: { /* LSL */
   2211          compute_result_and_C_after_LSL_by_reg(
   2212             buf, res, newC, rMt, rSt, rM, rS
   2213          );
   2214          break;
   2215       }
   2216       case 1: { /* LSR */
   2217          compute_result_and_C_after_LSR_by_reg(
   2218             buf, res, newC, rMt, rSt, rM, rS
   2219          );
   2220          break;
   2221       }
   2222       case 2: { /* ASR */
   2223          compute_result_and_C_after_ASR_by_reg(
   2224             buf, res, newC, rMt, rSt, rM, rS
   2225          );
   2226          break;
   2227       }
   2228       case 3: { /* ROR */
   2229          compute_result_and_C_after_ROR_by_reg(
   2230              buf, res, newC, rMt, rSt, rM, rS
   2231          );
   2232          break;
   2233       }
   2234       default:
   2235          /*NOTREACHED*/
   2236          vassert(0);
   2237    }
   2238 }
   2239 
   2240 
   2241 /* Generate an expression corresponding to a shifter_operand, bind it
   2242    to a temporary, and return that via *shop.  If shco is non-NULL,
   2243    also compute a value for the shifter's carry out (in the LSB of a
   2244    word), bind it to a temporary, and return that via *shco.
   2245 
   2246    If for some reason we can't come up with a shifter operand (missing
   2247    case?  not really a shifter operand?) return False.
   2248 
   2249    Generates GETs from the guest state and is therefore not safe to
   2250    use once we start doing PUTs to it, for any given instruction.
   2251 
   2252    For ARM insns only; not for Thumb.
   2253 */
static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
                                 /*OUT*/IRTemp* shop,
                                 /*OUT*/IRTemp* shco,
                                 /*OUT*/HChar* buf )
{
   UInt insn_4 = (insn_11_0 >> 4) & 1;
   UInt insn_7 = (insn_11_0 >> 7) & 1;
   vassert(insn_25 <= 0x1);
   vassert(insn_11_0 <= 0xFFF);

   /* The caller passes *shop (and *shco, if non-NULL) in as
      IRTemp_INVALID; fresh temporaries are allocated for them here. */
   vassert(shop && *shop == IRTemp_INVALID);
   *shop = newTemp(Ity_I32);

   if (shco) {
      vassert(*shco == IRTemp_INVALID);
      *shco = newTemp(Ity_I32);
   }

   /* 32-bit immediate */

   if (insn_25 == 1) {
      /* immediate: (7:0) rotated right by 2 * (11:8) */
      UInt imm = (insn_11_0 >> 0) & 0xFF;
      UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
      vassert(rot <= 30);
      imm = ROR32(imm, rot);
      if (shco) {
         /* Carry-out: old C for a zero rotation, otherwise bit 31 of
            the rotated immediate. */
         if (rot == 0) {
            assign( *shco, mk_armg_calculate_flag_c() );
         } else {
            assign( *shco, mkU32( (imm >> 31) & 1 ) );
         }
      }
      DIS(buf, "#0x%x", imm);
      assign( *shop, mkU32(imm) );
      return True;
   }

   /* Shift/rotate by immediate */

   if (insn_25 == 0 && insn_4 == 0) {
      /* Rm (3:0) shifted (6:5) by immediate (11:7) */
      UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
      UInt rM        = (insn_11_0 >> 0) & 0xF;
      UInt how       = (insn_11_0 >> 5) & 3;
      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
      IRTemp rMt = newTemp(Ity_I32);
      assign(rMt, getIRegA(rM));

      vassert(shift_amt <= 31);

      compute_result_and_C_after_shift_by_imm5(
         buf, shop, shco, rMt, how, shift_amt, rM
      );
      return True;
   }

   /* Shift/rotate by register */
   if (insn_25 == 0 && insn_4 == 1) {
      /* Rm (3:0) shifted (6:5) by Rs (11:8) */
      UInt rM  = (insn_11_0 >> 0) & 0xF;
      UInt rS  = (insn_11_0 >> 8) & 0xF;
      UInt how = (insn_11_0 >> 5) & 3;
      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
      IRTemp rMt = newTemp(Ity_I32);
      IRTemp rSt = newTemp(Ity_I32);

      /* NOTE(review): bit 7 set alongside bit 4 presumably marks a
         different instruction class (not a register-shift shifter
         operand) -- confirm against the ARM ARM encoding tables. */
      if (insn_7 == 1)
         return False; /* not really a shifter operand */

      assign(rMt, getIRegA(rM));
      assign(rSt, getIRegA(rS));

      compute_result_and_C_after_shift_by_reg(
         buf, shop, shco, rMt, how, rSt, rM, rS
      );
      return True;
   }

   /* No matching form: report and let the caller signal decode
      failure. */
   vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
   return False;
}
   2336 
   2337 
   2338 /* ARM only */
   2339 static
   2340 IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
   2341                                     /*OUT*/HChar* buf )
   2342 {
   2343    vassert(rN < 16);
   2344    vassert(bU < 2);
   2345    vassert(imm12 < 0x1000);
   2346    UChar opChar = bU == 1 ? '+' : '-';
   2347    DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
   2348    return
   2349       binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
   2350              getIRegA(rN),
   2351              mkU32(imm12) );
   2352 }
   2353 
   2354 
/* ARM only.
   NB: This is "DecodeImmShift" in newer versions of the ARM ARM.
*/
static
IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
                                          UInt sh2, UInt imm5,
                                          /*OUT*/HChar* buf )
{
   /* Build the effective-address expression rN +/- (rM SHIFT imm5)
      for ARM register-offset addressing.  sh2 selects the shift kind
      (0=LSL, 1=LSR, 2=ASR, 3=ROR/RRX), bU selects add (1) vs
      subtract (0).  A disassembly fragment for the addressing mode is
      written into buf. */
   vassert(rN < 16);
   vassert(bU < 2);
   vassert(rM < 16);
   vassert(sh2 < 4);
   vassert(imm5 < 32);
   UChar   opChar = bU == 1 ? '+' : '-';
   IRExpr* index  = NULL;
   switch (sh2) {
      case 0: /* LSL */
         /* imm5 can be in the range 0 .. 31 inclusive. */
         index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
         DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
         break;
      case 1: /* LSR */
         if (imm5 == 0) {
            /* LSR #0 encodes LSR #32, which always produces zero.
               The vassert(0) marks a path not yet exercised. */
            index = mkU32(0);
            vassert(0); // ATC
         } else {
            index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
         }
         DIS(buf, "[r%u, %cr%u, LSR #%u]",
                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
         break;
      case 2: /* ASR */
         /* Doesn't this just mean that the behaviour with imm5 == 0
            is the same as if it had been 31 ? */
         if (imm5 == 0) {
            /* ASR #0 encodes ASR #32: every bit becomes the sign bit,
               which a shift by 31 also achieves.  Path not yet
               exercised (hence the assert). */
            index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
            vassert(0); // ATC
         } else {
            index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
         }
         DIS(buf, "[r%u, %cr%u, ASR #%u]",
                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
         break;
      case 3: /* ROR or RRX */
         if (imm5 == 0) {
            /* ROR #0 encodes RRX: (carry_flag << 31) | (rM >> 1). */
            IRTemp rmT    = newTemp(Ity_I32);
            IRTemp cflagT = newTemp(Ity_I32);
            assign(rmT, getIRegA(rM));
            assign(cflagT, mk_armg_calculate_flag_c());
            index = binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
                          binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
            DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
         } else {
            /* Plain rotate right by imm5 (1..31), expressed as
               (rM << (32-imm5)) | (rM >> imm5). */
            IRTemp rmT = newTemp(Ity_I32);
            assign(rmT, getIRegA(rM));
            vassert(imm5 >= 1 && imm5 <= 31);
            index = binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
                          binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
            DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
         }
         break;
      default:
         vassert(0);
   }
   vassert(index);
   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
                getIRegA(rN), index);
}
   2425 
   2426 
   2427 /* ARM only */
   2428 static
   2429 IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
   2430                                    /*OUT*/HChar* buf )
   2431 {
   2432    vassert(rN < 16);
   2433    vassert(bU < 2);
   2434    vassert(imm8 < 0x100);
   2435    UChar opChar = bU == 1 ? '+' : '-';
   2436    DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
   2437    return
   2438       binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
   2439              getIRegA(rN),
   2440              mkU32(imm8) );
   2441 }
   2442 
   2443 
   2444 /* ARM only */
   2445 static
   2446 IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
   2447                                   /*OUT*/HChar* buf )
   2448 {
   2449    vassert(rN < 16);
   2450    vassert(bU < 2);
   2451    vassert(rM < 16);
   2452    UChar   opChar = bU == 1 ? '+' : '-';
   2453    IRExpr* index  = getIRegA(rM);
   2454    DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
   2455    return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
   2456                 getIRegA(rN), index);
   2457 }
   2458 
   2459 
/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
{
   IRTemp ix       = newTemp(Ity_I32);
   IRTemp termL    = newTemp(Ity_I32);
   IRTemp termR    = newTemp(Ity_I32);
   IRTemp nzcv     = newTemp(Ity_I32);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.
      (The shift-by-5-and-mask-with-3 also picks up bit 5, but bit 5
      is zero for all four defined IRCmpF64Result values above, so it
      never disturbs the result.)

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct value NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of this file, to try this out with.
   */
   /* ix = ((irRes >> 5) & 3) | (irRes & 1) -- i.e. bit 6 beside bit 0. */
   assign(
      ix,
      binop(Iop_Or32,
            binop(Iop_And32,
                  binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
                  mkU32(3)),
            binop(Iop_And32, mkexpr(irRes), mkU32(1))));

   /* termL = ((((ix ^ 1) << 30) - 1) >> 29) + 1  (superopt magic). */
   assign(
      termL,
      binop(Iop_Add32,
            binop(Iop_Shr32,
                  binop(Iop_Sub32,
                        binop(Iop_Shl32,
                              binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
                              mkU8(30)),
                        mkU32(1)),
                  mkU8(29)),
            mkU32(1)));

   /* termR = ix & (ix >> 1) & 1 -- nonzero only when ix == 3 (UN). */
   assign(
      termR,
      binop(Iop_And32,
            binop(Iop_And32,
                  mkexpr(ix),
                  binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
            mkU32(1)));

   assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
   2533 
   2534 
   2535 /* Thumb32 only.  This is "ThumbExpandImm" in the ARM ARM.  If
   2536    updatesC is non-NULL, a boolean is written to it indicating whether
   2537    or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
   2538 */
   2539 static UInt thumbExpandImm ( Bool* updatesC,
   2540                              UInt imm1, UInt imm3, UInt imm8 )
   2541 {
   2542    vassert(imm1 < (1<<1));
   2543    vassert(imm3 < (1<<3));
   2544    vassert(imm8 < (1<<8));
   2545    UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
   2546    UInt abcdefgh = imm8;
   2547    UInt lbcdefgh = imm8 | 0x80;
   2548    if (updatesC) {
   2549       *updatesC = i_imm3_a >= 8;
   2550    }
   2551    switch (i_imm3_a) {
   2552       case 0: case 1:
   2553          return abcdefgh;
   2554       case 2: case 3:
   2555          return (abcdefgh << 16) | abcdefgh;
   2556       case 4: case 5:
   2557          return (abcdefgh << 24) | (abcdefgh << 8);
   2558       case 6: case 7:
   2559          return (abcdefgh << 24) | (abcdefgh << 16)
   2560                 | (abcdefgh << 8) | abcdefgh;
   2561       case 8 ... 31:
   2562          return lbcdefgh << (32 - i_imm3_a);
   2563       default:
   2564          break;
   2565    }
   2566    /*NOTREACHED*/vassert(0);
   2567 }
   2568 
   2569 
   2570 /* Version of thumbExpandImm where we simply feed it the
   2571    instruction halfwords (the lowest addressed one is I0). */
   2572 static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
   2573                                         UShort i0s, UShort i1s )
   2574 {
   2575    UInt i0    = (UInt)i0s;
   2576    UInt i1    = (UInt)i1s;
   2577    UInt imm1  = SLICE_UInt(i0,10,10);
   2578    UInt imm3  = SLICE_UInt(i1,14,12);
   2579    UInt imm8  = SLICE_UInt(i1,7,0);
   2580    return thumbExpandImm(updatesC, imm1, imm3, imm8);
   2581 }
   2582 
   2583 
/* Thumb16 only.  Given the firstcond and mask fields from an IT
   instruction, compute the 32-bit ITSTATE value implied, as described
   in libvex_guest_arm.h.  This is not the ARM ARM representation.
   Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
   disassembly printing.  Returns False if firstcond or mask
   denote something invalid.

   The number and conditions for the instructions to be
   conditionalised depend on firstcond and mask:

   mask      cond 1    cond 2      cond 3      cond 4

   1000      fc[3:0]
   x100      fc[3:0]   fc[3:1]:x
   xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
   xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z

   The condition fields are assembled in *itstate backwards (cond 4 at
   the top, cond 1 at the bottom).  Conditions are << 4'd and then
   ^0xE'd, and those fields that correspond to instructions in the IT
   block are tagged with a 1 bit.
*/
static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
                              /*OUT*/UChar* ch1,
                              /*OUT*/UChar* ch2,
                              /*OUT*/UChar* ch3,
                              UInt firstcond, UInt mask )
{
   vassert(firstcond <= 0xF);
   vassert(mask <= 0xF);
   /* Default outputs: empty ITSTATE, no t/e annotations. */
   *itstate = 0;
   *ch1 = *ch2 = *ch3 = '.';
   if (mask == 0)
      return False; /* the logic below actually ensures this anyway,
                       but clearer to make it explicit. */
   if (firstcond == 0xF)
      return False; /* NV is not allowed */
   if (firstcond == 0xE && popcount32(mask) != 1)
      return False; /* if firstcond is AL then all the rest must be too */

   /* Individual mask bits; the position of the lowest set bit
      determines the length (1..4 insns) of the IT block. */
   UInt m3 = (mask >> 3) & 1;
   UInt m2 = (mask >> 2) & 1;
   UInt m1 = (mask >> 1) & 1;
   UInt m0 = (mask >> 0) & 1;

   UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
   UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;

   /* mask == 1000: one-insn IT block; slots 2-4 unconditional. */
   if (m3 == 1 && (m2|m1|m0) == 0) {
      *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      return True;
   }

   /* mask == x100: two-insn IT block; m3 gives insn 2's T/E sense. */
   if (m2 == 1 && (m1|m0) == 0) {
      *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
      return True;
   }

   /* mask == xy10: three-insn IT block. */
   if (m1 == 1 && m0 == 0) {
      *itstate = (ni << 24)
                 | (setbit32(fc, 4, m2) << 16)
                 | (setbit32(fc, 4, m3) << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
      return True;
   }

   /* mask == xyz1: four-insn IT block. */
   if (m0 == 1) {
      *itstate = (setbit32(fc, 4, m1) << 24)
                 | (setbit32(fc, 4, m2) << 16)
                 | (setbit32(fc, 4, m3) << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
      *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
      return True;
   }

   /* Unreachable for nonzero mask, which was ensured above. */
   return False;
}
   2668 
   2669 
   2670 /* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
   2671    Chapter 7 Section 1. */
   2672 static IRTemp gen_BITREV ( IRTemp x0 )
   2673 {
   2674    IRTemp x1 = newTemp(Ity_I32);
   2675    IRTemp x2 = newTemp(Ity_I32);
   2676    IRTemp x3 = newTemp(Ity_I32);
   2677    IRTemp x4 = newTemp(Ity_I32);
   2678    IRTemp x5 = newTemp(Ity_I32);
   2679    UInt   c1 = 0x55555555;
   2680    UInt   c2 = 0x33333333;
   2681    UInt   c3 = 0x0F0F0F0F;
   2682    UInt   c4 = 0x00FF00FF;
   2683    UInt   c5 = 0x0000FFFF;
   2684    assign(x1,
   2685           binop(Iop_Or32,
   2686                 binop(Iop_Shl32,
   2687                       binop(Iop_And32, mkexpr(x0), mkU32(c1)),
   2688                       mkU8(1)),
   2689                 binop(Iop_Shr32,
   2690                       binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
   2691                       mkU8(1))
   2692    ));
   2693    assign(x2,
   2694           binop(Iop_Or32,
   2695                 binop(Iop_Shl32,
   2696                       binop(Iop_And32, mkexpr(x1), mkU32(c2)),
   2697                       mkU8(2)),
   2698                 binop(Iop_Shr32,
   2699                       binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
   2700                       mkU8(2))
   2701    ));
   2702    assign(x3,
   2703           binop(Iop_Or32,
   2704                 binop(Iop_Shl32,
   2705                       binop(Iop_And32, mkexpr(x2), mkU32(c3)),
   2706                       mkU8(4)),
   2707                 binop(Iop_Shr32,
   2708                       binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
   2709                       mkU8(4))
   2710    ));
   2711    assign(x4,
   2712           binop(Iop_Or32,
   2713                 binop(Iop_Shl32,
   2714                       binop(Iop_And32, mkexpr(x3), mkU32(c4)),
   2715                       mkU8(8)),
   2716                 binop(Iop_Shr32,
   2717                       binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
   2718                       mkU8(8))
   2719    ));
   2720    assign(x5,
   2721           binop(Iop_Or32,
   2722                 binop(Iop_Shl32,
   2723                       binop(Iop_And32, mkexpr(x4), mkU32(c5)),
   2724                       mkU8(16)),
   2725                 binop(Iop_Shr32,
   2726                       binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
   2727                       mkU8(16))
   2728    ));
   2729    return x5;
   2730 }
   2731 
   2732 
   2733 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   2734    0:1:2:3 (aka byte-swap). */
   2735 static IRTemp gen_REV ( IRTemp arg )
   2736 {
   2737    IRTemp res = newTemp(Ity_I32);
   2738    assign(res,
   2739           binop(Iop_Or32,
   2740                 binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
   2741           binop(Iop_Or32,
   2742                 binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
   2743                                  mkU32(0x00FF0000)),
   2744           binop(Iop_Or32,
   2745                 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
   2746                                        mkU32(0x0000FF00)),
   2747                 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
   2748                                        mkU32(0x000000FF) )
   2749    ))));
   2750    return res;
   2751 }
   2752 
   2753 
   2754 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   2755    2:3:0:1 (swap within lo and hi halves). */
   2756 static IRTemp gen_REV16 ( IRTemp arg )
   2757 {
   2758    IRTemp res = newTemp(Ity_I32);
   2759    assign(res,
   2760           binop(Iop_Or32,
   2761                 binop(Iop_And32,
   2762                       binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
   2763                       mkU32(0xFF00FF00)),
   2764                 binop(Iop_And32,
   2765                       binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
   2766                       mkU32(0x00FF00FF))));
   2767    return res;
   2768 }
   2769 
   2770 
   2771 /*------------------------------------------------------------*/
   2772 /*--- Advanced SIMD (NEON) instructions                    ---*/
   2773 /*------------------------------------------------------------*/
   2774 
   2775 /*------------------------------------------------------------*/
   2776 /*--- NEON data processing                                 ---*/
   2777 /*------------------------------------------------------------*/
   2778 
   2779 /* For all NEON DP ops, we use the normal scheme to handle conditional
   2780    writes to registers -- pass in condT and hand that on to the
   2781    put*Reg functions.  In ARM mode condT is always IRTemp_INVALID
   2782    since NEON is unconditional for ARM.  In Thumb mode condT is
   2783    derived from the ITSTATE shift register in the normal way. */
   2784 
   2785 static
   2786 UInt get_neon_d_regno(UInt theInstr)
   2787 {
   2788    UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   2789    if (theInstr & 0x40) {
   2790       if (x & 1) {
   2791          x = x + 0x100;
   2792       } else {
   2793          x = x >> 1;
   2794       }
   2795    }
   2796    return x;
   2797 }
   2798 
   2799 static
   2800 UInt get_neon_n_regno(UInt theInstr)
   2801 {
   2802    UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
   2803    if (theInstr & 0x40) {
   2804       if (x & 1) {
   2805          x = x + 0x100;
   2806       } else {
   2807          x = x >> 1;
   2808       }
   2809    }
   2810    return x;
   2811 }
   2812 
   2813 static
   2814 UInt get_neon_m_regno(UInt theInstr)
   2815 {
   2816    UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   2817    if (theInstr & 0x40) {
   2818       if (x & 1) {
   2819          x = x + 0x100;
   2820       } else {
   2821          x = x >> 1;
   2822       }
   2823    }
   2824    return x;
   2825 }
   2826 
   2827 static
   2828 Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
   2829 {
   2830    UInt dreg = get_neon_d_regno(theInstr);
   2831    UInt mreg = get_neon_m_regno(theInstr);
   2832    UInt nreg = get_neon_n_regno(theInstr);
   2833    UInt imm4 = (theInstr >> 8) & 0xf;
   2834    UInt Q = (theInstr >> 6) & 1;
   2835    HChar reg_t = Q ? 'q' : 'd';
   2836 
   2837    if (Q) {
   2838       putQReg(dreg, triop(Iop_ExtractV128, getQReg(nreg),
   2839                getQReg(mreg), mkU8(imm4)), condT);
   2840    } else {
   2841       putDRegI64(dreg, triop(Iop_Extract64, getDRegI64(nreg),
   2842                  getDRegI64(mreg), mkU8(imm4)), condT);
   2843    }
   2844    DIP("vext.8 %c%d, %c%d, %c%d, #%d\n", reg_t, dreg, reg_t, nreg,
   2845                                          reg_t, mreg, imm4);
   2846    return True;
   2847 }
   2848 
/* VTBL, VTBX */
static
Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
{
   /* Byte table lookup: each byte of Dm is an index into the table
      formed by D registers nreg .. nreg+len.  op == 0 is VTBL
      (out-of-range indices produce 0); op == 1 is VTBX (out-of-range
      indices leave the corresponding destination byte unchanged). */
   UInt op = (theInstr >> 6) & 1;
   /* Bit 6 is cleared before register extraction so the helpers use
      the plain D-register decoding (bit 6 here is the VTBL/VTBX
      selector, not the usual Q bit). */
   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
   UInt len = (theInstr >> 8) & 3;
   Int i;
   IROp cmp;
   ULong imm;
   IRTemp arg_l;
   IRTemp old_mask, new_mask, cur_mask;
   IRTemp old_res, new_res;
   IRTemp old_arg, new_arg;

   if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
      return False;
   if (nreg + len > 31)
      return False;

   cmp = Iop_CmpGT8Ux8;

   old_mask = newTemp(Ity_I64);
   old_res = newTemp(Ity_I64);
   old_arg = newTemp(Ity_I64);
   assign(old_mask, mkU64(0));
   assign(old_res, mkU64(0));
   assign(old_arg, getDRegI64(mreg));
   /* Build imm = 0x0808080808080808: the constant 8 replicated into
      every byte lane. */
   imm = 8;
   imm = (imm <<  8) | imm;
   imm = (imm << 16) | imm;
   imm = (imm << 32) | imm;

   /* One pass per table register.  In each pass: cur_mask selects the
      index bytes currently in range 0..7 (8 >u index, per byte);
      those bytes are looked up in arg_l with Perm8x8 and merged into
      the accumulated result; then 8 is subtracted from every index
      byte so the next table register sees its own indices as 0..7.
      The IRTemp variables are re-bound each iteration to chain the
      passes together. */
   for (i = 0; i <= len; i++) {
      arg_l = newTemp(Ity_I64);
      new_mask = newTemp(Ity_I64);
      cur_mask = newTemp(Ity_I64);
      new_res = newTemp(Ity_I64);
      new_arg = newTemp(Ity_I64);
      assign(arg_l, getDRegI64(nreg+i));
      assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
      assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
      assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
      assign(new_res, binop(Iop_Or64,
                            mkexpr(old_res),
                            binop(Iop_And64,
                                  binop(Iop_Perm8x8,
                                        mkexpr(arg_l),
                                        binop(Iop_And64,
                                              mkexpr(old_arg),
                                              mkexpr(cur_mask))),
                                  mkexpr(cur_mask))));

      old_arg = new_arg;
      old_mask = new_mask;
      old_res = new_res;
   }
   /* VTBX: keep the original destination bytes wherever no table
      entry was selected (old_mask accumulates the selected lanes). */
   if (op) {
      new_res = newTemp(Ity_I64);
      assign(new_res, binop(Iop_Or64,
                            binop(Iop_And64,
                                  getDRegI64(dreg),
                                  unop(Iop_Not64, mkexpr(old_mask))),
                            mkexpr(old_res)));
      old_res = new_res;
   }

   putDRegI64(dreg, mkexpr(old_res), condT);
   DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
   if (len > 0) {
      DIP("d%u-d%u", nreg, nreg + len);
   } else {
      DIP("d%u", nreg);
   }
   DIP("}, d%u\n", mreg);
   return True;
}
   2928 
   2929 /* VDUP (scalar)  */
   2930 static
   2931 Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
   2932 {
   2933    UInt Q = (theInstr >> 6) & 1;
   2934    UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   2935    UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   2936    UInt imm4 = (theInstr >> 16) & 0xF;
   2937    UInt index;
   2938    UInt size;
   2939    IRTemp arg_m;
   2940    IRTemp res;
   2941    IROp op, op2;
   2942 
   2943    if ((imm4 == 0) || (imm4 == 8))
   2944       return False;
   2945    if ((Q == 1) && ((dreg & 1) == 1))
   2946       return False;
   2947    if (Q)
   2948       dreg >>= 1;
   2949    arg_m = newTemp(Ity_I64);
   2950    assign(arg_m, getDRegI64(mreg));
   2951    if (Q)
   2952       res = newTemp(Ity_V128);
   2953    else
   2954       res = newTemp(Ity_I64);
   2955    if ((imm4 & 1) == 1) {
   2956       op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
   2957       op2 = Iop_GetElem8x8;
   2958       index = imm4 >> 1;
   2959       size = 8;
   2960    } else if ((imm4 & 3) == 2) {
   2961       op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
   2962       op2 = Iop_GetElem16x4;
   2963       index = imm4 >> 2;
   2964       size = 16;
   2965    } else if ((imm4 & 7) == 4) {
   2966       op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
   2967       op2 = Iop_GetElem32x2;
   2968       index = imm4 >> 3;
   2969       size = 32;
   2970    } else {
   2971       return False; // can this ever happen?
   2972    }
   2973    assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
   2974    if (Q) {
   2975       putQReg(dreg, mkexpr(res), condT);
   2976    } else {
   2977       putDRegI64(dreg, mkexpr(res), condT);
   2978    }
   2979    DIP("vdup.%d %c%d, d%d[%d]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
   2980    return True;
   2981 }
   2982 
   2983 /* A7.4.1 Three registers of the same length */
   2984 static
   2985 Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
   2986 {
   2987    UInt Q = (theInstr >> 6) & 1;
   2988    UInt dreg = get_neon_d_regno(theInstr);
   2989    UInt nreg = get_neon_n_regno(theInstr);
   2990    UInt mreg = get_neon_m_regno(theInstr);
   2991    UInt A = (theInstr >> 8) & 0xF;
   2992    UInt B = (theInstr >> 4) & 1;
   2993    UInt C = (theInstr >> 20) & 0x3;
   2994    UInt U = (theInstr >> 24) & 1;
   2995    UInt size = C;
   2996 
   2997    IRTemp arg_n;
   2998    IRTemp arg_m;
   2999    IRTemp res;
   3000 
   3001    if (Q) {
   3002       arg_n = newTemp(Ity_V128);
   3003       arg_m = newTemp(Ity_V128);
   3004       res = newTemp(Ity_V128);
   3005       assign(arg_n, getQReg(nreg));
   3006       assign(arg_m, getQReg(mreg));
   3007    } else {
   3008       arg_n = newTemp(Ity_I64);
   3009       arg_m = newTemp(Ity_I64);
   3010       res = newTemp(Ity_I64);
   3011       assign(arg_n, getDRegI64(nreg));
   3012       assign(arg_m, getDRegI64(mreg));
   3013    }
   3014 
   3015    switch(A) {
   3016       case 0:
   3017          if (B == 0) {
   3018             /* VHADD */
   3019             ULong imm = 0;
   3020             IRExpr *imm_val;
   3021             IROp addOp;
   3022             IROp andOp;
   3023             IROp shOp;
   3024             char regType = Q ? 'q' : 'd';
   3025 
   3026             if (size == 3)
   3027                return False;
   3028             switch(size) {
   3029                case 0: imm = 0x101010101010101LL; break;
   3030                case 1: imm = 0x1000100010001LL; break;
   3031                case 2: imm = 0x100000001LL; break;
   3032                default: vassert(0);
   3033             }
   3034             if (Q) {
   3035                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   3036                andOp = Iop_AndV128;
   3037             } else {
   3038                imm_val = mkU64(imm);
   3039                andOp = Iop_And64;
   3040             }
   3041             if (U) {
   3042                switch(size) {
   3043                   case 0:
   3044                      addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
   3045                      shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3046                      break;
   3047                   case 1:
   3048                      addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
   3049                      shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3050                      break;
   3051                   case 2:
   3052                      addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
   3053                      shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3054                      break;
   3055                   default:
   3056                      vassert(0);
   3057                }
   3058             } else {
   3059                switch(size) {
   3060                   case 0:
   3061                      addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
   3062                      shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
   3063                      break;
   3064                   case 1:
   3065                      addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
   3066                      shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
   3067                      break;
   3068                   case 2:
   3069                      addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
   3070                      shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
   3071                      break;
   3072                   default:
   3073                      vassert(0);
   3074                }
   3075             }
   3076             assign(res,
   3077                    binop(addOp,
   3078                          binop(addOp,
   3079                                binop(shOp, mkexpr(arg_m), mkU8(1)),
   3080                                binop(shOp, mkexpr(arg_n), mkU8(1))),
   3081                          binop(shOp,
   3082                                binop(addOp,
   3083                                      binop(andOp, mkexpr(arg_m), imm_val),
   3084                                      binop(andOp, mkexpr(arg_n), imm_val)),
   3085                                mkU8(1))));
   3086             DIP("vhadd.%c%d %c%d, %c%d, %c%d\n",
   3087                 U ? 'u' : 's', 8 << size, regType,
   3088                 dreg, regType, nreg, regType, mreg);
   3089          } else {
   3090             /* VQADD */
   3091             IROp op, op2;
   3092             IRTemp tmp;
   3093             char reg_t = Q ? 'q' : 'd';
   3094             if (Q) {
   3095                switch (size) {
   3096                   case 0:
   3097                      op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
   3098                      op2 = Iop_Add8x16;
   3099                      break;
   3100                   case 1:
   3101                      op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
   3102                      op2 = Iop_Add16x8;
   3103                      break;
   3104                   case 2:
   3105                      op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
   3106                      op2 = Iop_Add32x4;
   3107                      break;
   3108                   case 3:
   3109                      op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
   3110                      op2 = Iop_Add64x2;
   3111                      break;
   3112                   default:
   3113                      vassert(0);
   3114                }
   3115             } else {
   3116                switch (size) {
   3117                   case 0:
   3118                      op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
   3119                      op2 = Iop_Add8x8;
   3120                      break;
   3121                   case 1:
   3122                      op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
   3123                      op2 = Iop_Add16x4;
   3124                      break;
   3125                   case 2:
   3126                      op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
   3127                      op2 = Iop_Add32x2;
   3128                      break;
   3129                   case 3:
   3130                      op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
   3131                      op2 = Iop_Add64;
   3132                      break;
   3133                   default:
   3134                      vassert(0);
   3135                }
   3136             }
   3137             if (Q) {
   3138                tmp = newTemp(Ity_V128);
   3139             } else {
   3140                tmp = newTemp(Ity_I64);
   3141             }
   3142             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   3143 #ifndef DISABLE_QC_FLAG
   3144             assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
   3145             setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
   3146 #endif
   3147             DIP("vqadd.%c%d %c%d, %c%d, %c%d\n",
   3148                 U ? 'u' : 's',
   3149                 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3150          }
   3151          break;
   3152       case 1:
   3153          if (B == 0) {
   3154             /* VRHADD */
   3155             /* VRHADD C, A, B ::=
   3156                  C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
   3157             IROp shift_op, add_op;
   3158             IRTemp cc;
   3159             ULong one = 1;
   3160             HChar reg_t = Q ? 'q' : 'd';
   3161             switch (size) {
   3162                case 0: one = (one <<  8) | one; /* fall through */
   3163                case 1: one = (one << 16) | one; /* fall through */
   3164                case 2: one = (one << 32) | one; break;
   3165                case 3: return False;
   3166                default: vassert(0);
   3167             }
   3168             if (Q) {
   3169                switch (size) {
   3170                   case 0:
   3171                      shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
   3172                      add_op = Iop_Add8x16;
   3173                      break;
   3174                   case 1:
   3175                      shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
   3176                      add_op = Iop_Add16x8;
   3177                      break;
   3178                   case 2:
   3179                      shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
   3180                      add_op = Iop_Add32x4;
   3181                      break;
   3182                   case 3:
   3183                      return False;
   3184                   default:
   3185                      vassert(0);
   3186                }
   3187             } else {
   3188                switch (size) {
   3189                   case 0:
   3190                      shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
   3191                      add_op = Iop_Add8x8;
   3192                      break;
   3193                   case 1:
   3194                      shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
   3195                      add_op = Iop_Add16x4;
   3196                      break;
   3197                   case 2:
   3198                      shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
   3199                      add_op = Iop_Add32x2;
   3200                      break;
   3201                   case 3:
   3202                      return False;
   3203                   default:
   3204                      vassert(0);
   3205                }
   3206             }
   3207             if (Q) {
   3208                cc = newTemp(Ity_V128);
   3209                assign(cc, binop(shift_op,
   3210                                 binop(add_op,
   3211                                       binop(add_op,
   3212                                             binop(Iop_AndV128,
   3213                                                   mkexpr(arg_n),
   3214                                                   binop(Iop_64HLtoV128,
   3215                                                         mkU64(one),
   3216                                                         mkU64(one))),
   3217                                             binop(Iop_AndV128,
   3218                                                   mkexpr(arg_m),
   3219                                                   binop(Iop_64HLtoV128,
   3220                                                         mkU64(one),
   3221                                                         mkU64(one)))),
   3222                                       binop(Iop_64HLtoV128,
   3223                                             mkU64(one),
   3224                                             mkU64(one))),
   3225                                 mkU8(1)));
   3226                assign(res, binop(add_op,
   3227                                  binop(add_op,
   3228                                        binop(shift_op,
   3229                                              mkexpr(arg_n),
   3230                                              mkU8(1)),
   3231                                        binop(shift_op,
   3232                                              mkexpr(arg_m),
   3233                                              mkU8(1))),
   3234                                  mkexpr(cc)));
   3235             } else {
   3236                cc = newTemp(Ity_I64);
   3237                assign(cc, binop(shift_op,
   3238                                 binop(add_op,
   3239                                       binop(add_op,
   3240                                             binop(Iop_And64,
   3241                                                   mkexpr(arg_n),
   3242                                                   mkU64(one)),
   3243                                             binop(Iop_And64,
   3244                                                   mkexpr(arg_m),
   3245                                                   mkU64(one))),
   3246                                       mkU64(one)),
   3247                                 mkU8(1)));
   3248                assign(res, binop(add_op,
   3249                                  binop(add_op,
   3250                                        binop(shift_op,
   3251                                              mkexpr(arg_n),
   3252                                              mkU8(1)),
   3253                                        binop(shift_op,
   3254                                              mkexpr(arg_m),
   3255                                              mkU8(1))),
   3256                                  mkexpr(cc)));
   3257             }
   3258             DIP("vrhadd.%c%d %c%d, %c%d, %c%d\n",
   3259                 U ? 'u' : 's',
   3260                 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3261          } else {
   3262             if (U == 0)  {
   3263                switch(C) {
   3264                   case 0: {
   3265                      /* VAND  */
   3266                      HChar reg_t = Q ? 'q' : 'd';
   3267                      if (Q) {
   3268                         assign(res, binop(Iop_AndV128, mkexpr(arg_n),
   3269                                                        mkexpr(arg_m)));
   3270                      } else {
   3271                         assign(res, binop(Iop_And64, mkexpr(arg_n),
   3272                                                      mkexpr(arg_m)));
   3273                      }
   3274                      DIP("vand %c%d, %c%d, %c%d\n",
   3275                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3276                      break;
   3277                   }
   3278                   case 1: {
   3279                      /* VBIC  */
   3280                      HChar reg_t = Q ? 'q' : 'd';
   3281                      if (Q) {
   3282                         assign(res, binop(Iop_AndV128,mkexpr(arg_n),
   3283                                unop(Iop_NotV128, mkexpr(arg_m))));
   3284                      } else {
   3285                         assign(res, binop(Iop_And64, mkexpr(arg_n),
   3286                                unop(Iop_Not64, mkexpr(arg_m))));
   3287                      }
   3288                      DIP("vbic %c%d, %c%d, %c%d\n",
   3289                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3290                      break;
   3291                   }
   3292                   case 2:
   3293                      if ( nreg != mreg) {
   3294                         /* VORR  */
   3295                         HChar reg_t = Q ? 'q' : 'd';
   3296                         if (Q) {
   3297                            assign(res, binop(Iop_OrV128, mkexpr(arg_n),
   3298                                                          mkexpr(arg_m)));
   3299                         } else {
   3300                            assign(res, binop(Iop_Or64, mkexpr(arg_n),
   3301                                                        mkexpr(arg_m)));
   3302                         }
   3303                         DIP("vorr %c%d, %c%d, %c%d\n",
   3304                             reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3305                      } else {
   3306                         /* VMOV  */
   3307                         HChar reg_t = Q ? 'q' : 'd';
   3308                         assign(res, mkexpr(arg_m));
   3309                         DIP("vmov %c%d, %c%d\n", reg_t, dreg, reg_t, mreg);
   3310                      }
   3311                      break;
   3312                   case 3:{
   3313                      /* VORN  */
   3314                      HChar reg_t = Q ? 'q' : 'd';
   3315                      if (Q) {
   3316                         assign(res, binop(Iop_OrV128,mkexpr(arg_n),
   3317                                unop(Iop_NotV128, mkexpr(arg_m))));
   3318                      } else {
   3319                         assign(res, binop(Iop_Or64, mkexpr(arg_n),
   3320                                unop(Iop_Not64, mkexpr(arg_m))));
   3321                      }
   3322                      DIP("vorn %c%d, %c%d, %c%d\n",
   3323                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
   3324                      break;
   3325                   }
   3326                }
   3327             } else {
   3328                switch(C) {
   3329                   case 0:
   3330                      /* VEOR (XOR)  */
   3331                      if (Q) {
   3332                         assign(res, binop(Iop_XorV128, mkexpr(arg_n),
   3333                                                        mkexpr(arg_m)));
   3334                      } else {
   3335                         assign(res, binop(Iop_Xor64, mkexpr(arg_n),
   3336                                                      mkexpr(arg_m)));
   3337                      }
   3338                      DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
   3339                            Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3340                      break;
   3341                   case 1:
   3342                      /* VBSL  */
   3343                      if (Q) {
   3344                         IRTemp reg_d = newTemp(Ity_V128);
   3345                         assign(reg_d, getQReg(dreg));
   3346                         assign(res,
   3347                                binop(Iop_OrV128,
   3348                                      binop(Iop_AndV128, mkexpr(arg_n),
   3349                                                         mkexpr(reg_d)),
   3350                                      binop(Iop_AndV128,
   3351                                            mkexpr(arg_m),
   3352                                            unop(Iop_NotV128,
   3353                                                  mkexpr(reg_d)) ) ) );
   3354                      } else {
   3355                         IRTemp reg_d = newTemp(Ity_I64);
   3356                         assign(reg_d, getDRegI64(dreg));
   3357                         assign(res,
   3358                                binop(Iop_Or64,
   3359                                      binop(Iop_And64, mkexpr(arg_n),
   3360                                                       mkexpr(reg_d)),
   3361                                      binop(Iop_And64,
   3362                                            mkexpr(arg_m),
   3363                                            unop(Iop_Not64, mkexpr(reg_d)))));
   3364                      }
   3365                      DIP("vbsl %c%u, %c%u, %c%u\n",
   3366                          Q ? 'q' : 'd', dreg,
   3367                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3368                      break;
   3369                   case 2:
   3370                      /* VBIT  */
   3371                      if (Q) {
   3372                         IRTemp reg_d = newTemp(Ity_V128);
   3373                         assign(reg_d, getQReg(dreg));
   3374                         assign(res,
   3375                                binop(Iop_OrV128,
   3376                                      binop(Iop_AndV128, mkexpr(arg_n),
   3377                                                         mkexpr(arg_m)),
   3378                                      binop(Iop_AndV128,
   3379                                            mkexpr(reg_d),
   3380                                            unop(Iop_NotV128, mkexpr(arg_m)))));
   3381                      } else {
   3382                         IRTemp reg_d = newTemp(Ity_I64);
   3383                         assign(reg_d, getDRegI64(dreg));
   3384                         assign(res,
   3385                                binop(Iop_Or64,
   3386                                      binop(Iop_And64, mkexpr(arg_n),
   3387                                                       mkexpr(arg_m)),
   3388                                      binop(Iop_And64,
   3389                                            mkexpr(reg_d),
   3390                                            unop(Iop_Not64, mkexpr(arg_m)))));
   3391                      }
   3392                      DIP("vbit %c%u, %c%u, %c%u\n",
   3393                          Q ? 'q' : 'd', dreg,
   3394                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3395                      break;
   3396                   case 3:
   3397                      /* VBIF  */
   3398                      if (Q) {
   3399                         IRTemp reg_d = newTemp(Ity_V128);
   3400                         assign(reg_d, getQReg(dreg));
   3401                         assign(res,
   3402                                binop(Iop_OrV128,
   3403                                      binop(Iop_AndV128, mkexpr(reg_d),
   3404                                                         mkexpr(arg_m)),
   3405                                      binop(Iop_AndV128,
   3406                                            mkexpr(arg_n),
   3407                                            unop(Iop_NotV128, mkexpr(arg_m)))));
   3408                      } else {
   3409                         IRTemp reg_d = newTemp(Ity_I64);
   3410                         assign(reg_d, getDRegI64(dreg));
   3411                         assign(res,
   3412                                binop(Iop_Or64,
   3413                                      binop(Iop_And64, mkexpr(reg_d),
   3414                                                       mkexpr(arg_m)),
   3415                                      binop(Iop_And64,
   3416                                            mkexpr(arg_n),
   3417                                            unop(Iop_Not64, mkexpr(arg_m)))));
   3418                      }
   3419                      DIP("vbif %c%u, %c%u, %c%u\n",
   3420                          Q ? 'q' : 'd', dreg,
   3421                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
   3422                      break;
   3423                }
   3424             }
   3425          }
   3426          break;
   3427       case 2:
   3428          if (B == 0) {
   3429             /* VHSUB */
   3430             /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1)   */
   3431             ULong imm = 0;
   3432             IRExpr *imm_val;
   3433             IROp subOp;
   3434             IROp notOp;
   3435             IROp andOp;
   3436             IROp shOp;
   3437             if (size == 3)
   3438                return False;
   3439             switch(size) {
   3440                case 0: imm = 0x101010101010101LL; break;
   3441                case 1: imm = 0x1000100010001LL; break;
   3442                case 2: imm = 0x100000001LL; break;
   3443                default: vassert(0);
   3444             }
   3445             if (Q) {
   3446                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   3447                andOp = Iop_AndV128;
   3448                notOp = Iop_NotV128;
   3449             } else {
   3450                imm_val = mkU64(imm);
   3451                andOp = Iop_And64;
   3452                notOp = Iop_Not64;
   3453             }
   3454             if (U) {
   3455                switch(size) {
   3456                   case 0:
   3457                      subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3458                      shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3459                      break;
   3460                   case 1:
   3461                      subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3462                      shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3463                      break;
   3464                   case 2:
   3465                      subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3466                      shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3467                      break;
   3468                   default:
   3469                      vassert(0);
   3470                }
   3471             } else {
   3472                switch(size) {
   3473                   case 0:
   3474                      subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3475                      shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
   3476                      break;
   3477                   case 1:
   3478                      subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3479                      shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
   3480                      break;
   3481                   case 2:
   3482                      subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3483                      shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
   3484                      break;
   3485                   default:
   3486                      vassert(0);
   3487                }
   3488             }
   3489             assign(res,
   3490                    binop(subOp,
   3491                          binop(subOp,
   3492                                binop(shOp, mkexpr(arg_n), mkU8(1)),
   3493                                binop(shOp, mkexpr(arg_m), mkU8(1))),
   3494                          binop(andOp,
   3495                                binop(andOp,
   3496                                      unop(notOp, mkexpr(arg_n)),
   3497                                      mkexpr(arg_m)),
   3498                                imm_val)));
   3499             DIP("vhsub.%c%u %c%u, %c%u, %c%u\n",
   3500                 U ? 'u' : 's', 8 << size,
   3501                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3502                 mreg);
   3503          } else {
   3504             /* VQSUB */
   3505             IROp op, op2;
   3506             IRTemp tmp;
   3507             if (Q) {
   3508                switch (size) {
   3509                   case 0:
   3510                      op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
   3511                      op2 = Iop_Sub8x16;
   3512                      break;
   3513                   case 1:
   3514                      op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
   3515                      op2 = Iop_Sub16x8;
   3516                      break;
   3517                   case 2:
   3518                      op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
   3519                      op2 = Iop_Sub32x4;
   3520                      break;
   3521                   case 3:
   3522                      op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
   3523                      op2 = Iop_Sub64x2;
   3524                      break;
   3525                   default:
   3526                      vassert(0);
   3527                }
   3528             } else {
   3529                switch (size) {
   3530                   case 0:
   3531                      op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
   3532                      op2 = Iop_Sub8x8;
   3533                      break;
   3534                   case 1:
   3535                      op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
   3536                      op2 = Iop_Sub16x4;
   3537                      break;
   3538                   case 2:
   3539                      op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
   3540                      op2 = Iop_Sub32x2;
   3541                      break;
   3542                   case 3:
   3543                      op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
   3544                      op2 = Iop_Sub64;
   3545                      break;
   3546                   default:
   3547                      vassert(0);
   3548                }
   3549             }
   3550             if (Q)
   3551                tmp = newTemp(Ity_V128);
   3552             else
   3553                tmp = newTemp(Ity_I64);
   3554             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   3555 #ifndef DISABLE_QC_FLAG
   3556             assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
   3557             setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
   3558 #endif
   3559             DIP("vqsub.%c%u %c%u, %c%u, %c%u\n",
   3560                 U ? 'u' : 's', 8 << size,
   3561                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3562                 mreg);
   3563          }
   3564          break;
   3565       case 3: {
   3566             IROp op;
   3567             if (Q) {
   3568                switch (size) {
   3569                   case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
   3570                   case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
   3571                   case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
   3572                   case 3: return False;
   3573                   default: vassert(0);
   3574                }
   3575             } else {
   3576                switch (size) {
   3577                   case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
   3578                   case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
   3579                   case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
   3580                   case 3: return False;
   3581                   default: vassert(0);
   3582                }
   3583             }
   3584             if (B == 0) {
   3585                /* VCGT  */
   3586                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   3587                DIP("vcgt.%c%u %c%u, %c%u, %c%u\n",
   3588                    U ? 'u' : 's', 8 << size,
   3589                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3590                    mreg);
   3591             } else {
   3592                /* VCGE  */
   3593                /* VCGE res, argn, argm
   3594                     is equal to
   3595                   VCGT tmp, argm, argn
   3596                   VNOT res, tmp */
   3597                assign(res,
   3598                       unop(Q ? Iop_NotV128 : Iop_Not64,
   3599                            binop(op, mkexpr(arg_m), mkexpr(arg_n))));
   3600                DIP("vcge.%c%u %c%u, %c%u, %c%u\n",
   3601                    U ? 'u' : 's', 8 << size,
   3602                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   3603                    mreg);
   3604             }
   3605          }
   3606          break;
   3607       case 4:
   3608          if (B == 0) {
   3609             /* VSHL */
   3610             IROp op, sub_op;
   3611             IRTemp tmp;
   3612             if (U) {
   3613                switch (size) {
   3614                   case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
   3615                   case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
   3616                   case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
   3617                   case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
   3618                   default: vassert(0);
   3619                }
   3620             } else {
   3621                tmp = newTemp(Q ? Ity_V128 : Ity_I64);
   3622                switch (size) {
   3623                   case 0:
   3624                      op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   3625                      sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3626                      break;
   3627                   case 1:
   3628                      op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   3629                      sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3630                      break;
   3631                   case 2:
   3632                      op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   3633                      sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3634                      break;
   3635                   case 3:
   3636                      op = Q ? Iop_Sar64x2 : Iop_Sar64;
   3637                      sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
   3638                      break;
   3639                   default:
   3640                      vassert(0);
   3641                }
   3642             }
   3643             if (U) {
   3644                if (!Q && (size == 3))
   3645                   assign(res, binop(op, mkexpr(arg_m),
   3646                                         unop(Iop_64to8, mkexpr(arg_n))));
   3647                else
   3648                   assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
   3649             } else {
   3650                if (Q)
   3651                   assign(tmp, binop(sub_op,
   3652                                     binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
   3653                                     mkexpr(arg_n)));
   3654                else
   3655                   assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
   3656                if (!Q && (size == 3))
   3657                   assign(res, binop(op, mkexpr(arg_m),
   3658                                         unop(Iop_64to8, mkexpr(tmp))));
   3659                else
   3660                   assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
   3661             }
   3662             DIP("vshl.%c%u %c%u, %c%u, %c%u\n",
   3663                 U ? 'u' : 's', 8 << size,
   3664                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3665                 nreg);
   3666          } else {
   3667             /* VQSHL */
   3668             IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
   3669             IRTemp tmp, shval, mask, old_shval;
   3670             UInt i;
   3671             ULong esize;
   3672             cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
   3673             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3674             if (U) {
   3675                switch (size) {
   3676                   case 0:
   3677                      op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
   3678                      op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
   3679                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3680                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3681                      break;
   3682                   case 1:
   3683                      op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
   3684                      op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
   3685                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3686                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3687                      break;
   3688                   case 2:
   3689                      op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
   3690                      op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
   3691                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3692                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3693                      break;
   3694                   case 3:
   3695                      op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
   3696                      op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
   3697                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3698                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3699                      break;
   3700                   default:
   3701                      vassert(0);
   3702                }
   3703             } else {
   3704                switch (size) {
   3705                   case 0:
   3706                      op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
   3707                      op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   3708                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3709                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3710                      break;
   3711                   case 1:
   3712                      op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
   3713                      op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   3714                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3715                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3716                      break;
   3717                   case 2:
   3718                      op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
   3719                      op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   3720                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3721                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3722                      break;
   3723                   case 3:
   3724                      op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
   3725                      op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
   3726                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3727                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3728                      break;
   3729                   default:
   3730                      vassert(0);
   3731                }
   3732             }
   3733             if (Q) {
   3734                tmp = newTemp(Ity_V128);
   3735                shval = newTemp(Ity_V128);
   3736                mask = newTemp(Ity_V128);
   3737             } else {
   3738                tmp = newTemp(Ity_I64);
   3739                shval = newTemp(Ity_I64);
   3740                mask = newTemp(Ity_I64);
   3741             }
   3742             assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
   3743 #ifndef DISABLE_QC_FLAG
    3744             /* Only the least significant byte of the second argument is
    3745                used.  Copy this byte to the whole vector element. */
   3746             assign(shval, binop(op_shrn,
   3747                                 binop(op_shln,
   3748                                        mkexpr(arg_n),
   3749                                        mkU8((8 << size) - 8)),
   3750                                 mkU8((8 << size) - 8)));
   3751             for(i = 0; i < size; i++) {
   3752                old_shval = shval;
   3753                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   3754                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   3755                                    mkexpr(old_shval),
   3756                                    binop(op_shln,
   3757                                          mkexpr(old_shval),
   3758                                          mkU8(8 << i))));
   3759             }
    3760             /* If the shift is greater than or equal to the element size
    3761                and the element is non-zero, the QC flag should be set. */
   3762             esize = (8 << size) - 1;
   3763             esize = (esize <<  8) | esize;
   3764             esize = (esize << 16) | esize;
   3765             esize = (esize << 32) | esize;
   3766             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   3767                              binop(cmp_gt, mkexpr(shval),
   3768                                            Q ? mkU128(esize) : mkU64(esize)),
   3769                              unop(cmp_neq, mkexpr(arg_m))),
   3770                        Q ? mkU128(0) : mkU64(0),
   3771                        Q, condT);
            /* Otherwise the QC flag should be set if the shift value is
               positive and the result, right-shifted by the same value,
               is not equal to the left argument. */
   3775             assign(mask, binop(cmp_gt, mkexpr(shval),
   3776                                        Q ? mkU128(0) : mkU64(0)));
   3777             if (!Q && size == 3)
   3778                assign(tmp, binop(op_rev, mkexpr(res),
   3779                                          unop(Iop_64to8, mkexpr(arg_n))));
   3780             else
   3781                assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
   3782             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   3783                              mkexpr(tmp), mkexpr(mask)),
   3784                        binop(Q ? Iop_AndV128 : Iop_And64,
   3785                              mkexpr(arg_m), mkexpr(mask)),
   3786                        Q, condT);
   3787 #endif
   3788             DIP("vqshl.%c%u %c%u, %c%u, %c%u\n",
   3789                 U ? 'u' : 's', 8 << size,
   3790                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3791                 nreg);
   3792          }
   3793          break;
   3794       case 5:
   3795          if (B == 0) {
   3796             /* VRSHL */
   3797             IROp op, op_shrn, op_shln, cmp_gt, op_sub, op_add;
   3798             IRTemp shval, old_shval, imm_val, round;
   3799             UInt i;
   3800             ULong imm;
   3801             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3802             imm = 1L;
   3803             switch (size) {
   3804                case 0: imm = (imm <<  8) | imm; /* fall through */
   3805                case 1: imm = (imm << 16) | imm; /* fall through */
   3806                case 2: imm = (imm << 32) | imm; /* fall through */
   3807                case 3: break;
   3808                default: vassert(0);
   3809             }
   3810             imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
   3811             round = newTemp(Q ? Ity_V128 : Ity_I64);
   3812             assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
   3813             if (U) {
   3814                switch (size) {
   3815                   case 0:
   3816                      op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
   3817                      op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3818                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   3819                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3820                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3821                      break;
   3822                   case 1:
   3823                      op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
   3824                      op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3825                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   3826                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3827                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3828                      break;
   3829                   case 2:
   3830                      op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
   3831                      op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3832                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   3833                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3834                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3835                      break;
   3836                   case 3:
   3837                      op = Q ? Iop_Shl64x2 : Iop_Shl64;
   3838                      op_sub = Q ? Iop_Sub64x2 : Iop_Sub64;
   3839                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   3840                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3841                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3842                      break;
   3843                   default:
   3844                      vassert(0);
   3845                }
   3846             } else {
   3847                switch (size) {
   3848                   case 0:
   3849                      op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
   3850                      op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3851                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   3852                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3853                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3854                      break;
   3855                   case 1:
   3856                      op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
   3857                      op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3858                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   3859                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3860                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3861                      break;
   3862                   case 2:
   3863                      op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
   3864                      op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3865                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   3866                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3867                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3868                      break;
   3869                   case 3:
   3870                      op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
   3871                      op_sub = Q ? Iop_Sub64x2 : Iop_Sub64;
   3872                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   3873                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3874                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3875                      break;
   3876                   default:
   3877                      vassert(0);
   3878                }
   3879             }
   3880             if (Q) {
   3881                shval = newTemp(Ity_V128);
   3882             } else {
   3883                shval = newTemp(Ity_I64);
   3884             }
   3885             /* Only least significant byte from second argument is used.
   3886                Copy this byte to the whole vector element. */
   3887             assign(shval, binop(op_shrn,
   3888                                 binop(op_shln,
   3889                                        mkexpr(arg_n),
   3890                                        mkU8((8 << size) - 8)),
   3891                                 mkU8((8 << size) - 8)));
   3892             for (i = 0; i < size; i++) {
   3893                old_shval = shval;
   3894                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   3895                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   3896                                    mkexpr(old_shval),
   3897                                    binop(op_shln,
   3898                                          mkexpr(old_shval),
   3899                                          mkU8(8 << i))));
   3900             }
   3901             /* Compute the result */
   3902             if (!Q && size == 3 && U) {
   3903                assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   3904                                    binop(op,
   3905                                          mkexpr(arg_m),
   3906                                          unop(Iop_64to8,
   3907                                               binop(op_add,
   3908                                                     mkexpr(arg_n),
   3909                                                     mkexpr(imm_val)))),
   3910                                    binop(Q ? Iop_AndV128 : Iop_And64,
   3911                                          mkexpr(imm_val),
   3912                                          binop(cmp_gt,
   3913                                                Q ? mkU128(0) : mkU64(0),
   3914                                                mkexpr(arg_n)))));
   3915                assign(res, binop(op_add,
   3916                                  binop(op,
   3917                                        mkexpr(arg_m),
   3918                                        unop(Iop_64to8, mkexpr(arg_n))),
   3919                                  mkexpr(round)));
   3920             } else {
   3921                assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   3922                                    binop(op,
   3923                                          mkexpr(arg_m),
   3924                                          binop(op_add,
   3925                                                mkexpr(arg_n),
   3926                                                mkexpr(imm_val))),
   3927                                    binop(Q ? Iop_AndV128 : Iop_And64,
   3928                                          mkexpr(imm_val),
   3929                                          binop(cmp_gt,
   3930                                                Q ? mkU128(0) : mkU64(0),
   3931                                                mkexpr(arg_n)))));
   3932                assign(res, binop(op_add,
   3933                                  binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   3934                                  mkexpr(round)));
   3935             }
   3936             DIP("vrshl.%c%u %c%u, %c%u, %c%u\n",
   3937                 U ? 'u' : 's', 8 << size,
   3938                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   3939                 nreg);
   3940          } else {
   3941             /* VQRSHL */
   3942             IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_sub, op_add;
   3943             IRTemp tmp, shval, mask, old_shval, imm_val, round;
   3944             UInt i;
   3945             ULong esize, imm;
   3946             cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
   3947             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
   3948             imm = 1L;
   3949             switch (size) {
   3950                case 0: imm = (imm <<  8) | imm; /* fall through */
   3951                case 1: imm = (imm << 16) | imm; /* fall through */
   3952                case 2: imm = (imm << 32) | imm; /* fall through */
   3953                case 3: break;
   3954                default: vassert(0);
   3955             }
   3956             imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
   3957             round = newTemp(Q ? Ity_V128 : Ity_I64);
   3958             assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
   3959             if (U) {
   3960                switch (size) {
   3961                   case 0:
   3962                      op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
   3963                      op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   3964                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   3965                      op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
   3966                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   3967                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   3968                      break;
   3969                   case 1:
   3970                      op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
   3971                      op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   3972                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   3973                      op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
   3974                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   3975                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   3976                      break;
   3977                   case 2:
   3978                      op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
   3979                      op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   3980                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   3981                      op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
   3982                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   3983                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   3984                      break;
   3985                   case 3:
   3986                      op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
   3987                      op_sub = Q ? Iop_Sub64x2 : Iop_Sub64;
   3988                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   3989                      op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
   3990                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   3991                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   3992                      break;
   3993                   default:
   3994                      vassert(0);
   3995                }
   3996             } else {
   3997                switch (size) {
   3998                   case 0:
   3999                      op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
   4000                      op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
   4001                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
   4002                      op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
   4003                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
   4004                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
   4005                      break;
   4006                   case 1:
   4007                      op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
   4008                      op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
   4009                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
   4010                      op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
   4011                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
   4012                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
   4013                      break;
   4014                   case 2:
   4015                      op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
   4016                      op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
   4017                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
   4018                      op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
   4019                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
   4020                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
   4021                      break;
   4022                   case 3:
   4023                      op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
   4024                      op_sub = Q ? Iop_Sub64x2 : Iop_Sub64;
   4025                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
   4026                      op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
   4027                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
   4028                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
   4029                      break;
   4030                   default:
   4031                      vassert(0);
   4032                }
   4033             }
   4034             if (Q) {
   4035                tmp = newTemp(Ity_V128);
   4036                shval = newTemp(Ity_V128);
   4037                mask = newTemp(Ity_V128);
   4038             } else {
   4039                tmp = newTemp(Ity_I64);
   4040                shval = newTemp(Ity_I64);
   4041                mask = newTemp(Ity_I64);
   4042             }
   4043             /* Only least significant byte from second argument is used.
   4044                Copy this byte to the whole vector element. */
   4045             assign(shval, binop(op_shrn,
   4046                                 binop(op_shln,
   4047                                        mkexpr(arg_n),
   4048                                        mkU8((8 << size) - 8)),
   4049                                 mkU8((8 << size) - 8)));
   4050             for (i = 0; i < size; i++) {
   4051                old_shval = shval;
   4052                shval = newTemp(Q ? Ity_V128 : Ity_I64);
   4053                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
   4054                                    mkexpr(old_shval),
   4055                                    binop(op_shln,
   4056                                          mkexpr(old_shval),
   4057                                          mkU8(8 << i))));
   4058             }
   4059             /* Compute the result */
   4060             assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
   4061                                 binop(op,
   4062                                       mkexpr(arg_m),
   4063                                       binop(op_add,
   4064                                             mkexpr(arg_n),
   4065                                             mkexpr(imm_val))),
   4066                                 binop(Q ? Iop_AndV128 : Iop_And64,
   4067                                       mkexpr(imm_val),
   4068                                       binop(cmp_gt,
   4069                                             Q ? mkU128(0) : mkU64(0),
   4070                                             mkexpr(arg_n)))));
   4071             assign(res, binop(op_add,
   4072                               binop(op, mkexpr(arg_m), mkexpr(arg_n)),
   4073                               mkexpr(round)));
   4074 #ifndef DISABLE_QC_FLAG
   4075             /* If shift is greater or equal to the element size and element is
   4076                non-zero, then QC flag should be set. */
   4077             esize = (8 << size) - 1;
   4078             esize = (esize <<  8) | esize;
   4079             esize = (esize << 16) | esize;
   4080             esize = (esize << 32) | esize;
   4081             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4082                              binop(cmp_gt, mkexpr(shval),
   4083                                            Q ? mkU128(esize) : mkU64(esize)),
   4084                              unop(cmp_neq, mkexpr(arg_m))),
   4085                        Q ? mkU128(0) : mkU64(0),
   4086                        Q, condT);
   4087             /* Othervise QC flag should be set if shift value is positive and
   4088                result beign rightshifted the same value is not equal to left
   4089                argument. */
   4090             assign(mask, binop(cmp_gt, mkexpr(shval),
   4091                                Q ? mkU128(0) : mkU64(0)));
   4092             if (!Q && size == 3)
   4093                assign(tmp, binop(op_rev, mkexpr(res),
   4094                                          unop(Iop_64to8, mkexpr(arg_n))));
   4095             else
   4096                assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
   4097             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
   4098                              mkexpr(tmp), mkexpr(mask)),
   4099                        binop(Q ? Iop_AndV128 : Iop_And64,
   4100                              mkexpr(arg_m), mkexpr(mask)),
   4101                        Q, condT);
   4102 #endif
   4103             DIP("vqrshl.%c%u %c%u, %c%u, %c%u\n",
   4104                 U ? 'u' : 's', 8 << size,
   4105                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
   4106                 nreg);
   4107          }
   4108          break;
   4109       case 6:
   4110          /* VMAX, VMIN  */
   4111          if (B == 0) {
   4112             /* VMAX */
   4113             IROp op;
   4114             if (U == 0) {
   4115                switch (size) {
   4116                   case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
   4117                   case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
   4118                   case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
   4119                   case 3: return False;
   4120                   default: vassert(0);
   4121                }
   4122             } else {
   4123                switch (size) {
   4124                   case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
   4125                   case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
   4126                   case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
   4127                   case 3: return False;
   4128                   default: vassert(0);
   4129                }
   4130             }
   4131             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4132             DIP("vmax.%c%u %c%u, %c%u, %c%u\n",
   4133                 U ? 'u' : 's', 8 << size,
   4134                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4135                 mreg);
   4136          } else {
   4137             /* VMIN */
   4138             IROp op;
   4139             if (U == 0) {
   4140                switch (size) {
   4141                   case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
   4142                   case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
   4143                   case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
   4144                   case 3: return False;
   4145                   default: vassert(0);
   4146                }
   4147             } else {
   4148                switch (size) {
   4149                   case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
   4150                   case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
   4151                   case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
   4152                   case 3: return False;
   4153                   default: vassert(0);
   4154                }
   4155             }
   4156             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
   4157             DIP("vmin.%c%u %c%u, %c%u, %c%u\n",
   4158                 U ? 'u' : 's', 8 << size,
   4159                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4160                 mreg);
   4161          }
   4162          break;
   4163       case 7:
   4164          if (B == 0) {
   4165             /* VABD */
   4166             IROp op_cmp, op_sub;
   4167             IRTemp cond;
   4168             if ((theInstr >> 23) & 1) {
   4169                vpanic("VABDL should not be in dis_neon_data_3same\n");
   4170             }
   4171             if (Q) {
   4172                switch (size) {
   4173                   case 0:
   4174                      op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
   4175                      op_sub = Iop_Sub8x16;
   4176                      break;
   4177                   case 1:
   4178                      op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
   4179                      op_sub = Iop_Sub16x8;
   4180                      break;
   4181                   case 2:
   4182                      op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
   4183                      op_sub = Iop_Sub32x4;
   4184                      break;
   4185                   case 3:
   4186                      return False;
   4187                   default:
   4188                      vassert(0);
   4189                }
   4190             } else {
   4191                switch (size) {
   4192                   case 0:
   4193                      op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4194                      op_sub = Iop_Sub8x8;
   4195                      break;
   4196                   case 1:
   4197                      op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   4198                      op_sub = Iop_Sub16x4;
   4199                      break;
   4200                   case 2:
   4201                      op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   4202                      op_sub = Iop_Sub32x2;
   4203                      break;
   4204                   case 3:
   4205                      return False;
   4206                   default:
   4207                      vassert(0);
   4208                }
   4209             }
   4210             if (Q) {
   4211                cond = newTemp(Ity_V128);
   4212             } else {
   4213                cond = newTemp(Ity_I64);
   4214             }
   4215             assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
   4216             assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
   4217                               binop(Q ? Iop_AndV128 : Iop_And64,
   4218                                     binop(op_sub, mkexpr(arg_n),
   4219                                                   mkexpr(arg_m)),
   4220                                     mkexpr(cond)),
   4221                               binop(Q ? Iop_AndV128 : Iop_And64,
   4222                                     binop(op_sub, mkexpr(arg_m),
   4223                                                   mkexpr(arg_n)),
   4224                                     unop(Q ? Iop_NotV128 : Iop_Not64,
   4225                                          mkexpr(cond)))));
   4226             DIP("vabd.%c%u %c%u, %c%u, %c%u\n",
   4227                 U ? 'u' : 's', 8 << size,
   4228                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
   4229                 mreg);
   4230          } else {
   4231             /* VABA */
   4232             IROp op_cmp, op_sub, op_add;
   4233             IRTemp cond, acc, tmp;
   4234             if ((theInstr >> 23) & 1) {
   4235                vpanic("VABAL should not be in dis_neon_data_3same");
   4236             }
   4237             if (Q) {
   4238                switch (size) {
   4239                   case 0:
   4240                      op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
   4241                      op_sub = Iop_Sub8x16;
   4242                      op_add = Iop_Add8x16;
   4243                      break;
   4244                   case 1:
   4245                      op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
   4246                      op_sub = Iop_Sub16x8;
   4247                      op_add = Iop_Add16x8;
   4248                      break;
   4249                   case 2:
   4250                      op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
   4251                      op_sub = Iop_Sub32x4;
   4252                      op_add = Iop_Add32x4;
   4253                      break;
   4254                   case 3:
   4255                      return False;
   4256                   default:
   4257                      vassert(0);
   4258                }
   4259             } else {
   4260                switch (size) {
   4261                   case 0:
   4262                      op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
   4263                      op_sub = Iop_Sub8x8;
   4264                      op_add = Iop_Add8x8;
   4265                      break;
   4266                   case 1:
   4267                      op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
   4268                      op_sub = Iop_Sub16x4;
   4269                      op_add = Iop_Add16x4;
   4270                      break;
   4271                   case 2:
   4272                      op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
   4273                      op_sub = Iop_Sub32x2;
   4274                      op_add = Iop_Add32x2;
   4275                      break;
   4276                   case 3:
   4277                      return False;
   4278                   default:
   4279                      vassert(0);
   4280                }
   4281             }
   4282             if (Q) {
   4283                cond = newTemp(Ity_V128);
   4284                acc = newTemp(Ity_V128);
   4285                tmp = newTemp(Ity_V128);
   4286                assign(acc, getQReg(dreg));
   4287             } else {
   4288                cond = newTemp(Ity_I64);
   4289                acc = newTemp(Ity_I64);
   4290                tmp = newTemp(Ity_I64);
   4291                assign(acc, getDRegI64(dreg));
   4292             }
   4293             assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));