1 2 /*--------------------------------------------------------------------*/ 3 /*--- begin guest_arm_toIR.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2010 OpenWorks LLP 11 info (at) open-works.net 12 13 NEON support is 14 Copyright (C) 2010-2010 Samsung Electronics 15 contributed by Dmitry Zhurikhin <zhur (at) ispras.ru> 16 and Kirill Batuzov <batuzovk (at) ispras.ru> 17 18 This program is free software; you can redistribute it and/or 19 modify it under the terms of the GNU General Public License as 20 published by the Free Software Foundation; either version 2 of the 21 License, or (at your option) any later version. 22 23 This program is distributed in the hope that it will be useful, but 24 WITHOUT ANY WARRANTY; without even the implied warranty of 25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 26 General Public License for more details. 27 28 You should have received a copy of the GNU General Public License 29 along with this program; if not, write to the Free Software 30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 31 02110-1301, USA. 32 33 The GNU General Public License is contained in the file COPYING. 34 */ 35 36 /* XXXX thumb to check: 37 that all cases where putIRegT writes r15, we generate a jump. 38 39 All uses of newTemp assign to an IRTemp and not a UInt 40 41 For all thumb loads and stores, including VFP ones, new-ITSTATE is 42 backed out before the memory op, and restored afterwards. This 43 needs to happen even after we go uncond. (and for sure it doesn't 44 happen for VFP loads/stores right now). 45 46 VFP on thumb: check that we exclude all r13/r15 cases that we 47 should. 48 49 XXXX thumb to do: improve the ITSTATE-zeroing optimisation by 50 taking into account the number of insns guarded by an IT. 
51 52 remove the nasty hack, in the spechelper, of looking for Or32(..., 53 0xE0) in as the first arg to armg_calculate_condition, and instead 54 use Slice44 as specified in comments in the spechelper. 55 56 add specialisations for armg_calculate_flag_c and _v, as they 57 are moderately often needed in Thumb code. 58 59 Correctness: ITSTATE handling in Thumb SVCs is wrong. 60 61 Correctness (obscure): in m_transtab, when invalidating code 62 address ranges, invalidate up to 18 bytes after the end of the 63 range. This is because the ITSTATE optimisation at the top of 64 _THUMB_WRK below analyses up to 18 bytes before the start of any 65 given instruction, and so might depend on the invalidated area. 66 */ 67 68 /* Limitations, etc 69 70 - pretty dodgy exception semantics for {LD,ST}Mxx, no doubt 71 72 - SWP: the restart jump back is Ijk_Boring; it should be 73 Ijk_NoRedir but that's expensive. See comments on casLE() in 74 guest_x86_toIR.c. 75 */ 76 77 /* "Special" instructions. 78 79 This instruction decoder can decode four special instructions 80 which mean nothing natively (are no-ops as far as regs/mem are 81 concerned) but have meaning for supporting Valgrind. A special 82 instruction is flagged by a 16-byte preamble: 83 84 E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC 85 (mov r12, r12, ROR #3; mov r12, r12, ROR #13; 86 mov r12, r12, ROR #29; mov r12, r12, ROR #19) 87 88 Following that, one of the following 3 are allowed 89 (standard interpretation in parentheses): 90 91 E18AA00A (orr r10,r10,r10) R3 = client_request ( R4 ) 92 E18BB00B (orr r11,r11,r11) R3 = guest_NRADDR 93 E18CC00C (orr r12,r12,r12) branch-and-link-to-noredir R4 94 95 Any other bytes following the 16-byte preamble are illegal and 96 constitute a failure in instruction decoding. This all assumes 97 that the preamble will never occur except in specific code 98 fragments designed for Valgrind to catch. 99 */ 100 101 /* Translates ARM(v5) code to IR. 
*/ 102 103 #include "libvex_basictypes.h" 104 #include "libvex_ir.h" 105 #include "libvex.h" 106 #include "libvex_guest_arm.h" 107 108 #include "main_util.h" 109 #include "main_globals.h" 110 #include "guest_generic_bb_to_IR.h" 111 #include "guest_arm_defs.h" 112 113 114 /*------------------------------------------------------------*/ 115 /*--- Globals ---*/ 116 /*------------------------------------------------------------*/ 117 118 /* These are set at the start of the translation of a instruction, so 119 that we don't have to pass them around endlessly. CONST means does 120 not change during translation of the instruction. 121 */ 122 123 /* CONST: is the host bigendian? This has to do with float vs double 124 register accesses on VFP, but it's complex and not properly thought 125 out. */ 126 static Bool host_is_bigendian; 127 128 /* CONST: The guest address for the instruction currently being 129 translated. This is the real, "decoded" address (not subject 130 to the CPSR.T kludge). */ 131 static Addr32 guest_R15_curr_instr_notENC; 132 133 /* CONST, FOR ASSERTIONS ONLY. Indicates whether currently processed 134 insn is Thumb (True) or ARM (False). */ 135 static Bool __curr_is_Thumb; 136 137 /* MOD: The IRSB* into which we're generating code. */ 138 static IRSB* irsb; 139 140 /* These are to do with handling writes to r15. They are initially 141 set at the start of disInstr_ARM_WRK to indicate no update, 142 possibly updated during the routine, and examined again at the end. 143 If they have been set to indicate a r15 update then a jump is 144 generated. Note, "explicit" jumps (b, bx, etc) are generated 145 directly, not using this mechanism -- this is intended to handle 146 the implicit-style jumps resulting from (eg) assigning to r15 as 147 the result of insns we wouldn't normally consider branchy. */ 148 149 /* MOD. Initially False; set to True iff abovementioned handling is 150 required. */ 151 static Bool r15written; 152 153 /* MOD. Initially IRTemp_INVALID. 
If the r15 branch to be generated 154 is conditional, this holds the gating IRTemp :: Ity_I32. If the 155 branch to be generated is unconditional, this remains 156 IRTemp_INVALID. */ 157 static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */ 158 159 /* MOD. Initially Ijk_Boring. If an r15 branch is to be generated, 160 this holds the jump kind. */ 161 static IRTemp r15kind; 162 163 164 /*------------------------------------------------------------*/ 165 /*--- Debugging output ---*/ 166 /*------------------------------------------------------------*/ 167 168 #define DIP(format, args...) \ 169 if (vex_traceflags & VEX_TRACE_FE) \ 170 vex_printf(format, ## args) 171 172 #define DIS(buf, format, args...) \ 173 if (vex_traceflags & VEX_TRACE_FE) \ 174 vex_sprintf(buf, format, ## args) 175 176 #define ASSERT_IS_THUMB \ 177 do { vassert(__curr_is_Thumb); } while (0) 178 179 #define ASSERT_IS_ARM \ 180 do { vassert(! __curr_is_Thumb); } while (0) 181 182 183 /*------------------------------------------------------------*/ 184 /*--- Helper bits and pieces for deconstructing the ---*/ 185 /*--- arm insn stream. ---*/ 186 /*------------------------------------------------------------*/ 187 188 /* Do a little-endian load of a 32-bit word, regardless of the 189 endianness of the underlying host. */ 190 static inline UInt getUIntLittleEndianly ( UChar* p ) 191 { 192 UInt w = 0; 193 w = (w << 8) | p[3]; 194 w = (w << 8) | p[2]; 195 w = (w << 8) | p[1]; 196 w = (w << 8) | p[0]; 197 return w; 198 } 199 200 /* Do a little-endian load of a 16-bit word, regardless of the 201 endianness of the underlying host. 
*/ 202 static inline UShort getUShortLittleEndianly ( UChar* p ) 203 { 204 UShort w = 0; 205 w = (w << 8) | p[1]; 206 w = (w << 8) | p[0]; 207 return w; 208 } 209 210 static UInt ROR32 ( UInt x, UInt sh ) { 211 vassert(sh >= 0 && sh < 32); 212 if (sh == 0) 213 return x; 214 else 215 return (x << (32-sh)) | (x >> sh); 216 } 217 218 static Int popcount32 ( UInt x ) 219 { 220 Int res = 0, i; 221 for (i = 0; i < 32; i++) { 222 res += (x & 1); 223 x >>= 1; 224 } 225 return res; 226 } 227 228 static UInt setbit32 ( UInt x, Int ix, UInt b ) 229 { 230 UInt mask = 1 << ix; 231 x &= ~mask; 232 x |= ((b << ix) & mask); 233 return x; 234 } 235 236 #define BITS2(_b1,_b0) \ 237 (((_b1) << 1) | (_b0)) 238 239 #define BITS3(_b2,_b1,_b0) \ 240 (((_b2) << 2) | ((_b1) << 1) | (_b0)) 241 242 #define BITS4(_b3,_b2,_b1,_b0) \ 243 (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0)) 244 245 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 246 ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \ 247 | BITS4((_b3),(_b2),(_b1),(_b0))) 248 249 #define BITS5(_b4,_b3,_b2,_b1,_b0) \ 250 (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0))) 251 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \ 252 (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 253 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 254 (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 255 256 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 257 (((_b8) << 8) \ 258 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 259 260 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \ 261 (((_b9) << 9) | ((_b8) << 8) \ 262 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0))) 263 264 /* produces _uint[_bMax:_bMin] */ 265 #define SLICE_UInt(_uint,_bMax,_bMin) \ 266 (( ((UInt)(_uint)) >> (_bMin)) \ 267 & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL)) 268 269 270 /*------------------------------------------------------------*/ 271 /*--- Helper bits and pieces for creating IR fragments. 
---*/ 272 /*------------------------------------------------------------*/ 273 274 static IRExpr* mkU64 ( ULong i ) 275 { 276 return IRExpr_Const(IRConst_U64(i)); 277 } 278 279 static IRExpr* mkU32 ( UInt i ) 280 { 281 return IRExpr_Const(IRConst_U32(i)); 282 } 283 284 static IRExpr* mkU8 ( UInt i ) 285 { 286 vassert(i < 256); 287 return IRExpr_Const(IRConst_U8( (UChar)i )); 288 } 289 290 static IRExpr* mkexpr ( IRTemp tmp ) 291 { 292 return IRExpr_RdTmp(tmp); 293 } 294 295 static IRExpr* unop ( IROp op, IRExpr* a ) 296 { 297 return IRExpr_Unop(op, a); 298 } 299 300 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) 301 { 302 return IRExpr_Binop(op, a1, a2); 303 } 304 305 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) 306 { 307 return IRExpr_Triop(op, a1, a2, a3); 308 } 309 310 static IRExpr* loadLE ( IRType ty, IRExpr* addr ) 311 { 312 return IRExpr_Load(Iend_LE, ty, addr); 313 } 314 315 /* Add a statement to the list held by "irbb". */ 316 static void stmt ( IRStmt* st ) 317 { 318 addStmtToIRSB( irsb, st ); 319 } 320 321 static void assign ( IRTemp dst, IRExpr* e ) 322 { 323 stmt( IRStmt_WrTmp(dst, e) ); 324 } 325 326 static void storeLE ( IRExpr* addr, IRExpr* data ) 327 { 328 stmt( IRStmt_Store(Iend_LE, addr, data) ); 329 } 330 331 /* Generate a new temporary of the given type. */ 332 static IRTemp newTemp ( IRType ty ) 333 { 334 vassert(isPlausibleIRType(ty)); 335 return newIRTemp( irsb->tyenv, ty ); 336 } 337 338 /* Produces a value in 0 .. 3, which is encoded as per the type 339 IRRoundingMode. */ 340 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void ) 341 { 342 return mkU32(Irrm_NEAREST); 343 } 344 345 /* Generate an expression for SRC rotated right by ROT. 
*/
static IRExpr* genROR32( IRTemp src, Int rot )
{
   vassert(rot >= 0 && rot < 32);
   if (rot == 0)
      return mkexpr(src);
   return
      binop(Iop_Or32,
            binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
            binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
}

/* Make a 128-bit constant whose two 64-bit halves are both 'i'. */
static IRExpr* mkU128 ( ULong i )
{
   return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
}

/* Generate a 4-aligned version of the given expression if
   the given condition is true.  Else return it unchanged. */
static IRExpr* align4if ( IRExpr* e, Bool b )
{
   if (b)
      return binop(Iop_And32, e, mkU32(~3));
   else
      return e;
}


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

/* Byte offsets of the guest state fields within VexGuestARMState. */

#define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
#define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
#define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
#define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
#define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
#define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
#define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
#define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
#define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
#define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
#define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
#define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
#define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
#define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
#define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
#define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)

#define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
#define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)

#define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
#define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
#define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
#define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
#define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
#define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
#define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
#define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
#define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
#define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
#define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
#define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
#define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
#define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
#define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
#define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
#define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
#define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
#define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
#define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
#define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
#define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
#define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
#define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
#define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
#define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
#define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
#define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
#define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
#define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
#define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
#define OFFB_D31      offsetof(VexGuestARMState,guest_D31)

#define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
#define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
#define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
#define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
#define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
#define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
#define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
#define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)


/* ---------------- Integer registers ---------------- */

/* Map an integer register number (0 .. 15) to its guest state
   offset. */
static Int integerGuestRegOffset ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed, but I don't think that ever happens on
      ARM. */
   switch (iregNo) {
      case 0:  return OFFB_R0;
      case 1:  return OFFB_R1;
      case 2:  return OFFB_R2;
      case 3:  return OFFB_R3;
      case 4:  return OFFB_R4;
      case 5:  return OFFB_R5;
      case 6:  return OFFB_R6;
      case 7:  return OFFB_R7;
      case 8:  return OFFB_R8;
      case 9:  return OFFB_R9;
      case 10: return OFFB_R10;
      case 11: return OFFB_R11;
      case 12: return OFFB_R12;
      case 13: return OFFB_R13;
      case 14: return OFFB_R14;
      case 15: return OFFB_R15T;
      default: vassert(0);
   }
}

/* Plain ("low level") read from a reg; no +8 offset magic for r15. */
static IRExpr* llGetIReg ( UInt iregNo )
{
   vassert(iregNo < 16);
   return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
}

/* Architected read from a reg in ARM mode.  This automagically adds 8
   to all reads of r15.
*/ 480 static IRExpr* getIRegA ( UInt iregNo ) 481 { 482 IRExpr* e; 483 ASSERT_IS_ARM; 484 vassert(iregNo < 16); 485 if (iregNo == 15) { 486 /* If asked for r15, don't read the guest state value, as that 487 may not be up to date in the case where loop unrolling has 488 happened, because the first insn's write to the block is 489 omitted; hence in the 2nd and subsequent unrollings we don't 490 have a correct value in guest r15. Instead produce the 491 constant that we know would be produced at this point. */ 492 vassert(0 == (guest_R15_curr_instr_notENC & 3)); 493 e = mkU32(guest_R15_curr_instr_notENC + 8); 494 } else { 495 e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 ); 496 } 497 return e; 498 } 499 500 /* Architected read from a reg in Thumb mode. This automagically adds 501 4 to all reads of r15. */ 502 static IRExpr* getIRegT ( UInt iregNo ) 503 { 504 IRExpr* e; 505 ASSERT_IS_THUMB; 506 vassert(iregNo < 16); 507 if (iregNo == 15) { 508 /* Ditto comment in getIReg. */ 509 vassert(0 == (guest_R15_curr_instr_notENC & 1)); 510 e = mkU32(guest_R15_curr_instr_notENC + 4); 511 } else { 512 e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 ); 513 } 514 return e; 515 } 516 517 /* Plain ("low level") write to a reg; no jump or alignment magic for 518 r15. */ 519 static void llPutIReg ( UInt iregNo, IRExpr* e ) 520 { 521 vassert(iregNo < 16); 522 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); 523 stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) ); 524 } 525 526 /* Architected write to an integer register in ARM mode. If it is to 527 r15, record info so at the end of this insn's translation, a branch 528 to it can be made. Also handles conditional writes to the 529 register: if guardT == IRTemp_INVALID then the write is 530 unconditional. If writing r15, also 4-align it. 
*/ 531 static void putIRegA ( UInt iregNo, 532 IRExpr* e, 533 IRTemp guardT /* :: Ity_I32, 0 or 1 */, 534 IRJumpKind jk /* if a jump is generated */ ) 535 { 536 /* if writing r15, force e to be 4-aligned. */ 537 // INTERWORKING FIXME. this needs to be relaxed so that 538 // puts caused by LDMxx which load r15 interwork right. 539 // but is no aligned too relaxed? 540 //if (iregNo == 15) 541 // e = binop(Iop_And32, e, mkU32(~3)); 542 ASSERT_IS_ARM; 543 /* So, generate either an unconditional or a conditional write to 544 the reg. */ 545 if (guardT == IRTemp_INVALID) { 546 /* unconditional write */ 547 llPutIReg( iregNo, e ); 548 } else { 549 llPutIReg( iregNo, 550 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)), 551 llGetIReg(iregNo), 552 e )); 553 } 554 if (iregNo == 15) { 555 // assert against competing r15 updates. Shouldn't 556 // happen; should be ruled out by the instr matching 557 // logic. 558 vassert(r15written == False); 559 vassert(r15guard == IRTemp_INVALID); 560 vassert(r15kind == Ijk_Boring); 561 r15written = True; 562 r15guard = guardT; 563 r15kind = jk; 564 } 565 } 566 567 568 /* Architected write to an integer register in Thumb mode. Writes to 569 r15 are not allowed. Handles conditional writes to the register: 570 if guardT == IRTemp_INVALID then the write is unconditional. */ 571 static void putIRegT ( UInt iregNo, 572 IRExpr* e, 573 IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 574 { 575 /* So, generate either an unconditional or a conditional write to 576 the reg. */ 577 ASSERT_IS_THUMB; 578 vassert(iregNo >= 0 && iregNo <= 14); 579 if (guardT == IRTemp_INVALID) { 580 /* unconditional write */ 581 llPutIReg( iregNo, e ); 582 } else { 583 llPutIReg( iregNo, 584 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)), 585 llGetIReg(iregNo), 586 e )); 587 } 588 } 589 590 591 /* Thumb16 and Thumb32 only. 592 Returns true if reg is 13 or 15. Implements the BadReg 593 predicate in the ARM ARM. 
*/
static Bool isBadRegT ( UInt r )
{
   vassert(r <= 15);
   ASSERT_IS_THUMB;
   return r == 13 || r == 15;
}


/* ---------------- Double registers ---------------- */

/* Map a VFP/Neon D register number (0 .. 31) to its guest state
   offset. */
static Int doubleGuestRegOffset ( UInt dregNo )
{
   /* Do we care about endianness here?  Probably do if we ever get
      into the situation of dealing with the single-precision VFP
      registers. */
   switch (dregNo) {
      case 0:  return OFFB_D0;
      case 1:  return OFFB_D1;
      case 2:  return OFFB_D2;
      case 3:  return OFFB_D3;
      case 4:  return OFFB_D4;
      case 5:  return OFFB_D5;
      case 6:  return OFFB_D6;
      case 7:  return OFFB_D7;
      case 8:  return OFFB_D8;
      case 9:  return OFFB_D9;
      case 10: return OFFB_D10;
      case 11: return OFFB_D11;
      case 12: return OFFB_D12;
      case 13: return OFFB_D13;
      case 14: return OFFB_D14;
      case 15: return OFFB_D15;
      case 16: return OFFB_D16;
      case 17: return OFFB_D17;
      case 18: return OFFB_D18;
      case 19: return OFFB_D19;
      case 20: return OFFB_D20;
      case 21: return OFFB_D21;
      case 22: return OFFB_D22;
      case 23: return OFFB_D23;
      case 24: return OFFB_D24;
      case 25: return OFFB_D25;
      case 26: return OFFB_D26;
      case 27: return OFFB_D27;
      case 28: return OFFB_D28;
      case 29: return OFFB_D29;
      case 30: return OFFB_D30;
      case 31: return OFFB_D31;
      default: vassert(0);
   }
}

/* Plain ("low level") read from a VFP Dreg. */
static IRExpr* llGetDReg ( UInt dregNo )
{
   vassert(dregNo < 32);
   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
}

/* Architected read from a VFP Dreg. */
static IRExpr* getDReg ( UInt dregNo ) {
   return llGetDReg( dregNo );
}

/* Plain ("low level") write to a VFP Dreg. */
static void llPutDReg ( UInt dregNo, IRExpr* e )
{
   vassert(dregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
}

/* Architected write to a VFP Dreg.  Handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional. */
static void putDReg ( UInt    dregNo,
                      IRExpr* e,
                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutDReg( dregNo, e );
   } else {
      llPutDReg( dregNo,
                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                               llGetDReg(dregNo),
                               e ));
   }
}

/* And now exactly the same stuff all over again, but this time
   taking/returning I64 rather than F64, to support 64-bit Neon
   ops. */

/* Plain ("low level") read from a Neon Integer Dreg. */
static IRExpr* llGetDRegI64 ( UInt dregNo )
{
   vassert(dregNo < 32);
   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
}

/* Architected read from a Neon Integer Dreg. */
static IRExpr* getDRegI64 ( UInt dregNo ) {
   return llGetDRegI64( dregNo );
}

/* Plain ("low level") write to a Neon Integer Dreg. */
static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
{
   vassert(dregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
}

/* Architected write to a Neon Integer Dreg.  Handles conditional
   writes to the register: if guardT == IRTemp_INVALID then the write
   is unconditional. */
static void putDRegI64 ( UInt    dregNo,
                         IRExpr* e,
                         IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutDRegI64( dregNo, e );
   } else {
      llPutDRegI64( dregNo,
                    IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                                  llGetDRegI64(dregNo),
                                  e ));
   }
}

/* ---------------- Quad registers ---------------- */

/* Map a Neon Q register number (0 .. 15) to its guest state offset.
   Qn overlays the D register pair D(2n+1):D(2n), hence the offset of
   D(2n). */
static Int quadGuestRegOffset ( UInt qregNo )
{
   /* Do we care about endianness here?  Probably do if we ever get
      into the situation of dealing with the 64 bit Neon registers. */
   switch (qregNo) {
      case 0:  return OFFB_D0;
      case 1:  return OFFB_D2;
      case 2:  return OFFB_D4;
      case 3:  return OFFB_D6;
      case 4:  return OFFB_D8;
      case 5:  return OFFB_D10;
      case 6:  return OFFB_D12;
      case 7:  return OFFB_D14;
      case 8:  return OFFB_D16;
      case 9:  return OFFB_D18;
      case 10: return OFFB_D20;
      case 11: return OFFB_D22;
      case 12: return OFFB_D24;
      case 13: return OFFB_D26;
      case 14: return OFFB_D28;
      case 15: return OFFB_D30;
      default: vassert(0);
   }
}

/* Plain ("low level") read from a Neon Qreg. */
static IRExpr* llGetQReg ( UInt qregNo )
{
   vassert(qregNo < 16);
   return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
}

/* Architected read from a Neon Qreg. */
static IRExpr* getQReg ( UInt qregNo ) {
   return llGetQReg( qregNo );
}

/* Plain ("low level") write to a Neon Qreg. */
static void llPutQReg ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 16);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
}

/* Architected write to a Neon Qreg.  Handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional. */
static void putQReg ( UInt    qregNo,
                      IRExpr* e,
                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutQReg( qregNo, e );
   } else {
      llPutQReg( qregNo,
                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                               llGetQReg(qregNo),
                               e ));
   }
}


/* ---------------- Float registers ---------------- */

/* Map a single-precision VFP F register number (0 .. 31) to its guest
   state offset: the containing D register's offset, plus 4 for the
   odd-numbered half on a little-endian host. */
static Int floatGuestRegOffset ( UInt fregNo )
{
   /* Start with the offset of the containing double, and then correct
      for endianness.  Actually this is completely bogus and needs
      careful thought. */
   Int off;
   vassert(fregNo < 32);
   off = doubleGuestRegOffset(fregNo >> 1);
   if (host_is_bigendian) {
      vassert(0);
   } else {
      if (fregNo & 1)
         off += 4;
   }
   return off;
}

/* Plain ("low level") read from a VFP Freg. */
static IRExpr* llGetFReg ( UInt fregNo )
{
   vassert(fregNo < 32);
   return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
}

/* Architected read from a VFP Freg. */
static IRExpr* getFReg ( UInt fregNo ) {
   return llGetFReg( fregNo );
}

/* Plain ("low level") write to a VFP Freg. */
static void llPutFReg ( UInt fregNo, IRExpr* e )
{
   vassert(fregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
   stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
}

/* Architected write to a VFP Freg.  Handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional. */
static void putFReg ( UInt    fregNo,
                      IRExpr* e,
                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutFReg( fregNo, e );
   } else {
      llPutFReg( fregNo,
                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                               llGetFReg(fregNo),
                               e ));
   }
}


/* ---------------- Misc registers ---------------- */

/* Write 'e' to one of a small whitelisted set of miscellaneous guest
   state offsets, optionally guarded: if guardT == IRTemp_INVALID the
   write is unconditional. */
static void putMiscReg32 ( UInt    gsoffset,
                           IRExpr* e, /* :: Ity_I32 */
                           IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   switch (gsoffset) {
      case OFFB_FPSCR:   break;
      case OFFB_QFLAG32: break;
      case OFFB_GEFLAG0: break;
      case OFFB_GEFLAG1: break;
      case OFFB_GEFLAG2: break;
      case OFFB_GEFLAG3: break;
      default: vassert(0); /* awaiting more cases */
   }
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);

   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      stmt(IRStmt_Put(gsoffset, e));
   } else {
      stmt(IRStmt_Put(
         gsoffset,
         IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
                       IRExpr_Get(gsoffset, Ity_I32),
                       e
         )
      ));
   }
}

/* Copy the guest ITSTATE word into a fresh temp and return it.
   Thumb only. */
static IRTemp get_ITSTATE ( void )
{
   ASSERT_IS_THUMB;
   IRTemp t = newTemp(Ity_I32);
   assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
   return t;
}

/* Write 't' back to the guest ITSTATE word.  Thumb only. */
static void put_ITSTATE ( IRTemp t )
{
   ASSERT_IS_THUMB;
   stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
}

/* Copy the guest QFLAG32 word into a fresh temp and return it. */
static IRTemp get_QFLAG32 ( void )
{
   IRTemp t = newTemp(Ity_I32);
   assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
   return t;
}

/* Write 't' to the guest QFLAG32 word, optionally guarded by condT. */
static void put_QFLAG32 ( IRTemp t, IRTemp condT )
{
   putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
}

/* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
   Status Register) to indicate that overflow or saturation occurred.
   Nb: t must be zero to denote no saturation, and any nonzero
   value to indicate saturation.
*/ 919 static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT ) 920 { 921 IRTemp old = get_QFLAG32(); 922 IRTemp nyu = newTemp(Ity_I32); 923 assign(nyu, binop(Iop_Or32, mkexpr(old), e) ); 924 put_QFLAG32(nyu, condT); 925 } 926 927 /* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit. 928 flagNo: which flag bit to set [3...0] 929 lowbits_to_ignore: 0 = look at all 32 bits 930 8 = look at top 24 bits only 931 16 = look at top 16 bits only 932 31 = look at the top bit only 933 e: input value to be evaluated. 934 The new value is taken from 'e' with the lowest 'lowbits_to_ignore' 935 masked out. If the resulting value is zero then the GE flag is 936 set to 0; any other value sets the flag to 1. */ 937 static void put_GEFLAG32 ( Int flagNo, /* 0, 1, 2 or 3 */ 938 Int lowbits_to_ignore, /* 0, 8, 16 or 31 */ 939 IRExpr* e, /* Ity_I32 */ 940 IRTemp condT ) 941 { 942 vassert( flagNo >= 0 && flagNo <= 3 ); 943 vassert( lowbits_to_ignore == 0 || 944 lowbits_to_ignore == 8 || 945 lowbits_to_ignore == 16 || 946 lowbits_to_ignore == 31 ); 947 IRTemp masked = newTemp(Ity_I32); 948 assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore))); 949 950 switch (flagNo) { 951 case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break; 952 case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break; 953 case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break; 954 case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break; 955 default: vassert(0); 956 } 957 } 958 959 /* Return the (32-bit, zero-or-nonzero representation scheme) of 960 the specified GE flag. 
*/ 961 static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ ) 962 { 963 switch (flagNo) { 964 case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 ); 965 case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 ); 966 case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 ); 967 case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 ); 968 default: vassert(0); 969 } 970 } 971 972 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and 973 2 are set from bit 31 of the value, and GE 1 and 0 are set from bit 974 15 of the value. All other bits are ignored. */ 975 static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT ) 976 { 977 IRTemp ge10 = newTemp(Ity_I32); 978 IRTemp ge32 = newTemp(Ity_I32); 979 assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000))); 980 assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000))); 981 put_GEFLAG32( 0, 0, mkexpr(ge10), condT ); 982 put_GEFLAG32( 1, 0, mkexpr(ge10), condT ); 983 put_GEFLAG32( 2, 0, mkexpr(ge32), condT ); 984 put_GEFLAG32( 3, 0, mkexpr(ge32), condT ); 985 } 986 987 988 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3 989 from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from 990 bit 7. All other bits are ignored. 
*/
static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
{
   IRTemp ge0 = newTemp(Ity_I32);
   IRTemp ge1 = newTemp(Ity_I32);
   IRTemp ge2 = newTemp(Ity_I32);
   IRTemp ge3 = newTemp(Ity_I32);
   assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
   assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
   assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
   assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
   put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
   put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
   put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
   put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
}


/* ---------------- FPSCR stuff ---------------- */

/* Generate IR to get hold of the rounding mode bits in FPSCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPSCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
   /* Now swap them: bit 0 moves up to position 1, bit 1 moves down
      to position 0, and everything else is discarded. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
         ));
   return swapped;
}


/*------------------------------------------------------------*/
/*--- Helpers for flag handling and conditional insns      ---*/
/*------------------------------------------------------------*/

/* Return a printable "{xx}" suffix for the given condition code,
   or "" for AL (the always-true default). */
static HChar* name_ARMCondcode ( ARMCondcode cond )
{
   switch (cond) {
      case ARMCondEQ:  return "{eq}";
      case ARMCondNE:  return "{ne}";
      case ARMCondHS:  return "{hs}"; // or 'cs'
      case ARMCondLO:  return "{lo}"; // or 'cc'
      case ARMCondMI:  return "{mi}";
      case ARMCondPL:  return "{pl}";
      case ARMCondVS:  return "{vs}";
      case ARMCondVC:  return "{vc}";
      case ARMCondHI:  return "{hi}";
      case ARMCondLS:  return "{ls}";
      case ARMCondGE:  return "{ge}";
      case ARMCondLT:  return "{lt}";
      case ARMCondGT:  return "{gt}";
      case ARMCondLE:  return "{le}";
      case ARMCondAL:  return ""; // {al}: is the default
      case ARMCondNV:  return "{nv}";
      default: vpanic("name_ARMCondcode");
   }
}
/* and a handy shorthand for it */
static HChar* nCC ( ARMCondcode cond ) {
   return name_ARMCondcode(cond);
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I32, suitable for narrowing.  Although the return type is
   Ity_I32, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I32 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
   /* And 'cond' had better produce a value in which only bits 7:4
      bits are nonzero.  However, obviously we can't assert for
      that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".  However, as per comments
      above, 'cond' must be supplied pre-shifted to this function.

      This pairing scheme requires that the ARM_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_condition", &armg_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I32, suitable for narrowing.  Although the return type is
   Ity_I32, the returned value is either 0 or 1.
*/
static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
{
  /* First arg is "(cond << 4) | condition".  This requires that the
     ARM_CC_OP_ values all fit in 4 bits.  Hence we are passing a
     (COND, OP) pair in the lowest 8 bits of the first argument. */
   vassert(cond >= 0 && cond <= 15);
   return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
}


/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I32.
*/
static IRExpr* mk_armg_calculate_flag_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_flag_c", &armg_calculate_flag_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to calculate just the overflow flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I32. */
static IRExpr* mk_armg_calculate_flag_v ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_flag_v", &armg_calculate_flag_v,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to calculate N Z C V in bits 31:28 of the
   returned word. */
static IRExpr* mk_armg_calculate_flags_nzcv ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR computing a nonzero value iff any 32-bit lane of resL
   differs from the corresponding lane of resR (see the #else variant
   below for the plain-IR statement of that).  resL/resR are
   presumably the unsaturated and saturated results of a Neon op --
   TODO confirm against callers.  Q selects 128-bit (I32x4) vs 64-bit
   (I32x2) operands. */
static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
{
   IRExpr** args1;
   IRExpr** args2;
   IRExpr *call1, *call2, *res;

   /* Split each vector into 32-bit lanes; the helper takes 4 lanes
      per call, so a 128-bit comparison needs two calls.  args2 is
      only assigned (and only used) when Q is True. */
   if (Q) {
      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
                              binop(Iop_GetElem32x4, resL, mkU8(1)),
                              binop(Iop_GetElem32x4, resR, mkU8(0)),
                              binop(Iop_GetElem32x4, resR, mkU8(1)) );
      args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
                              binop(Iop_GetElem32x4, resL, mkU8(3)),
                              binop(Iop_GetElem32x4, resR, mkU8(2)),
                              binop(Iop_GetElem32x4, resR, mkU8(3)) );
   } else {
      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
                              binop(Iop_GetElem32x2, resL, mkU8(1)),
                              binop(Iop_GetElem32x2, resR, mkU8(0)),
                              binop(Iop_GetElem32x2, resR, mkU8(1)) );
   }

#if 1
   call1 = mkIRExprCCall(
             Ity_I32,
             0/*regparm*/,
             "armg_calculate_flag_qc", &armg_calculate_flag_qc,
             args1
          );
   if (Q) {
      call2 = mkIRExprCCall(
                Ity_I32,
                0/*regparm*/,
                "armg_calculate_flag_qc", &armg_calculate_flag_qc,
                args2
             );
   }
   if (Q) {
      res = binop(Iop_Or32, call1, call2);
   } else {
      res = call1;
   }
#else
   /* Equivalent plain-IR formulation, kept for reference: nonzero
      iff any lane of resL XOR the matching lane of resR is nonzero. */
   if (Q) {
      res = unop(Iop_1Uto32,
                 binop(Iop_CmpNE32,
                       binop(Iop_Or32,
                             binop(Iop_Or32,
                                   binop(Iop_Xor32,
                                         args1[0],
                                         args1[2]),
                                   binop(Iop_Xor32,
                                         args1[1],
                                         args1[3])),
                             binop(Iop_Or32,
                                   binop(Iop_Xor32,
                                         args2[0],
                                         args2[2]),
                                   binop(Iop_Xor32,
                                         args2[1],
                                         args2[3]))),
                       mkU32(0)));
   } else {
      res = unop(Iop_1Uto32,
                 binop(Iop_CmpNE32,
                       binop(Iop_Or32,
                             binop(Iop_Xor32,
                                   args1[0],
                                   args1[2]),
                             binop(Iop_Xor32,
                                   args1[1],
                                   args1[3])),
                       mkU32(0)));
   }
#endif
   return res;
}

// FIXME: this is named wrongly .. 
looks like a sticky set of 1289 // QC, not a write to it. 1290 static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q, 1291 IRTemp condT ) 1292 { 1293 putMiscReg32 (OFFB_FPSCR, 1294 binop(Iop_Or32, 1295 IRExpr_Get(OFFB_FPSCR, Ity_I32), 1296 binop(Iop_Shl32, 1297 mk_armg_calculate_flag_qc(resL, resR, Q), 1298 mkU8(27))), 1299 condT); 1300 } 1301 1302 /* Build IR to conditionally set the flags thunk. As with putIReg, if 1303 guard is IRTemp_INVALID then it's unconditional, else it holds a 1304 condition :: Ity_I32. */ 1305 static 1306 void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1, 1307 IRTemp t_dep2, IRTemp t_ndep, 1308 IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 1309 { 1310 IRTemp c8; 1311 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32)); 1312 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32)); 1313 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32)); 1314 vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER); 1315 if (guardT == IRTemp_INVALID) { 1316 /* unconditional */ 1317 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(cc_op) )); 1318 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) )); 1319 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) )); 1320 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) )); 1321 } else { 1322 /* conditional */ 1323 c8 = newTemp(Ity_I8); 1324 assign( c8, unop(Iop_32to8, mkexpr(guardT)) ); 1325 stmt( IRStmt_Put( 1326 OFFB_CC_OP, 1327 IRExpr_Mux0X( mkexpr(c8), 1328 IRExpr_Get(OFFB_CC_OP, Ity_I32), 1329 mkU32(cc_op) ))); 1330 stmt( IRStmt_Put( 1331 OFFB_CC_DEP1, 1332 IRExpr_Mux0X( mkexpr(c8), 1333 IRExpr_Get(OFFB_CC_DEP1, Ity_I32), 1334 mkexpr(t_dep1) ))); 1335 stmt( IRStmt_Put( 1336 OFFB_CC_DEP2, 1337 IRExpr_Mux0X( mkexpr(c8), 1338 IRExpr_Get(OFFB_CC_DEP2, Ity_I32), 1339 mkexpr(t_dep2) ))); 1340 stmt( IRStmt_Put( 1341 OFFB_CC_NDEP, 1342 IRExpr_Mux0X( mkexpr(c8), 1343 IRExpr_Get(OFFB_CC_NDEP, Ity_I32), 1344 mkexpr(t_ndep) ))); 1345 } 1346 } 1347 1348 1349 /* Minor variant of the above that sets NDEP to zero (if it 1350 sets it at 
all) */ 1351 static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1, 1352 IRTemp t_dep2, 1353 IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 1354 { 1355 IRTemp z32 = newTemp(Ity_I32); 1356 assign( z32, mkU32(0) ); 1357 setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT ); 1358 } 1359 1360 1361 /* Minor variant of the above that sets DEP2 to zero (if it 1362 sets it at all) */ 1363 static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1, 1364 IRTemp t_ndep, 1365 IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 1366 { 1367 IRTemp z32 = newTemp(Ity_I32); 1368 assign( z32, mkU32(0) ); 1369 setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT ); 1370 } 1371 1372 1373 /* Minor variant of the above that sets DEP2 and NDEP to zero (if it 1374 sets them at all) */ 1375 static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1, 1376 IRTemp guardT /* :: Ity_I32, 0 or 1 */ ) 1377 { 1378 IRTemp z32 = newTemp(Ity_I32); 1379 assign( z32, mkU32(0) ); 1380 setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT ); 1381 } 1382 1383 1384 /* ARM only */ 1385 /* Generate a side-exit to the next instruction, if the given guard 1386 expression :: Ity_I32 is 0 (note! the side exit is taken if the 1387 condition is false!) This is used to skip over conditional 1388 instructions which we can't generate straight-line code for, either 1389 because they are too complex or (more likely) they potentially 1390 generate exceptions. 
1391 */ 1392 static void mk_skip_over_A32_if_cond_is_false ( 1393 IRTemp guardT /* :: Ity_I32, 0 or 1 */ 1394 ) 1395 { 1396 ASSERT_IS_ARM; 1397 vassert(guardT != IRTemp_INVALID); 1398 vassert(0 == (guest_R15_curr_instr_notENC & 3)); 1399 stmt( IRStmt_Exit( 1400 unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))), 1401 Ijk_Boring, 1402 IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)) 1403 )); 1404 } 1405 1406 /* Thumb16 only */ 1407 /* ditto, but jump over a 16-bit thumb insn */ 1408 static void mk_skip_over_T16_if_cond_is_false ( 1409 IRTemp guardT /* :: Ity_I32, 0 or 1 */ 1410 ) 1411 { 1412 ASSERT_IS_THUMB; 1413 vassert(guardT != IRTemp_INVALID); 1414 vassert(0 == (guest_R15_curr_instr_notENC & 1)); 1415 stmt( IRStmt_Exit( 1416 unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))), 1417 Ijk_Boring, 1418 IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)) 1419 )); 1420 } 1421 1422 1423 /* Thumb32 only */ 1424 /* ditto, but jump over a 32-bit thumb insn */ 1425 static void mk_skip_over_T32_if_cond_is_false ( 1426 IRTemp guardT /* :: Ity_I32, 0 or 1 */ 1427 ) 1428 { 1429 ASSERT_IS_THUMB; 1430 vassert(guardT != IRTemp_INVALID); 1431 vassert(0 == (guest_R15_curr_instr_notENC & 1)); 1432 stmt( IRStmt_Exit( 1433 unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))), 1434 Ijk_Boring, 1435 IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)) 1436 )); 1437 } 1438 1439 1440 /* Thumb16 and Thumb32 only 1441 Generate a SIGILL followed by a restart of the current instruction 1442 if the given temp is nonzero. 
*/
static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
{
   ASSERT_IS_THUMB;
   vassert(t != IRTemp_INVALID);
   vassert(0 == (guest_R15_curr_instr_notENC & 1));
   /* Exit with Ijk_NoDecode at the current insn's address (bit 0 set
      to encode Thumb mode), which delivers the SIGILL and, if the
      signal is ignored/handled, re-decodes this same instruction. */
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
         Ijk_NoDecode,
         IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1))
      )
   );
}


/* Inspect the old_itstate, and generate a SIGILL if it indicates that
   we are currently in an IT block and are not the last in the block.
   This also rolls back guest_ITSTATE to its old value before the exit
   and restores it to its new value afterwards.  This is so that if
   the exit is taken, we have an up to date version of ITSTATE
   available.  Without doing that, we have no hope of making precise
   exceptions work. */
static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
               IRTemp old_itstate /* :: Ity_I32 */,
               IRTemp new_itstate /* :: Ity_I32 */
            )
{
   ASSERT_IS_THUMB;
   put_ITSTATE(old_itstate); // backout
   /* Dropping the low 8 bits leaves the guards for the *following*
      insns; nonzero => we are inside an IT block but not at its last
      instruction. */
   IRTemp guards_for_next3 = newTemp(Ity_I32);
   assign(guards_for_next3,
          binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
   gen_SIGILL_T_if_nonzero(guards_for_next3);
   put_ITSTATE(new_itstate); // restore
}


/* Simpler version of the above, which generates a SIGILL if
   we're anywhere within an IT block. */
static void gen_SIGILL_T_if_in_ITBlock (
               IRTemp old_itstate /* :: Ity_I32 */,
               IRTemp new_itstate /* :: Ity_I32 */
            )
{
   put_ITSTATE(old_itstate); // backout
   gen_SIGILL_T_if_nonzero(old_itstate);
   put_ITSTATE(new_itstate); // restore
}


/* Generate an APSR value, from the NZCV thunk, and
   from QFLAG32 and GEFLAG0 .. GEFLAG3.
*/
static IRTemp synthesise_APSR ( void )
{
   IRTemp res1 = newTemp(Ity_I32);
   // Get NZCV (already positioned at bits 31:28)
   assign( res1, mk_armg_calculate_flags_nzcv() );
   // OR in the Q value, normalised to a single bit at ARMG_CC_SHIFT_Q
   IRTemp res2 = newTemp(Ity_I32);
   assign(
      res2,
      binop(Iop_Or32,
            mkexpr(res1),
            binop(Iop_Shl32,
                  unop(Iop_1Uto32,
                       binop(Iop_CmpNE32,
                             mkexpr(get_QFLAG32()),
                             mkU32(0))),
                  mkU8(ARMG_CC_SHIFT_Q)))
   );
   // OR in GE0 .. GE3, each normalised to one bit at positions 16..19
   IRExpr* ge0
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
   IRExpr* ge1
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
   IRExpr* ge2
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
   IRExpr* ge3
      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
   IRTemp res3 = newTemp(Ity_I32);
   assign(res3,
          binop(Iop_Or32,
                mkexpr(res2),
                binop(Iop_Or32,
                      binop(Iop_Or32,
                            binop(Iop_Shl32, ge0, mkU8(16)),
                            binop(Iop_Shl32, ge1, mkU8(17))),
                      binop(Iop_Or32,
                            binop(Iop_Shl32, ge2, mkU8(18)),
                            binop(Iop_Shl32, ge3, mkU8(19))) )));
   return res3;
}


/* and the inverse transformation: given an APSR value,
   set the NZCV thunk, the Q flag, and the GE flags.
*/ 1539 static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge, 1540 IRTemp apsrT, IRTemp condT ) 1541 { 1542 vassert(write_nzcvq || write_ge); 1543 if (write_nzcvq) { 1544 // Do NZCV 1545 IRTemp immT = newTemp(Ity_I32); 1546 assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) ); 1547 setFlags_D1(ARMG_CC_OP_COPY, immT, condT); 1548 // Do Q 1549 IRTemp qnewT = newTemp(Ity_I32); 1550 assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q))); 1551 put_QFLAG32(qnewT, condT); 1552 } 1553 if (write_ge) { 1554 // Do GE3..0 1555 put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)), 1556 condT); 1557 put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)), 1558 condT); 1559 put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)), 1560 condT); 1561 put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)), 1562 condT); 1563 } 1564 } 1565 1566 1567 /*------------------------------------------------------------*/ 1568 /*--- Helpers for saturation ---*/ 1569 /*------------------------------------------------------------*/ 1570 1571 /* FIXME: absolutely the only diff. between (a) armUnsignedSatQ and 1572 (b) armSignedSatQ is that in (a) the floor is set to 0, whereas in 1573 (b) the floor is computed from the value of imm5. these two fnsn 1574 should be commoned up. */ 1575 1576 /* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1 1577 Optionally return flag resQ saying whether saturation occurred. 
   See definition in manual, section A2.2.1, page 41
   (bits(N), boolean) UnsignedSatQ( integer i, integer N )
   {
     if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
     elsif ( i < 0 )    { result = 0; saturated = TRUE; }
     else               { result = i; saturated = FALSE; }
     return ( result<N-1:0>, saturated );
   }
*/
static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
                             IRTemp* resQ, /* OUT - Ity_I32 */
                             IRTemp regT,  /* value to clamp - Ity_I32 */
                             UInt imm5 )   /* saturation ceiling */
{
   UInt ceil  = (1 << imm5) - 1;    // (2^imm5)-1
   UInt floor = 0;

   IRTemp node0 = newTemp(Ity_I32);
   IRTemp node1 = newTemp(Ity_I32);
   IRTemp node2 = newTemp(Ity_I1);
   IRTemp node3 = newTemp(Ity_I32);
   IRTemp node4 = newTemp(Ity_I32);
   IRTemp node5 = newTemp(Ity_I1);
   IRTemp node6 = newTemp(Ity_I32);

   assign( node0, mkexpr(regT) );
   assign( node1, mkU32(ceil) );
   /* node2 = (value > ceiling)?  node3 = min(value, ceiling) */
   assign( node2, binop( Iop_CmpLT32S, mkexpr(node1), mkexpr(node0) ) );
   assign( node3, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node2)),
                                mkexpr(node0),
                                mkexpr(node1) ) );
   /* node5 = (clamped < floor)?  node6 = max(node3, floor) */
   assign( node4, mkU32(floor) );
   assign( node5, binop( Iop_CmpLT32S, mkexpr(node3), mkexpr(node4) ) );
   assign( node6, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node5)),
                                mkexpr(node3),
                                mkexpr(node4) ) );
   assign( *res, mkexpr(node6) );

   /* if saturation occurred, then resQ is set to some nonzero value
      if sat did not occur, resQ is guaranteed to be zero. */
   if (resQ) {
      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   }
}


/* SignedSatQ(): 'clamp' each value so it lies
   between -2^(N-1) <= x <= 2^(N-1) - 1
   Optionally return flag resQ saying whether saturation occurred.
   - see definition in manual, section A2.2.1, page 41
   (bits(N), boolean ) SignedSatQ( integer i, integer N )
   {
     if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
     elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = TRUE; }
     else                      { result = i;           saturated = FALSE; }
     return ( result[N-1:0], saturated );
   }
   (NB: the floor branch sets saturated = TRUE; an earlier version of
   this comment wrongly said FALSE.)
*/
static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
                           UInt imm5,      /* saturation ceiling */
                           IRTemp* res,    /* OUT - Ity_I32 */
                           IRTemp* resQ )  /* OUT - Ity_I32 */
{
   Int ceil  =  (1 << (imm5-1)) - 1;  //  (2^(imm5-1))-1
   Int floor = -(1 << (imm5-1));      // -(2^(imm5-1))

   IRTemp node0 = newTemp(Ity_I32);
   IRTemp node1 = newTemp(Ity_I32);
   IRTemp node2 = newTemp(Ity_I1);
   IRTemp node3 = newTemp(Ity_I32);
   IRTemp node4 = newTemp(Ity_I32);
   IRTemp node5 = newTemp(Ity_I1);
   IRTemp node6 = newTemp(Ity_I32);

   assign( node0, mkexpr(regT) );
   assign( node1, mkU32(ceil) );
   /* node2 = (value > ceiling)?  node3 = min(value, ceiling) */
   assign( node2, binop( Iop_CmpLT32S, mkexpr(node1), mkexpr(node0) ) );
   assign( node3, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node2)),
                                mkexpr(node0), mkexpr(node1) ) );
   /* node5 = (clamped < floor)?  node6 = max(node3, floor) */
   assign( node4, mkU32(floor) );
   assign( node5, binop( Iop_CmpLT32S, mkexpr(node3), mkexpr(node4) ) );
   assign( node6, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node5)),
                                mkexpr(node3), mkexpr(node4) ) );
   assign( *res, mkexpr(node6) );

   /* if saturation occurred, then resQ is set to some nonzero value
      if sat did not occur, resQ is guaranteed to be zero. */
   if (resQ) {
      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
   }
}


/* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
   overflow occurred for 32-bit addition.  Needs both args and the
   result.  HD p27.
*/
/* Signed overflow for argL + argR (== resE) happens exactly when the
   two operands have the same sign but the result's sign differs;
   i.e. when bit 31 of both (res ^ argL) and (res ^ argR) is set. */
static
IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
                                      IRTemp argL, IRTemp argR )
{
   IRTemp res = newTemp(Ity_I32);
   assign(res, resE);
   IRExpr* diffL = binop( Iop_Xor32, mkexpr(res), mkexpr(argL) );
   IRExpr* diffR = binop( Iop_Xor32, mkexpr(res), mkexpr(argR) );
   return binop( Iop_Shr32,
                 binop( Iop_And32, diffL, diffR ),
                 mkU8(31) );
}


/*------------------------------------------------------------*/
/*--- Larger helpers                                       ---*/
/*------------------------------------------------------------*/

/* Compute both the result and new C flag value for a LSL by an imm5
   or by a register operand.  May generate reads of the old C value
   (hence only safe to use before any writes to guest state happen).
   Are factored out so can be used by both ARM and Thumb.

   Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
      "res"  (the result)  is a.k.a. "shop", shifter operand
      "newC" (the new C)   is a.k.a. "shco", shifter carry out

   The calling convention for res and newC is a bit funny.  They could
   be passed by value, but instead are passed by ref.
1703 */ 1704 1705 static void compute_result_and_C_after_LSL_by_imm5 ( 1706 /*OUT*/HChar* buf, 1707 IRTemp* res, 1708 IRTemp* newC, 1709 IRTemp rMt, UInt shift_amt, /* operands */ 1710 UInt rM /* only for debug printing */ 1711 ) 1712 { 1713 if (shift_amt == 0) { 1714 if (newC) { 1715 assign( *newC, mk_armg_calculate_flag_c() ); 1716 } 1717 assign( *res, mkexpr(rMt) ); 1718 DIS(buf, "r%u", rM); 1719 } else { 1720 vassert(shift_amt >= 1 && shift_amt <= 31); 1721 if (newC) { 1722 assign( *newC, 1723 binop(Iop_And32, 1724 binop(Iop_Shr32, mkexpr(rMt), 1725 mkU8(32 - shift_amt)), 1726 mkU32(1))); 1727 } 1728 assign( *res, 1729 binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) ); 1730 DIS(buf, "r%u, LSL #%u", rM, shift_amt); 1731 } 1732 } 1733 1734 1735 static void compute_result_and_C_after_LSL_by_reg ( 1736 /*OUT*/HChar* buf, 1737 IRTemp* res, 1738 IRTemp* newC, 1739 IRTemp rMt, IRTemp rSt, /* operands */ 1740 UInt rM, UInt rS /* only for debug printing */ 1741 ) 1742 { 1743 // shift left in range 0 .. 255 1744 // amt = rS & 255 1745 // res = amt < 32 ? Rm << amt : 0 1746 // newC = amt == 0 ? oldC : 1747 // amt in 1..32 ? Rm[32-amt] : 0 1748 IRTemp amtT = newTemp(Ity_I32); 1749 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) ); 1750 if (newC) { 1751 /* mux0X(amt == 0, 1752 mux0X(amt < 32, 1753 0, 1754 Rm[(32-amt) & 31]) 1755 oldC) 1756 */ 1757 /* About the best you can do is pray that iropt is able 1758 to nuke most or all of the following junk. 
*/ 1759 IRTemp oldC = newTemp(Ity_I32); 1760 assign(oldC, mk_armg_calculate_flag_c() ); 1761 assign( 1762 *newC, 1763 IRExpr_Mux0X( 1764 unop(Iop_1Uto8, 1765 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))), 1766 IRExpr_Mux0X( 1767 unop(Iop_1Uto8, 1768 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))), 1769 mkU32(0), 1770 binop(Iop_Shr32, 1771 mkexpr(rMt), 1772 unop(Iop_32to8, 1773 binop(Iop_And32, 1774 binop(Iop_Sub32, 1775 mkU32(32), 1776 mkexpr(amtT)), 1777 mkU32(31) 1778 ) 1779 ) 1780 ) 1781 ), 1782 mkexpr(oldC) 1783 ) 1784 ); 1785 } 1786 // (Rm << (Rs & 31)) & (((Rs & 255) - 32) >>s 31) 1787 // Lhs of the & limits the shift to 31 bits, so as to 1788 // give known IR semantics. Rhs of the & is all 1s for 1789 // Rs <= 31 and all 0s for Rs >= 32. 1790 assign( 1791 *res, 1792 binop( 1793 Iop_And32, 1794 binop(Iop_Shl32, 1795 mkexpr(rMt), 1796 unop(Iop_32to8, 1797 binop(Iop_And32, mkexpr(rSt), mkU32(31)))), 1798 binop(Iop_Sar32, 1799 binop(Iop_Sub32, 1800 mkexpr(amtT), 1801 mkU32(32)), 1802 mkU8(31)))); 1803 DIS(buf, "r%u, LSL r%u", rM, rS); 1804 } 1805 1806 1807 static void compute_result_and_C_after_LSR_by_imm5 ( 1808 /*OUT*/HChar* buf, 1809 IRTemp* res, 1810 IRTemp* newC, 1811 IRTemp rMt, UInt shift_amt, /* operands */ 1812 UInt rM /* only for debug printing */ 1813 ) 1814 { 1815 if (shift_amt == 0) { 1816 // conceptually a 32-bit shift, however: 1817 // res = 0 1818 // newC = Rm[31] 1819 if (newC) { 1820 assign( *newC, 1821 binop(Iop_And32, 1822 binop(Iop_Shr32, mkexpr(rMt), mkU8(31)), 1823 mkU32(1))); 1824 } 1825 assign( *res, mkU32(0) ); 1826 DIS(buf, "r%u, LSR #0(a.k.a. 
32)", rM); 1827 } else { 1828 // shift in range 1..31 1829 // res = Rm >>u shift_amt 1830 // newC = Rm[shift_amt - 1] 1831 vassert(shift_amt >= 1 && shift_amt <= 31); 1832 if (newC) { 1833 assign( *newC, 1834 binop(Iop_And32, 1835 binop(Iop_Shr32, mkexpr(rMt), 1836 mkU8(shift_amt - 1)), 1837 mkU32(1))); 1838 } 1839 assign( *res, 1840 binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) ); 1841 DIS(buf, "r%u, LSR #%u", rM, shift_amt); 1842 } 1843 } 1844 1845 1846 static void compute_result_and_C_after_LSR_by_reg ( 1847 /*OUT*/HChar* buf, 1848 IRTemp* res, 1849 IRTemp* newC, 1850 IRTemp rMt, IRTemp rSt, /* operands */ 1851 UInt rM, UInt rS /* only for debug printing */ 1852 ) 1853 { 1854 // shift right in range 0 .. 255 1855 // amt = rS & 255 1856 // res = amt < 32 ? Rm >>u amt : 0 1857 // newC = amt == 0 ? oldC : 1858 // amt in 1..32 ? Rm[amt-1] : 0 1859 IRTemp amtT = newTemp(Ity_I32); 1860 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) ); 1861 if (newC) { 1862 /* mux0X(amt == 0, 1863 mux0X(amt < 32, 1864 0, 1865 Rm[(amt-1) & 31]) 1866 oldC) 1867 */ 1868 IRTemp oldC = newTemp(Ity_I32); 1869 assign(oldC, mk_armg_calculate_flag_c() ); 1870 assign( 1871 *newC, 1872 IRExpr_Mux0X( 1873 unop(Iop_1Uto8, 1874 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))), 1875 IRExpr_Mux0X( 1876 unop(Iop_1Uto8, 1877 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))), 1878 mkU32(0), 1879 binop(Iop_Shr32, 1880 mkexpr(rMt), 1881 unop(Iop_32to8, 1882 binop(Iop_And32, 1883 binop(Iop_Sub32, 1884 mkexpr(amtT), 1885 mkU32(1)), 1886 mkU32(31) 1887 ) 1888 ) 1889 ) 1890 ), 1891 mkexpr(oldC) 1892 ) 1893 ); 1894 } 1895 // (Rm >>u (Rs & 31)) & (((Rs & 255) - 32) >>s 31) 1896 // Lhs of the & limits the shift to 31 bits, so as to 1897 // give known IR semantics. Rhs of the & is all 1s for 1898 // Rs <= 31 and all 0s for Rs >= 32. 
1899 assign( 1900 *res, 1901 binop( 1902 Iop_And32, 1903 binop(Iop_Shr32, 1904 mkexpr(rMt), 1905 unop(Iop_32to8, 1906 binop(Iop_And32, mkexpr(rSt), mkU32(31)))), 1907 binop(Iop_Sar32, 1908 binop(Iop_Sub32, 1909 mkexpr(amtT), 1910 mkU32(32)), 1911 mkU8(31)))); 1912 DIS(buf, "r%u, LSR r%u", rM, rS); 1913 } 1914 1915 1916 static void compute_result_and_C_after_ASR_by_imm5 ( 1917 /*OUT*/HChar* buf, 1918 IRTemp* res, 1919 IRTemp* newC, 1920 IRTemp rMt, UInt shift_amt, /* operands */ 1921 UInt rM /* only for debug printing */ 1922 ) 1923 { 1924 if (shift_amt == 0) { 1925 // conceptually a 32-bit shift, however: 1926 // res = Rm >>s 31 1927 // newC = Rm[31] 1928 if (newC) { 1929 assign( *newC, 1930 binop(Iop_And32, 1931 binop(Iop_Shr32, mkexpr(rMt), mkU8(31)), 1932 mkU32(1))); 1933 } 1934 assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) ); 1935 DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM); 1936 } else { 1937 // shift in range 1..31 1938 // res = Rm >>s shift_amt 1939 // newC = Rm[shift_amt - 1] 1940 vassert(shift_amt >= 1 && shift_amt <= 31); 1941 if (newC) { 1942 assign( *newC, 1943 binop(Iop_And32, 1944 binop(Iop_Shr32, mkexpr(rMt), 1945 mkU8(shift_amt - 1)), 1946 mkU32(1))); 1947 } 1948 assign( *res, 1949 binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) ); 1950 DIS(buf, "r%u, ASR #%u", rM, shift_amt); 1951 } 1952 } 1953 1954 1955 static void compute_result_and_C_after_ASR_by_reg ( 1956 /*OUT*/HChar* buf, 1957 IRTemp* res, 1958 IRTemp* newC, 1959 IRTemp rMt, IRTemp rSt, /* operands */ 1960 UInt rM, UInt rS /* only for debug printing */ 1961 ) 1962 { 1963 // arithmetic shift right in range 0 .. 255 1964 // amt = rS & 255 1965 // res = amt < 32 ? Rm >>s amt : Rm >>s 31 1966 // newC = amt == 0 ? oldC : 1967 // amt in 1..32 ? 
Rm[amt-1] : Rm[31] 1968 IRTemp amtT = newTemp(Ity_I32); 1969 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) ); 1970 if (newC) { 1971 /* mux0X(amt == 0, 1972 mux0X(amt < 32, 1973 Rm[31], 1974 Rm[(amt-1) & 31]) 1975 oldC) 1976 */ 1977 IRTemp oldC = newTemp(Ity_I32); 1978 assign(oldC, mk_armg_calculate_flag_c() ); 1979 assign( 1980 *newC, 1981 IRExpr_Mux0X( 1982 unop(Iop_1Uto8, 1983 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))), 1984 IRExpr_Mux0X( 1985 unop(Iop_1Uto8, 1986 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))), 1987 binop(Iop_Shr32, 1988 mkexpr(rMt), 1989 mkU8(31) 1990 ), 1991 binop(Iop_Shr32, 1992 mkexpr(rMt), 1993 unop(Iop_32to8, 1994 binop(Iop_And32, 1995 binop(Iop_Sub32, 1996 mkexpr(amtT), 1997 mkU32(1)), 1998 mkU32(31) 1999 ) 2000 ) 2001 ) 2002 ), 2003 mkexpr(oldC) 2004 ) 2005 ); 2006 } 2007 // (Rm >>s (amt <u 32 ? amt : 31)) 2008 assign( 2009 *res, 2010 binop( 2011 Iop_Sar32, 2012 mkexpr(rMt), 2013 unop( 2014 Iop_32to8, 2015 IRExpr_Mux0X( 2016 unop( 2017 Iop_1Uto8, 2018 binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32))), 2019 mkU32(31), 2020 mkexpr(amtT))))); 2021 DIS(buf, "r%u, ASR r%u", rM, rS); 2022 } 2023 2024 2025 static void compute_result_and_C_after_ROR_by_reg ( 2026 /*OUT*/HChar* buf, 2027 IRTemp* res, 2028 IRTemp* newC, 2029 IRTemp rMt, IRTemp rSt, /* operands */ 2030 UInt rM, UInt rS /* only for debug printing */ 2031 ) 2032 { 2033 // rotate right in range 0 .. 255 2034 // amt = rS & 255 2035 // shop = Rm `ror` (amt & 31) 2036 // shco = amt == 0 ? 
oldC : Rm[(amt-1) & 31] 2037 IRTemp amtT = newTemp(Ity_I32); 2038 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) ); 2039 IRTemp amt5T = newTemp(Ity_I32); 2040 assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) ); 2041 IRTemp oldC = newTemp(Ity_I32); 2042 assign(oldC, mk_armg_calculate_flag_c() ); 2043 if (newC) { 2044 assign( 2045 *newC, 2046 IRExpr_Mux0X( 2047 unop(Iop_32to8, mkexpr(amtT)), 2048 mkexpr(oldC), 2049 binop(Iop_And32, 2050 binop(Iop_Shr32, 2051 mkexpr(rMt), 2052 unop(Iop_32to8, 2053 binop(Iop_And32, 2054 binop(Iop_Sub32, 2055 mkexpr(amtT), 2056 mkU32(1) 2057 ), 2058 mkU32(31) 2059 ) 2060 ) 2061 ), 2062 mkU32(1) 2063 ) 2064 ) 2065 ); 2066 } 2067 assign( 2068 *res, 2069 IRExpr_Mux0X( 2070 unop(Iop_32to8, mkexpr(amt5T)), mkexpr(rMt), 2071 binop(Iop_Or32, 2072 binop(Iop_Shr32, 2073 mkexpr(rMt), 2074 unop(Iop_32to8, mkexpr(amt5T)) 2075 ), 2076 binop(Iop_Shl32, 2077 mkexpr(rMt), 2078 unop(Iop_32to8, 2079 binop(Iop_Sub32, mkU32(32), mkexpr(amt5T)) 2080 ) 2081 ) 2082 ) 2083 ) 2084 ); 2085 DIS(buf, "r%u, ROR r#%u", rM, rS); 2086 } 2087 2088 2089 /* Generate an expression corresponding to the immediate-shift case of 2090 a shifter operand. This is used both for ARM and Thumb2. 2091 2092 Bind it to a temporary, and return that via *res. If newC is 2093 non-NULL, also compute a value for the shifter's carry out (in the 2094 LSB of a word), bind it to a temporary, and return that via *shco. 2095 2096 Generates GETs from the guest state and is therefore not safe to 2097 use once we start doing PUTs to it, for any given instruction. 2098 2099 'how' is encoded thusly: 2100 00b LSL, 01b LSR, 10b ASR, 11b ROR 2101 Most but not all ARM and Thumb integer insns use this encoding. 2102 Be careful to ensure the right value is passed here. 
*/
static void compute_result_and_C_after_shift_by_imm5 (
               /*OUT*/HChar* buf,
               /*OUT*/IRTemp* res,
               /*OUT*/IRTemp* newC,
               IRTemp rMt,        /* reg to shift */
               UInt how,          /* what kind of shift */
               UInt shift_amt,    /* shift amount (0..31) */
               UInt rM            /* only for debug printing */
            )
{
   vassert(shift_amt < 32);
   vassert(how < 4);

   switch (how) {

      case 0:
         /* LSL by immediate */
         compute_result_and_C_after_LSL_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         );
         break;

      case 1:
         /* LSR by immediate */
         compute_result_and_C_after_LSR_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         );
         break;

      case 2:
         /* ASR by immediate */
         compute_result_and_C_after_ASR_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         );
         break;

      case 3:
         /* ROR by immediate; the amt == 0 encoding means RRX. */
         if (shift_amt == 0) {
            IRTemp oldcT = newTemp(Ity_I32);
            // rotate right 1 bit through carry (?)
            // RRX -- described at ARM ARM A5-17
            // res = (oldC << 31) | (Rm >>u 1)
            // newC = Rm[0]
            if (newC) {
               assign( *newC,
                       binop(Iop_And32, mkexpr(rMt), mkU32(1)));
            }
            assign( oldcT, mk_armg_calculate_flag_c() );
            assign( *res,
                    binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
                          binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
            DIS(buf, "r%u, RRX", rM);
         } else {
            // rotate right in range 1..31
            // res = Rm `ror` shift_amt
            // newC = Rm[shift_amt - 1]
            vassert(shift_amt >= 1 && shift_amt <= 31);
            if (newC) {
               assign( *newC,
                       binop(Iop_And32,
                             binop(Iop_Shr32, mkexpr(rMt),
                                   mkU8(shift_amt - 1)),
                             mkU32(1)));
            }
            assign( *res,
                    binop(Iop_Or32,
                          binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
                          binop(Iop_Shl32, mkexpr(rMt),
                                mkU8(32-shift_amt))));
            DIS(buf, "r%u, ROR #%u", rM, shift_amt);
         }
         break;

      default:
         /*NOTREACHED*/
         vassert(0);
   }
}


/* Generate an expression corresponding to the register-shift case of
2183 a shifter operand. This is used both for ARM and Thumb2. 2184 2185 Bind it to a temporary, and return that via *res. If newC is 2186 non-NULL, also compute a value for the shifter's carry out (in the 2187 LSB of a word), bind it to a temporary, and return that via *shco. 2188 2189 Generates GETs from the guest state and is therefore not safe to 2190 use once we start doing PUTs to it, for any given instruction. 2191 2192 'how' is encoded thusly: 2193 00b LSL, 01b LSR, 10b ASR, 11b ROR 2194 Most but not all ARM and Thumb integer insns use this encoding. 2195 Be careful to ensure the right value is passed here. 2196 */ 2197 static void compute_result_and_C_after_shift_by_reg ( 2198 /*OUT*/HChar* buf, 2199 /*OUT*/IRTemp* res, 2200 /*OUT*/IRTemp* newC, 2201 IRTemp rMt, /* reg to shift */ 2202 UInt how, /* what kind of shift */ 2203 IRTemp rSt, /* shift amount */ 2204 UInt rM, /* only for debug printing */ 2205 UInt rS /* only for debug printing */ 2206 ) 2207 { 2208 vassert(how < 4); 2209 switch (how) { 2210 case 0: { /* LSL */ 2211 compute_result_and_C_after_LSL_by_reg( 2212 buf, res, newC, rMt, rSt, rM, rS 2213 ); 2214 break; 2215 } 2216 case 1: { /* LSR */ 2217 compute_result_and_C_after_LSR_by_reg( 2218 buf, res, newC, rMt, rSt, rM, rS 2219 ); 2220 break; 2221 } 2222 case 2: { /* ASR */ 2223 compute_result_and_C_after_ASR_by_reg( 2224 buf, res, newC, rMt, rSt, rM, rS 2225 ); 2226 break; 2227 } 2228 case 3: { /* ROR */ 2229 compute_result_and_C_after_ROR_by_reg( 2230 buf, res, newC, rMt, rSt, rM, rS 2231 ); 2232 break; 2233 } 2234 default: 2235 /*NOTREACHED*/ 2236 vassert(0); 2237 } 2238 } 2239 2240 2241 /* Generate an expression corresponding to a shifter_operand, bind it 2242 to a temporary, and return that via *shop. If shco is non-NULL, 2243 also compute a value for the shifter's carry out (in the LSB of a 2244 word), bind it to a temporary, and return that via *shco. 2245 2246 If for some reason we can't come up with a shifter operand (missing 2247 case? 
not really a shifter operand?) return False.

   Generates GETs from the guest state and is therefore not safe to
   use once we start doing PUTs to it, for any given instruction.

   For ARM insns only; not for Thumb.
*/
static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
                                 /*OUT*/IRTemp* shop,
                                 /*OUT*/IRTemp* shco,
                                 /*OUT*/HChar* buf )
{
   UInt insn_4 = (insn_11_0 >> 4) & 1;
   UInt insn_7 = (insn_11_0 >> 7) & 1;
   vassert(insn_25 <= 0x1);
   vassert(insn_11_0 <= 0xFFF);

   vassert(shop && *shop == IRTemp_INVALID);
   *shop = newTemp(Ity_I32);

   if (shco) {
      vassert(*shco == IRTemp_INVALID);
      *shco = newTemp(Ity_I32);
   }

   /* 32-bit immediate */

   if (insn_25 == 1) {
      /* immediate: (7:0) rotated right by 2 * (11:8) */
      UInt imm = (insn_11_0 >> 0) & 0xFF;
      UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
      vassert(rot <= 30);
      imm = ROR32(imm, rot);
      /* Carry out: unchanged when there is no rotation, otherwise
         bit 31 of the rotated immediate. */
      if (shco) {
         if (rot == 0) {
            assign( *shco, mk_armg_calculate_flag_c() );
         } else {
            assign( *shco, mkU32( (imm >> 31) & 1 ) );
         }
      }
      DIS(buf, "#0x%x", imm);
      assign( *shop, mkU32(imm) );
      return True;
   }

   /* Shift/rotate by immediate */

   if (insn_25 == 0 && insn_4 == 0) {
      /* Rm (3:0) shifted (6:5) by immediate (11:7) */
      UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
      UInt rM        = (insn_11_0 >> 0) & 0xF;
      UInt how       = (insn_11_0 >> 5) & 3;
      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
      IRTemp rMt = newTemp(Ity_I32);
      assign(rMt, getIRegA(rM));

      vassert(shift_amt <= 31);

      compute_result_and_C_after_shift_by_imm5(
         buf, shop, shco, rMt, how, shift_amt, rM
      );
      return True;
   }

   /* Shift/rotate by register */
   if (insn_25 == 0 && insn_4 == 1) {
      /* Rm (3:0) shifted (6:5) by Rs (11:8) */
      UInt rM  = (insn_11_0 >> 0) & 0xF;
      UInt rS  = (insn_11_0 >> 8) & 0xF;
      UInt how = (insn_11_0 >> 5) & 3;
      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
      IRTemp rMt = newTemp(Ity_I32);
      IRTemp rSt = newTemp(Ity_I32);

      /* bit 7 must be zero in the register-shift form */
      if (insn_7 == 1)
         return False; /* not really a shifter operand */

      assign(rMt, getIRegA(rM));
      assign(rSt, getIRegA(rS));

      compute_result_and_C_after_shift_by_reg(
         buf, shop, shco, rMt, how, rSt, rM, rS
      );
      return True;
   }

   vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
   return False;
}


/* ARM only.  Effective address: [rN +/- imm12], where bU selects
   add (1) or subtract (0). */
static
IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
                                    /*OUT*/HChar* buf )
{
   vassert(rN < 16);
   vassert(bU < 2);
   vassert(imm12 < 0x1000);
   UChar opChar = bU == 1 ? '+' : '-';
   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
   return
      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
             getIRegA(rN),
             mkU32(imm12) );
}


/* ARM only.  Effective address: [rN +/- (rM shifted by sh2/imm5)].
   NB: This is "DecodeImmShift" in newer versions of the ARM ARM.
*/
static
IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
                                          UInt sh2, UInt imm5,
                                          /*OUT*/HChar* buf )
{
   vassert(rN < 16);
   vassert(bU < 2);
   vassert(rM < 16);
   vassert(sh2 < 4);
   vassert(imm5 < 32);
   UChar opChar = bU == 1 ? '+' : '-';
   IRExpr* index = NULL;
   switch (sh2) {
      case 0: /* LSL */
         /* imm5 can be in the range 0 .. 31 inclusive. */
         index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
         DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
         break;
      case 1: /* LSR */
         /* imm5 == 0 encodes a shift of 32; untested path (ATC). */
         if (imm5 == 0) {
            index = mkU32(0);
            vassert(0); // ATC
         } else {
            index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
         }
         DIS(buf, "[r%u, %cr%u, LSR #%u]",
             rN, opChar, rM, imm5 == 0 ? 32 : imm5);
         break;
      case 2: /* ASR */
         /* Doesn't this just mean that the behaviour with imm5 == 0
            is the same as if it had been 31 ? */
         if (imm5 == 0) {
            index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
            vassert(0); // ATC
         } else {
            index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
         }
         DIS(buf, "[r%u, %cr%u, ASR #%u]",
             rN, opChar, rM, imm5 == 0 ? 32 : imm5);
         break;
      case 3: /* ROR or RRX */
         if (imm5 == 0) {
            /* RRX: rotate right one bit through the carry flag. */
            IRTemp rmT = newTemp(Ity_I32);
            IRTemp cflagT = newTemp(Ity_I32);
            assign(rmT, getIRegA(rM));
            assign(cflagT, mk_armg_calculate_flag_c());
            index = binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
                          binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
            DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
         } else {
            IRTemp rmT = newTemp(Ity_I32);
            assign(rmT, getIRegA(rM));
            vassert(imm5 >= 1 && imm5 <= 31);
            index = binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
                          binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
            DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
         }
         break;
      default:
         vassert(0);
   }
   vassert(index);
   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
                getIRegA(rN), index);
}


/* ARM only.  Effective address: [rN +/- imm8]. */
static
IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
                                   /*OUT*/HChar* buf )
{
   vassert(rN < 16);
   vassert(bU < 2);
   vassert(imm8 < 0x100);
   UChar opChar = bU == 1 ? '+' : '-';
   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
   return
      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
             getIRegA(rN),
             mkU32(imm8) );
}


/* ARM only.  Effective address: [rN +/- rM]. */
static
IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
                                  /*OUT*/HChar* buf )
{
   vassert(rN < 16);
   vassert(bU < 2);
   vassert(rM < 16);
   UChar opChar = bU == 1 ? '+' : '-';
   IRExpr* index = getIRegA(rM);
   DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
                getIRegA(rN), index);
}


/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
{
   IRTemp ix    = newTemp(Ity_I32);
   IRTemp termL = newTemp(Ity_I32);
   IRTemp termR = newTemp(Ity_I32);
   IRTemp nzcv  = newTemp(Ity_I32);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct value NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and UN.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of this file, to try this out with.
   */
   /* ix = (irRes >> 5) & 3 | (irRes & 1) -- packs bits 6 and 0. */
   assign(
      ix,
      binop(Iop_Or32,
            binop(Iop_And32,
                  binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
                  mkU32(3)),
            binop(Iop_And32, mkexpr(irRes), mkU32(1))));

   assign(
      termL,
      binop(Iop_Add32,
            binop(Iop_Shr32,
                  binop(Iop_Sub32,
                        binop(Iop_Shl32,
                              binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
                              mkU8(30)),
                        mkU32(1)),
                  mkU8(29)),
            mkU32(1)));

   assign(
      termR,
      binop(Iop_And32,
            binop(Iop_And32,
                  mkexpr(ix),
                  binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
            mkU32(1)));

   assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}


/* Thumb32 only.  This is "ThumbExpandImm" in the ARM ARM.  If
   updatesC is non-NULL, a boolean is written to it indicating whether
   or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
*/
static UInt thumbExpandImm ( Bool* updatesC,
                             UInt imm1, UInt imm3, UInt imm8 )
{
   vassert(imm1 < (1<<1));
   vassert(imm3 < (1<<3));
   vassert(imm8 < (1<<8));
   /* i_imm3_a is the 5-bit selector i:imm3:a from the ARM ARM
      pseudocode; values >= 8 select the rotated-immediate form. */
   UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
   UInt abcdefgh = imm8;
   UInt lbcdefgh = imm8 | 0x80;  /* imm8 with the top bit forced on */
   if (updatesC) {
      *updatesC = i_imm3_a >= 8;
   }
   switch (i_imm3_a) {
      case 0: case 1:
         return abcdefgh;
      case 2: case 3:
         return (abcdefgh << 16) | abcdefgh;
      case 4: case 5:
         return (abcdefgh << 24) | (abcdefgh << 8);
      case 6: case 7:
         return (abcdefgh << 24) | (abcdefgh << 16)
                | (abcdefgh << 8) | abcdefgh;
      case 8 ... 31:
         /* NB: GCC case-range extension. */
         return lbcdefgh << (32 - i_imm3_a);
      default:
         break;
   }
   /*NOTREACHED*/vassert(0);
}


/* Version of thumbExpandImm where we simply feed it the
   instruction halfwords (the lowest addressed one is I0).
*/
static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
                                        UShort i0s, UShort i1s )
{
   UInt i0 = (UInt)i0s;
   UInt i1 = (UInt)i1s;
   /* Slice the i:imm3:imm8 fields out of the two halfwords and hand
      them to thumbExpandImm. */
   UInt imm1 = SLICE_UInt(i0,10,10);
   UInt imm3 = SLICE_UInt(i1,14,12);
   UInt imm8 = SLICE_UInt(i1,7,0);
   return thumbExpandImm(updatesC, imm1, imm3, imm8);
}


/* Thumb16 only.  Given the firstcond and mask fields from an IT
   instruction, compute the 32-bit ITSTATE value implied, as described
   in libvex_guest_arm.h.  This is not the ARM ARM representation.
   Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
   disassembly printing.  Returns False if firstcond or mask
   denote something invalid.

   The number and conditions for the instructions to be
   conditionalised depend on firstcond and mask:

   mask      cond 1    cond 2      cond 3      cond 4

   1000      fc[3:0]
   x100      fc[3:0]   fc[3:1]:x
   xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
   xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z

   The condition fields are assembled in *itstate backwards (cond 4 at
   the top, cond 1 at the bottom).  Conditions are << 4'd and then
   ^0xE'd, and those fields that correspond to instructions in the IT
   block are tagged with a 1 bit.
*/
static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
                              /*OUT*/UChar* ch1,
                              /*OUT*/UChar* ch2,
                              /*OUT*/UChar* ch3,
                              UInt firstcond, UInt mask )
{
   vassert(firstcond <= 0xF);
   vassert(mask <= 0xF);
   *itstate = 0;
   *ch1 = *ch2 = *ch3 = '.';
   if (mask == 0)
      return False; /* the logic below actually ensures this anyway,
                       but clearer to make it explicit. */
   if (firstcond == 0xF)
      return False; /* NV is not allowed */
   if (firstcond == 0xE && popcount32(mask) != 1)
      return False; /* if firstcond is AL then all the rest must be too */

   UInt m3 = (mask >> 3) & 1;
   UInt m2 = (mask >> 2) & 1;
   UInt m1 = (mask >> 1) & 1;
   UInt m0 = (mask >> 0) & 1;

   /* fc: firstcond shifted up with the "in IT block" tag bit set;
      ni: the AL condition with the tag bit clear, used to pad slots
      that hold no instruction. */
   UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
   UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;

   /* One case per trailing-1 position in mask == one case per IT
      block length (1..4 insns).  Each slot's condition for insns
      2..4 is fc[3:1] with the corresponding mask bit as its LSB. */
   if (m3 == 1 && (m2|m1|m0) == 0) {
      *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      return True;
   }

   if (m2 == 1 && (m1|m0) == 0) {
      *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
      return True;
   }

   if (m1 == 1 && m0 == 0) {
      *itstate = (ni << 24)
                 | (setbit32(fc, 4, m2) << 16)
                 | (setbit32(fc, 4, m3) << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
      return True;
   }

   if (m0 == 1) {
      *itstate = (setbit32(fc, 4, m1) << 24)
                 | (setbit32(fc, 4, m2) << 16)
                 | (setbit32(fc, 4, m3) << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
      *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
      return True;
   }

   return False;
}


/* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
   Chapter 7 Section 1.
*/
static IRTemp gen_BITREV ( IRTemp x0 )
{
   /* Five successive swap stages: exchange adjacent bits, then 2-bit
      pairs, nibbles, bytes, and finally halfwords.  Each stage keeps
      the low half of every group under the mask, shifts the two
      halves in opposite directions, and ORs them back together.
      This emits exactly the same IR as the hand-unrolled version. */
   static const UInt masks[5]
      = { 0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF };
   static const UInt shifts[5] = { 1, 2, 4, 8, 16 };
   IRTemp cur = x0;
   Int    i;
   for (i = 0; i < 5; i++) {
      IRTemp nxt = newTemp(Ity_I32);
      assign(nxt,
             binop(Iop_Or32,
                   binop(Iop_Shl32,
                         binop(Iop_And32, mkexpr(cur), mkU32(masks[i])),
                         mkU8(shifts[i])),
                   binop(Iop_Shr32,
                         binop(Iop_And32, mkexpr(cur), mkU32(~masks[i])),
                         mkU8(shifts[i]))
             ));
      cur = nxt;
   }
   return cur;
}


/* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   0:1:2:3 (aka byte-swap).
*/
static IRTemp gen_REV ( IRTemp arg )
{
   /* Full 32-bit byte swap: move byte 0 to 3, byte 1 to 2, byte 2 to
      1 and byte 3 to 0. */
   IRTemp res = newTemp(Ity_I32);
   assign(res,
          binop(Iop_Or32,
                binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
          binop(Iop_Or32,
                binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
                                 mkU32(0x00FF0000)),
          binop(Iop_Or32,
                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
                                       mkU32(0x0000FF00)),
                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
                                       mkU32(0x000000FF) )
          ))));
   return res;
}


/* Generate IR to do rearrange bytes 3:2:1:0 in a word into the order
   2:3:0:1 (swap within lo and hi halves). */
static IRTemp gen_REV16 ( IRTemp arg )
{
   /* Swap the two bytes within each 16-bit half independently. */
   IRTemp res = newTemp(Ity_I32);
   assign(res,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
                      mkU32(0xFF00FF00)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
                      mkU32(0x00FF00FF))));
   return res;
}


/*------------------------------------------------------------*/
/*--- Advanced SIMD (NEON) instructions                    ---*/
/*------------------------------------------------------------*/

/*------------------------------------------------------------*/
/*--- NEON data processing                                 ---*/
/*------------------------------------------------------------*/

/* For all NEON DP ops, we use the normal scheme to handle conditional
   writes to registers -- pass in condT and hand that on to the
   put*Reg functions.  In ARM mode condT is always IRTemp_INVALID
   since NEON is unconditional for ARM.  In Thumb mode condT is
   derived from the ITSTATE shift register in the normal way.
*/

/* Extract the D-register number (D:Vd fields) from theInstr.  If
   bit 6 (Q) is set the instruction operates on Q registers: an even
   number is halved to give the Q register number, while an odd
   number (invalid for a Q reg) has 0x100 added so that callers can
   detect and reject it -- see e.g. the >= 0x100 checks in
   dis_neon_vtb below. */
static
UInt get_neon_d_regno(UInt theInstr)
{
   UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   if (theInstr & 0x40) {
      if (x & 1) {
         x = x + 0x100;   /* odd D reg in Q context: mark invalid */
      } else {
         x = x >> 1;      /* convert to Q register numbering */
      }
   }
   return x;
}

/* As get_neon_d_regno, but for the N:Vn register fields. */
static
UInt get_neon_n_regno(UInt theInstr)
{
   UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
   if (theInstr & 0x40) {
      if (x & 1) {
         x = x + 0x100;
      } else {
         x = x >> 1;
      }
   }
   return x;
}

/* As get_neon_d_regno, but for the M:Vm register fields. */
static
UInt get_neon_m_regno(UInt theInstr)
{
   UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   if (theInstr & 0x40) {
      if (x & 1) {
         x = x + 0x100;
      } else {
         x = x >> 1;
      }
   }
   return x;
}

/* VEXT: extract a contiguous run of bytes from the pair (Vn,Vm),
   starting at byte imm4.  Q selects the 128- vs 64-bit form. */
static
Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
{
   UInt dreg = get_neon_d_regno(theInstr);
   UInt mreg = get_neon_m_regno(theInstr);
   UInt nreg = get_neon_n_regno(theInstr);
   UInt imm4 = (theInstr >> 8) & 0xf;
   UInt Q = (theInstr >> 6) & 1;
   HChar reg_t = Q ? 'q' : 'd';

   if (Q) {
      putQReg(dreg, triop(Iop_ExtractV128, getQReg(nreg),
              getQReg(mreg), mkU8(imm4)), condT);
   } else {
      putDRegI64(dreg, triop(Iop_Extract64, getDRegI64(nreg),
                 getDRegI64(mreg), mkU8(imm4)), condT);
   }
   DIP("vext.8 %c%d, %c%d, %c%d, #%d\n", reg_t, dreg, reg_t, nreg,
                                         reg_t, mreg, imm4);
   return True;
}

/* VTBL, VTBX */
static
Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
{
   /* op == 1 is VTBX (out-of-range indexes leave dest bytes
      unchanged); op == 0 is VTBL (out-of-range indexes give zero). */
   UInt op = (theInstr >> 6) & 1;
   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
   UInt len = (theInstr >> 8) & 3;
   Int i;
   IROp cmp;
   ULong imm;
   IRTemp arg_l;
   IRTemp old_mask, new_mask, cur_mask;
   IRTemp old_res, new_res;
   IRTemp old_arg, new_arg;

   /* Reject invalid (odd-in-Q-context) registers and table runs that
      would fall off the end of the D register file. */
   if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
      return False;
   if (nreg + len > 31)
      return False;

   cmp = Iop_CmpGT8Ux8;

   old_mask = newTemp(Ity_I64);
   old_res = newTemp(Ity_I64);
   old_arg = newTemp(Ity_I64);
   assign(old_mask, mkU64(0));
   assign(old_res, mkU64(0));
   assign(old_arg, getDRegI64(mreg));
   /* imm = 0x0808080808080808: the per-iteration index bias (each
      table register covers 8 bytes of index space). */
   imm = 8;
   imm = (imm << 8) | imm;
   imm = (imm << 16) | imm;
   imm = (imm << 32) | imm;

   /* One iteration per table register: select the bytes whose
      (biased) index lands in this register, accumulate them into the
      result, and note in the mask which lanes have been satisfied. */
   for (i = 0; i <= len; i++) {
      arg_l = newTemp(Ity_I64);
      new_mask = newTemp(Ity_I64);
      cur_mask = newTemp(Ity_I64);
      new_res = newTemp(Ity_I64);
      new_arg = newTemp(Ity_I64);
      assign(arg_l, getDRegI64(nreg+i));
      assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
      assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
      assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
      assign(new_res, binop(Iop_Or64,
                            mkexpr(old_res),
                            binop(Iop_And64,
                                  binop(Iop_Perm8x8,
                                        mkexpr(arg_l),
                                        binop(Iop_And64,
                                              mkexpr(old_arg),
                                              mkexpr(cur_mask))),
                                  mkexpr(cur_mask))));

      old_arg = new_arg;
      old_mask = new_mask;
      old_res = new_res;
   }
   if (op) {
      /* VTBX: lanes never matched keep their old destination bytes. */
      new_res = newTemp(Ity_I64);
      assign(new_res, binop(Iop_Or64,
                            binop(Iop_And64,
                                  getDRegI64(dreg),
                                  unop(Iop_Not64, mkexpr(old_mask))),
                            mkexpr(old_res)));
      old_res = new_res;
   }

   putDRegI64(dreg, mkexpr(old_res), condT);
   DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
   if (len > 0) {
      DIP("d%u-d%u", nreg, nreg + len);
   } else {
      DIP("d%u", nreg);
   }
   DIP("}, d%u\n", mreg);
   return True;
}

/* VDUP (scalar) */
static
Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
{
   UInt Q = (theInstr >> 6) & 1;
   UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   UInt imm4 = (theInstr >> 16) & 0xF;
   UInt index;
   UInt size;
   IRTemp arg_m;
   IRTemp res;
   IROp op, op2;

   if ((imm4 == 0) || (imm4 == 8))
      return False;
   if ((Q == 1) && ((dreg & 1) == 1))
      return False;
   if (Q)
      dreg >>= 1;
   arg_m = newTemp(Ity_I64);
   assign(arg_m, getDRegI64(mreg));
   if (Q)
      res = newTemp(Ity_V128);
   else
      res = newTemp(Ity_I64);
   /* The low bits of imm4 encode the element size; the remaining
      high bits are the lane index. */
   if ((imm4 & 1) == 1) {
      op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
      op2 = Iop_GetElem8x8;
      index = imm4 >> 1;
      size = 8;
   } else if ((imm4 & 3) == 2) {
      op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
      op2 = Iop_GetElem16x4;
      index = imm4 >> 2;
      size = 16;
   } else if ((imm4 & 7) == 4) {
      op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
      op2 = Iop_GetElem32x2;
      index = imm4 >> 3;
      size = 32;
   } else {
      return False; // can this ever happen?
2972 } 2973 assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index)))); 2974 if (Q) { 2975 putQReg(dreg, mkexpr(res), condT); 2976 } else { 2977 putDRegI64(dreg, mkexpr(res), condT); 2978 } 2979 DIP("vdup.%d %c%d, d%d[%d]\n", size, Q ? 'q' : 'd', dreg, mreg, index); 2980 return True; 2981 } 2982 2983 /* A7.4.1 Three registers of the same length */ 2984 static 2985 Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT ) 2986 { 2987 UInt Q = (theInstr >> 6) & 1; 2988 UInt dreg = get_neon_d_regno(theInstr); 2989 UInt nreg = get_neon_n_regno(theInstr); 2990 UInt mreg = get_neon_m_regno(theInstr); 2991 UInt A = (theInstr >> 8) & 0xF; 2992 UInt B = (theInstr >> 4) & 1; 2993 UInt C = (theInstr >> 20) & 0x3; 2994 UInt U = (theInstr >> 24) & 1; 2995 UInt size = C; 2996 2997 IRTemp arg_n; 2998 IRTemp arg_m; 2999 IRTemp res; 3000 3001 if (Q) { 3002 arg_n = newTemp(Ity_V128); 3003 arg_m = newTemp(Ity_V128); 3004 res = newTemp(Ity_V128); 3005 assign(arg_n, getQReg(nreg)); 3006 assign(arg_m, getQReg(mreg)); 3007 } else { 3008 arg_n = newTemp(Ity_I64); 3009 arg_m = newTemp(Ity_I64); 3010 res = newTemp(Ity_I64); 3011 assign(arg_n, getDRegI64(nreg)); 3012 assign(arg_m, getDRegI64(mreg)); 3013 } 3014 3015 switch(A) { 3016 case 0: 3017 if (B == 0) { 3018 /* VHADD */ 3019 ULong imm = 0; 3020 IRExpr *imm_val; 3021 IROp addOp; 3022 IROp andOp; 3023 IROp shOp; 3024 char regType = Q ? 'q' : 'd'; 3025 3026 if (size == 3) 3027 return False; 3028 switch(size) { 3029 case 0: imm = 0x101010101010101LL; break; 3030 case 1: imm = 0x1000100010001LL; break; 3031 case 2: imm = 0x100000001LL; break; 3032 default: vassert(0); 3033 } 3034 if (Q) { 3035 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm)); 3036 andOp = Iop_AndV128; 3037 } else { 3038 imm_val = mkU64(imm); 3039 andOp = Iop_And64; 3040 } 3041 if (U) { 3042 switch(size) { 3043 case 0: 3044 addOp = Q ? Iop_Add8x16 : Iop_Add8x8; 3045 shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3046 break; 3047 case 1: 3048 addOp = Q ? 
Iop_Add16x8 : Iop_Add16x4; 3049 shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3050 break; 3051 case 2: 3052 addOp = Q ? Iop_Add32x4 : Iop_Add32x2; 3053 shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3054 break; 3055 default: 3056 vassert(0); 3057 } 3058 } else { 3059 switch(size) { 3060 case 0: 3061 addOp = Q ? Iop_Add8x16 : Iop_Add8x8; 3062 shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8; 3063 break; 3064 case 1: 3065 addOp = Q ? Iop_Add16x8 : Iop_Add16x4; 3066 shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4; 3067 break; 3068 case 2: 3069 addOp = Q ? Iop_Add32x4 : Iop_Add32x2; 3070 shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2; 3071 break; 3072 default: 3073 vassert(0); 3074 } 3075 } 3076 assign(res, 3077 binop(addOp, 3078 binop(addOp, 3079 binop(shOp, mkexpr(arg_m), mkU8(1)), 3080 binop(shOp, mkexpr(arg_n), mkU8(1))), 3081 binop(shOp, 3082 binop(addOp, 3083 binop(andOp, mkexpr(arg_m), imm_val), 3084 binop(andOp, mkexpr(arg_n), imm_val)), 3085 mkU8(1)))); 3086 DIP("vhadd.%c%d %c%d, %c%d, %c%d\n", 3087 U ? 'u' : 's', 8 << size, regType, 3088 dreg, regType, nreg, regType, mreg); 3089 } else { 3090 /* VQADD */ 3091 IROp op, op2; 3092 IRTemp tmp; 3093 char reg_t = Q ? 'q' : 'd'; 3094 if (Q) { 3095 switch (size) { 3096 case 0: 3097 op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16; 3098 op2 = Iop_Add8x16; 3099 break; 3100 case 1: 3101 op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8; 3102 op2 = Iop_Add16x8; 3103 break; 3104 case 2: 3105 op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4; 3106 op2 = Iop_Add32x4; 3107 break; 3108 case 3: 3109 op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2; 3110 op2 = Iop_Add64x2; 3111 break; 3112 default: 3113 vassert(0); 3114 } 3115 } else { 3116 switch (size) { 3117 case 0: 3118 op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8; 3119 op2 = Iop_Add8x8; 3120 break; 3121 case 1: 3122 op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4; 3123 op2 = Iop_Add16x4; 3124 break; 3125 case 2: 3126 op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2; 3127 op2 = Iop_Add32x2; 3128 break; 3129 case 3: 3130 op = U ? 
Iop_QAdd64Ux1 : Iop_QAdd64Sx1; 3131 op2 = Iop_Add64; 3132 break; 3133 default: 3134 vassert(0); 3135 } 3136 } 3137 if (Q) { 3138 tmp = newTemp(Ity_V128); 3139 } else { 3140 tmp = newTemp(Ity_I64); 3141 } 3142 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); 3143 #ifndef DISABLE_QC_FLAG 3144 assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m))); 3145 setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT); 3146 #endif 3147 DIP("vqadd.%c%d %c%d, %c%d, %c%d\n", 3148 U ? 'u' : 's', 3149 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg); 3150 } 3151 break; 3152 case 1: 3153 if (B == 0) { 3154 /* VRHADD */ 3155 /* VRHADD C, A, B ::= 3156 C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */ 3157 IROp shift_op, add_op; 3158 IRTemp cc; 3159 ULong one = 1; 3160 HChar reg_t = Q ? 'q' : 'd'; 3161 switch (size) { 3162 case 0: one = (one << 8) | one; /* fall through */ 3163 case 1: one = (one << 16) | one; /* fall through */ 3164 case 2: one = (one << 32) | one; break; 3165 case 3: return False; 3166 default: vassert(0); 3167 } 3168 if (Q) { 3169 switch (size) { 3170 case 0: 3171 shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16; 3172 add_op = Iop_Add8x16; 3173 break; 3174 case 1: 3175 shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8; 3176 add_op = Iop_Add16x8; 3177 break; 3178 case 2: 3179 shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4; 3180 add_op = Iop_Add32x4; 3181 break; 3182 case 3: 3183 return False; 3184 default: 3185 vassert(0); 3186 } 3187 } else { 3188 switch (size) { 3189 case 0: 3190 shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8; 3191 add_op = Iop_Add8x8; 3192 break; 3193 case 1: 3194 shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4; 3195 add_op = Iop_Add16x4; 3196 break; 3197 case 2: 3198 shift_op = U ? 
Iop_ShrN32x2 : Iop_SarN32x2; 3199 add_op = Iop_Add32x2; 3200 break; 3201 case 3: 3202 return False; 3203 default: 3204 vassert(0); 3205 } 3206 } 3207 if (Q) { 3208 cc = newTemp(Ity_V128); 3209 assign(cc, binop(shift_op, 3210 binop(add_op, 3211 binop(add_op, 3212 binop(Iop_AndV128, 3213 mkexpr(arg_n), 3214 binop(Iop_64HLtoV128, 3215 mkU64(one), 3216 mkU64(one))), 3217 binop(Iop_AndV128, 3218 mkexpr(arg_m), 3219 binop(Iop_64HLtoV128, 3220 mkU64(one), 3221 mkU64(one)))), 3222 binop(Iop_64HLtoV128, 3223 mkU64(one), 3224 mkU64(one))), 3225 mkU8(1))); 3226 assign(res, binop(add_op, 3227 binop(add_op, 3228 binop(shift_op, 3229 mkexpr(arg_n), 3230 mkU8(1)), 3231 binop(shift_op, 3232 mkexpr(arg_m), 3233 mkU8(1))), 3234 mkexpr(cc))); 3235 } else { 3236 cc = newTemp(Ity_I64); 3237 assign(cc, binop(shift_op, 3238 binop(add_op, 3239 binop(add_op, 3240 binop(Iop_And64, 3241 mkexpr(arg_n), 3242 mkU64(one)), 3243 binop(Iop_And64, 3244 mkexpr(arg_m), 3245 mkU64(one))), 3246 mkU64(one)), 3247 mkU8(1))); 3248 assign(res, binop(add_op, 3249 binop(add_op, 3250 binop(shift_op, 3251 mkexpr(arg_n), 3252 mkU8(1)), 3253 binop(shift_op, 3254 mkexpr(arg_m), 3255 mkU8(1))), 3256 mkexpr(cc))); 3257 } 3258 DIP("vrhadd.%c%d %c%d, %c%d, %c%d\n", 3259 U ? 'u' : 's', 3260 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg); 3261 } else { 3262 if (U == 0) { 3263 switch(C) { 3264 case 0: { 3265 /* VAND */ 3266 HChar reg_t = Q ? 'q' : 'd'; 3267 if (Q) { 3268 assign(res, binop(Iop_AndV128, mkexpr(arg_n), 3269 mkexpr(arg_m))); 3270 } else { 3271 assign(res, binop(Iop_And64, mkexpr(arg_n), 3272 mkexpr(arg_m))); 3273 } 3274 DIP("vand %c%d, %c%d, %c%d\n", 3275 reg_t, dreg, reg_t, nreg, reg_t, mreg); 3276 break; 3277 } 3278 case 1: { 3279 /* VBIC */ 3280 HChar reg_t = Q ? 
'q' : 'd'; 3281 if (Q) { 3282 assign(res, binop(Iop_AndV128,mkexpr(arg_n), 3283 unop(Iop_NotV128, mkexpr(arg_m)))); 3284 } else { 3285 assign(res, binop(Iop_And64, mkexpr(arg_n), 3286 unop(Iop_Not64, mkexpr(arg_m)))); 3287 } 3288 DIP("vbic %c%d, %c%d, %c%d\n", 3289 reg_t, dreg, reg_t, nreg, reg_t, mreg); 3290 break; 3291 } 3292 case 2: 3293 if ( nreg != mreg) { 3294 /* VORR */ 3295 HChar reg_t = Q ? 'q' : 'd'; 3296 if (Q) { 3297 assign(res, binop(Iop_OrV128, mkexpr(arg_n), 3298 mkexpr(arg_m))); 3299 } else { 3300 assign(res, binop(Iop_Or64, mkexpr(arg_n), 3301 mkexpr(arg_m))); 3302 } 3303 DIP("vorr %c%d, %c%d, %c%d\n", 3304 reg_t, dreg, reg_t, nreg, reg_t, mreg); 3305 } else { 3306 /* VMOV */ 3307 HChar reg_t = Q ? 'q' : 'd'; 3308 assign(res, mkexpr(arg_m)); 3309 DIP("vmov %c%d, %c%d\n", reg_t, dreg, reg_t, mreg); 3310 } 3311 break; 3312 case 3:{ 3313 /* VORN */ 3314 HChar reg_t = Q ? 'q' : 'd'; 3315 if (Q) { 3316 assign(res, binop(Iop_OrV128,mkexpr(arg_n), 3317 unop(Iop_NotV128, mkexpr(arg_m)))); 3318 } else { 3319 assign(res, binop(Iop_Or64, mkexpr(arg_n), 3320 unop(Iop_Not64, mkexpr(arg_m)))); 3321 } 3322 DIP("vorn %c%d, %c%d, %c%d\n", 3323 reg_t, dreg, reg_t, nreg, reg_t, mreg); 3324 break; 3325 } 3326 } 3327 } else { 3328 switch(C) { 3329 case 0: 3330 /* VEOR (XOR) */ 3331 if (Q) { 3332 assign(res, binop(Iop_XorV128, mkexpr(arg_n), 3333 mkexpr(arg_m))); 3334 } else { 3335 assign(res, binop(Iop_Xor64, mkexpr(arg_n), 3336 mkexpr(arg_m))); 3337 } 3338 DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg, 3339 Q ? 'q' : 'd', nreg, Q ? 
'q' : 'd', mreg); 3340 break; 3341 case 1: 3342 /* VBSL */ 3343 if (Q) { 3344 IRTemp reg_d = newTemp(Ity_V128); 3345 assign(reg_d, getQReg(dreg)); 3346 assign(res, 3347 binop(Iop_OrV128, 3348 binop(Iop_AndV128, mkexpr(arg_n), 3349 mkexpr(reg_d)), 3350 binop(Iop_AndV128, 3351 mkexpr(arg_m), 3352 unop(Iop_NotV128, 3353 mkexpr(reg_d)) ) ) ); 3354 } else { 3355 IRTemp reg_d = newTemp(Ity_I64); 3356 assign(reg_d, getDRegI64(dreg)); 3357 assign(res, 3358 binop(Iop_Or64, 3359 binop(Iop_And64, mkexpr(arg_n), 3360 mkexpr(reg_d)), 3361 binop(Iop_And64, 3362 mkexpr(arg_m), 3363 unop(Iop_Not64, mkexpr(reg_d))))); 3364 } 3365 DIP("vbsl %c%u, %c%u, %c%u\n", 3366 Q ? 'q' : 'd', dreg, 3367 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg); 3368 break; 3369 case 2: 3370 /* VBIT */ 3371 if (Q) { 3372 IRTemp reg_d = newTemp(Ity_V128); 3373 assign(reg_d, getQReg(dreg)); 3374 assign(res, 3375 binop(Iop_OrV128, 3376 binop(Iop_AndV128, mkexpr(arg_n), 3377 mkexpr(arg_m)), 3378 binop(Iop_AndV128, 3379 mkexpr(reg_d), 3380 unop(Iop_NotV128, mkexpr(arg_m))))); 3381 } else { 3382 IRTemp reg_d = newTemp(Ity_I64); 3383 assign(reg_d, getDRegI64(dreg)); 3384 assign(res, 3385 binop(Iop_Or64, 3386 binop(Iop_And64, mkexpr(arg_n), 3387 mkexpr(arg_m)), 3388 binop(Iop_And64, 3389 mkexpr(reg_d), 3390 unop(Iop_Not64, mkexpr(arg_m))))); 3391 } 3392 DIP("vbit %c%u, %c%u, %c%u\n", 3393 Q ? 'q' : 'd', dreg, 3394 Q ? 'q' : 'd', nreg, Q ? 
'q' : 'd', mreg); 3395 break; 3396 case 3: 3397 /* VBIF */ 3398 if (Q) { 3399 IRTemp reg_d = newTemp(Ity_V128); 3400 assign(reg_d, getQReg(dreg)); 3401 assign(res, 3402 binop(Iop_OrV128, 3403 binop(Iop_AndV128, mkexpr(reg_d), 3404 mkexpr(arg_m)), 3405 binop(Iop_AndV128, 3406 mkexpr(arg_n), 3407 unop(Iop_NotV128, mkexpr(arg_m))))); 3408 } else { 3409 IRTemp reg_d = newTemp(Ity_I64); 3410 assign(reg_d, getDRegI64(dreg)); 3411 assign(res, 3412 binop(Iop_Or64, 3413 binop(Iop_And64, mkexpr(reg_d), 3414 mkexpr(arg_m)), 3415 binop(Iop_And64, 3416 mkexpr(arg_n), 3417 unop(Iop_Not64, mkexpr(arg_m))))); 3418 } 3419 DIP("vbif %c%u, %c%u, %c%u\n", 3420 Q ? 'q' : 'd', dreg, 3421 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg); 3422 break; 3423 } 3424 } 3425 } 3426 break; 3427 case 2: 3428 if (B == 0) { 3429 /* VHSUB */ 3430 /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1) */ 3431 ULong imm = 0; 3432 IRExpr *imm_val; 3433 IROp subOp; 3434 IROp notOp; 3435 IROp andOp; 3436 IROp shOp; 3437 if (size == 3) 3438 return False; 3439 switch(size) { 3440 case 0: imm = 0x101010101010101LL; break; 3441 case 1: imm = 0x1000100010001LL; break; 3442 case 2: imm = 0x100000001LL; break; 3443 default: vassert(0); 3444 } 3445 if (Q) { 3446 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm)); 3447 andOp = Iop_AndV128; 3448 notOp = Iop_NotV128; 3449 } else { 3450 imm_val = mkU64(imm); 3451 andOp = Iop_And64; 3452 notOp = Iop_Not64; 3453 } 3454 if (U) { 3455 switch(size) { 3456 case 0: 3457 subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8; 3458 shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3459 break; 3460 case 1: 3461 subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4; 3462 shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3463 break; 3464 case 2: 3465 subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2; 3466 shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3467 break; 3468 default: 3469 vassert(0); 3470 } 3471 } else { 3472 switch(size) { 3473 case 0: 3474 subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8; 3475 shOp = Q ? 
Iop_SarN8x16 : Iop_SarN8x8; 3476 break; 3477 case 1: 3478 subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4; 3479 shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4; 3480 break; 3481 case 2: 3482 subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2; 3483 shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2; 3484 break; 3485 default: 3486 vassert(0); 3487 } 3488 } 3489 assign(res, 3490 binop(subOp, 3491 binop(subOp, 3492 binop(shOp, mkexpr(arg_n), mkU8(1)), 3493 binop(shOp, mkexpr(arg_m), mkU8(1))), 3494 binop(andOp, 3495 binop(andOp, 3496 unop(notOp, mkexpr(arg_n)), 3497 mkexpr(arg_m)), 3498 imm_val))); 3499 DIP("vhsub.%c%u %c%u, %c%u, %c%u\n", 3500 U ? 'u' : 's', 8 << size, 3501 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 3502 mreg); 3503 } else { 3504 /* VQSUB */ 3505 IROp op, op2; 3506 IRTemp tmp; 3507 if (Q) { 3508 switch (size) { 3509 case 0: 3510 op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16; 3511 op2 = Iop_Sub8x16; 3512 break; 3513 case 1: 3514 op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8; 3515 op2 = Iop_Sub16x8; 3516 break; 3517 case 2: 3518 op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4; 3519 op2 = Iop_Sub32x4; 3520 break; 3521 case 3: 3522 op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2; 3523 op2 = Iop_Sub64x2; 3524 break; 3525 default: 3526 vassert(0); 3527 } 3528 } else { 3529 switch (size) { 3530 case 0: 3531 op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8; 3532 op2 = Iop_Sub8x8; 3533 break; 3534 case 1: 3535 op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4; 3536 op2 = Iop_Sub16x4; 3537 break; 3538 case 2: 3539 op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2; 3540 op2 = Iop_Sub32x2; 3541 break; 3542 case 3: 3543 op = U ? 
Iop_QSub64Ux1 : Iop_QSub64Sx1; 3544 op2 = Iop_Sub64; 3545 break; 3546 default: 3547 vassert(0); 3548 } 3549 } 3550 if (Q) 3551 tmp = newTemp(Ity_V128); 3552 else 3553 tmp = newTemp(Ity_I64); 3554 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); 3555 #ifndef DISABLE_QC_FLAG 3556 assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m))); 3557 setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT); 3558 #endif 3559 DIP("vqsub.%c%u %c%u, %c%u, %c%u\n", 3560 U ? 'u' : 's', 8 << size, 3561 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 3562 mreg); 3563 } 3564 break; 3565 case 3: { 3566 IROp op; 3567 if (Q) { 3568 switch (size) { 3569 case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break; 3570 case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break; 3571 case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break; 3572 case 3: return False; 3573 default: vassert(0); 3574 } 3575 } else { 3576 switch (size) { 3577 case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break; 3578 case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break; 3579 case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break; 3580 case 3: return False; 3581 default: vassert(0); 3582 } 3583 } 3584 if (B == 0) { 3585 /* VCGT */ 3586 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); 3587 DIP("vcgt.%c%u %c%u, %c%u, %c%u\n", 3588 U ? 'u' : 's', 8 << size, 3589 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 3590 mreg); 3591 } else { 3592 /* VCGE */ 3593 /* VCGE res, argn, argm 3594 is equal to 3595 VCGT tmp, argm, argn 3596 VNOT res, tmp */ 3597 assign(res, 3598 unop(Q ? Iop_NotV128 : Iop_Not64, 3599 binop(op, mkexpr(arg_m), mkexpr(arg_n)))); 3600 DIP("vcge.%c%u %c%u, %c%u, %c%u\n", 3601 U ? 'u' : 's', 8 << size, 3602 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 3603 mreg); 3604 } 3605 } 3606 break; 3607 case 4: 3608 if (B == 0) { 3609 /* VSHL */ 3610 IROp op, sub_op; 3611 IRTemp tmp; 3612 if (U) { 3613 switch (size) { 3614 case 0: op = Q ? 
Iop_Shl8x16 : Iop_Shl8x8; break; 3615 case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break; 3616 case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break; 3617 case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break; 3618 default: vassert(0); 3619 } 3620 } else { 3621 tmp = newTemp(Q ? Ity_V128 : Ity_I64); 3622 switch (size) { 3623 case 0: 3624 op = Q ? Iop_Sar8x16 : Iop_Sar8x8; 3625 sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8; 3626 break; 3627 case 1: 3628 op = Q ? Iop_Sar16x8 : Iop_Sar16x4; 3629 sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4; 3630 break; 3631 case 2: 3632 op = Q ? Iop_Sar32x4 : Iop_Sar32x2; 3633 sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2; 3634 break; 3635 case 3: 3636 op = Q ? Iop_Sar64x2 : Iop_Sar64; 3637 sub_op = Q ? Iop_Sub64x2 : Iop_Sub64; 3638 break; 3639 default: 3640 vassert(0); 3641 } 3642 } 3643 if (U) { 3644 if (!Q && (size == 3)) 3645 assign(res, binop(op, mkexpr(arg_m), 3646 unop(Iop_64to8, mkexpr(arg_n)))); 3647 else 3648 assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n))); 3649 } else { 3650 if (Q) 3651 assign(tmp, binop(sub_op, 3652 binop(Iop_64HLtoV128, mkU64(0), mkU64(0)), 3653 mkexpr(arg_n))); 3654 else 3655 assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n))); 3656 if (!Q && (size == 3)) 3657 assign(res, binop(op, mkexpr(arg_m), 3658 unop(Iop_64to8, mkexpr(tmp)))); 3659 else 3660 assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp))); 3661 } 3662 DIP("vshl.%c%u %c%u, %c%u, %c%u\n", 3663 U ? 'u' : 's', 8 << size, 3664 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd', 3665 nreg); 3666 } else { 3667 /* VQSHL */ 3668 IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt; 3669 IRTemp tmp, shval, mask, old_shval; 3670 UInt i; 3671 ULong esize; 3672 cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; 3673 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; 3674 if (U) { 3675 switch (size) { 3676 case 0: 3677 op = Q ? Iop_QShl8x16 : Iop_QShl8x8; 3678 op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8; 3679 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3680 op_shln = Q ? 
Iop_ShlN8x16 : Iop_ShlN8x8; 3681 break; 3682 case 1: 3683 op = Q ? Iop_QShl16x8 : Iop_QShl16x4; 3684 op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4; 3685 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3686 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; 3687 break; 3688 case 2: 3689 op = Q ? Iop_QShl32x4 : Iop_QShl32x2; 3690 op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2; 3691 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3692 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; 3693 break; 3694 case 3: 3695 op = Q ? Iop_QShl64x2 : Iop_QShl64x1; 3696 op_rev = Q ? Iop_Shr64x2 : Iop_Shr64; 3697 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64; 3698 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64; 3699 break; 3700 default: 3701 vassert(0); 3702 } 3703 } else { 3704 switch (size) { 3705 case 0: 3706 op = Q ? Iop_QSal8x16 : Iop_QSal8x8; 3707 op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8; 3708 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3709 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; 3710 break; 3711 case 1: 3712 op = Q ? Iop_QSal16x8 : Iop_QSal16x4; 3713 op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4; 3714 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3715 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; 3716 break; 3717 case 2: 3718 op = Q ? Iop_QSal32x4 : Iop_QSal32x2; 3719 op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2; 3720 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3721 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; 3722 break; 3723 case 3: 3724 op = Q ? Iop_QSal64x2 : Iop_QSal64x1; 3725 op_rev = Q ? Iop_Sar64x2 : Iop_Sar64; 3726 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64; 3727 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64; 3728 break; 3729 default: 3730 vassert(0); 3731 } 3732 } 3733 if (Q) { 3734 tmp = newTemp(Ity_V128); 3735 shval = newTemp(Ity_V128); 3736 mask = newTemp(Ity_V128); 3737 } else { 3738 tmp = newTemp(Ity_I64); 3739 shval = newTemp(Ity_I64); 3740 mask = newTemp(Ity_I64); 3741 } 3742 assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n))); 3743 #ifndef DISABLE_QC_FLAG 3744 /* Only least significant byte from second argument is used. 
3745 Copy this byte to the whole vector element. */ 3746 assign(shval, binop(op_shrn, 3747 binop(op_shln, 3748 mkexpr(arg_n), 3749 mkU8((8 << size) - 8)), 3750 mkU8((8 << size) - 8))); 3751 for(i = 0; i < size; i++) { 3752 old_shval = shval; 3753 shval = newTemp(Q ? Ity_V128 : Ity_I64); 3754 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64, 3755 mkexpr(old_shval), 3756 binop(op_shln, 3757 mkexpr(old_shval), 3758 mkU8(8 << i)))); 3759 } 3760 /* If shift is greater or equal to the element size and 3761 element is non-zero, then QC flag should be set. */ 3762 esize = (8 << size) - 1; 3763 esize = (esize << 8) | esize; 3764 esize = (esize << 16) | esize; 3765 esize = (esize << 32) | esize; 3766 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64, 3767 binop(cmp_gt, mkexpr(shval), 3768 Q ? mkU128(esize) : mkU64(esize)), 3769 unop(cmp_neq, mkexpr(arg_m))), 3770 Q ? mkU128(0) : mkU64(0), 3771 Q, condT); 3772 /* Othervise QC flag should be set if shift value is positive and 3773 result beign rightshifted the same value is not equal to left 3774 argument. */ 3775 assign(mask, binop(cmp_gt, mkexpr(shval), 3776 Q ? mkU128(0) : mkU64(0))); 3777 if (!Q && size == 3) 3778 assign(tmp, binop(op_rev, mkexpr(res), 3779 unop(Iop_64to8, mkexpr(arg_n)))); 3780 else 3781 assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n))); 3782 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64, 3783 mkexpr(tmp), mkexpr(mask)), 3784 binop(Q ? Iop_AndV128 : Iop_And64, 3785 mkexpr(arg_m), mkexpr(mask)), 3786 Q, condT); 3787 #endif 3788 DIP("vqshl.%c%u %c%u, %c%u, %c%u\n", 3789 U ? 'u' : 's', 8 << size, 3790 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd', 3791 nreg); 3792 } 3793 break; 3794 case 5: 3795 if (B == 0) { 3796 /* VRSHL */ 3797 IROp op, op_shrn, op_shln, cmp_gt, op_sub, op_add; 3798 IRTemp shval, old_shval, imm_val, round; 3799 UInt i; 3800 ULong imm; 3801 cmp_gt = Q ? 
Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; 3802 imm = 1L; 3803 switch (size) { 3804 case 0: imm = (imm << 8) | imm; /* fall through */ 3805 case 1: imm = (imm << 16) | imm; /* fall through */ 3806 case 2: imm = (imm << 32) | imm; /* fall through */ 3807 case 3: break; 3808 default: vassert(0); 3809 } 3810 imm_val = newTemp(Q ? Ity_V128 : Ity_I64); 3811 round = newTemp(Q ? Ity_V128 : Ity_I64); 3812 assign(imm_val, Q ? mkU128(imm) : mkU64(imm)); 3813 if (U) { 3814 switch (size) { 3815 case 0: 3816 op = Q ? Iop_Shl8x16 : Iop_Shl8x8; 3817 op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8; 3818 op_add = Q ? Iop_Add8x16 : Iop_Add8x8; 3819 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3820 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; 3821 break; 3822 case 1: 3823 op = Q ? Iop_Shl16x8 : Iop_Shl16x4; 3824 op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4; 3825 op_add = Q ? Iop_Add16x8 : Iop_Add16x4; 3826 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3827 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; 3828 break; 3829 case 2: 3830 op = Q ? Iop_Shl32x4 : Iop_Shl32x2; 3831 op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2; 3832 op_add = Q ? Iop_Add32x4 : Iop_Add32x2; 3833 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3834 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; 3835 break; 3836 case 3: 3837 op = Q ? Iop_Shl64x2 : Iop_Shl64; 3838 op_sub = Q ? Iop_Sub64x2 : Iop_Sub64; 3839 op_add = Q ? Iop_Add64x2 : Iop_Add64; 3840 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64; 3841 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64; 3842 break; 3843 default: 3844 vassert(0); 3845 } 3846 } else { 3847 switch (size) { 3848 case 0: 3849 op = Q ? Iop_Sal8x16 : Iop_Sal8x8; 3850 op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8; 3851 op_add = Q ? Iop_Add8x16 : Iop_Add8x8; 3852 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3853 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; 3854 break; 3855 case 1: 3856 op = Q ? Iop_Sal16x8 : Iop_Sal16x4; 3857 op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4; 3858 op_add = Q ? Iop_Add16x8 : Iop_Add16x4; 3859 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3860 op_shln = Q ? 
Iop_ShlN16x8 : Iop_ShlN16x4; 3861 break; 3862 case 2: 3863 op = Q ? Iop_Sal32x4 : Iop_Sal32x2; 3864 op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2; 3865 op_add = Q ? Iop_Add32x4 : Iop_Add32x2; 3866 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3867 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; 3868 break; 3869 case 3: 3870 op = Q ? Iop_Sal64x2 : Iop_Sal64x1; 3871 op_sub = Q ? Iop_Sub64x2 : Iop_Sub64; 3872 op_add = Q ? Iop_Add64x2 : Iop_Add64; 3873 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64; 3874 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64; 3875 break; 3876 default: 3877 vassert(0); 3878 } 3879 } 3880 if (Q) { 3881 shval = newTemp(Ity_V128); 3882 } else { 3883 shval = newTemp(Ity_I64); 3884 } 3885 /* Only least significant byte from second argument is used. 3886 Copy this byte to the whole vector element. */ 3887 assign(shval, binop(op_shrn, 3888 binop(op_shln, 3889 mkexpr(arg_n), 3890 mkU8((8 << size) - 8)), 3891 mkU8((8 << size) - 8))); 3892 for (i = 0; i < size; i++) { 3893 old_shval = shval; 3894 shval = newTemp(Q ? Ity_V128 : Ity_I64); 3895 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64, 3896 mkexpr(old_shval), 3897 binop(op_shln, 3898 mkexpr(old_shval), 3899 mkU8(8 << i)))); 3900 } 3901 /* Compute the result */ 3902 if (!Q && size == 3 && U) { 3903 assign(round, binop(Q ? Iop_AndV128 : Iop_And64, 3904 binop(op, 3905 mkexpr(arg_m), 3906 unop(Iop_64to8, 3907 binop(op_add, 3908 mkexpr(arg_n), 3909 mkexpr(imm_val)))), 3910 binop(Q ? Iop_AndV128 : Iop_And64, 3911 mkexpr(imm_val), 3912 binop(cmp_gt, 3913 Q ? mkU128(0) : mkU64(0), 3914 mkexpr(arg_n))))); 3915 assign(res, binop(op_add, 3916 binop(op, 3917 mkexpr(arg_m), 3918 unop(Iop_64to8, mkexpr(arg_n))), 3919 mkexpr(round))); 3920 } else { 3921 assign(round, binop(Q ? Iop_AndV128 : Iop_And64, 3922 binop(op, 3923 mkexpr(arg_m), 3924 binop(op_add, 3925 mkexpr(arg_n), 3926 mkexpr(imm_val))), 3927 binop(Q ? Iop_AndV128 : Iop_And64, 3928 mkexpr(imm_val), 3929 binop(cmp_gt, 3930 Q ? 
mkU128(0) : mkU64(0), 3931 mkexpr(arg_n))))); 3932 assign(res, binop(op_add, 3933 binop(op, mkexpr(arg_m), mkexpr(arg_n)), 3934 mkexpr(round))); 3935 } 3936 DIP("vrshl.%c%u %c%u, %c%u, %c%u\n", 3937 U ? 'u' : 's', 8 << size, 3938 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd', 3939 nreg); 3940 } else { 3941 /* VQRSHL */ 3942 IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_sub, op_add; 3943 IRTemp tmp, shval, mask, old_shval, imm_val, round; 3944 UInt i; 3945 ULong esize, imm; 3946 cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; 3947 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; 3948 imm = 1L; 3949 switch (size) { 3950 case 0: imm = (imm << 8) | imm; /* fall through */ 3951 case 1: imm = (imm << 16) | imm; /* fall through */ 3952 case 2: imm = (imm << 32) | imm; /* fall through */ 3953 case 3: break; 3954 default: vassert(0); 3955 } 3956 imm_val = newTemp(Q ? Ity_V128 : Ity_I64); 3957 round = newTemp(Q ? Ity_V128 : Ity_I64); 3958 assign(imm_val, Q ? mkU128(imm) : mkU64(imm)); 3959 if (U) { 3960 switch (size) { 3961 case 0: 3962 op = Q ? Iop_QShl8x16 : Iop_QShl8x8; 3963 op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8; 3964 op_add = Q ? Iop_Add8x16 : Iop_Add8x8; 3965 op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8; 3966 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 3967 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; 3968 break; 3969 case 1: 3970 op = Q ? Iop_QShl16x8 : Iop_QShl16x4; 3971 op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4; 3972 op_add = Q ? Iop_Add16x8 : Iop_Add16x4; 3973 op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4; 3974 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 3975 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; 3976 break; 3977 case 2: 3978 op = Q ? Iop_QShl32x4 : Iop_QShl32x2; 3979 op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2; 3980 op_add = Q ? Iop_Add32x4 : Iop_Add32x2; 3981 op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2; 3982 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 3983 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; 3984 break; 3985 case 3: 3986 op = Q ? 
Iop_QShl64x2 : Iop_QShl64x1; 3987 op_sub = Q ? Iop_Sub64x2 : Iop_Sub64; 3988 op_add = Q ? Iop_Add64x2 : Iop_Add64; 3989 op_rev = Q ? Iop_Shr64x2 : Iop_Shr64; 3990 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64; 3991 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64; 3992 break; 3993 default: 3994 vassert(0); 3995 } 3996 } else { 3997 switch (size) { 3998 case 0: 3999 op = Q ? Iop_QSal8x16 : Iop_QSal8x8; 4000 op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8; 4001 op_add = Q ? Iop_Add8x16 : Iop_Add8x8; 4002 op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8; 4003 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; 4004 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; 4005 break; 4006 case 1: 4007 op = Q ? Iop_QSal16x8 : Iop_QSal16x4; 4008 op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4; 4009 op_add = Q ? Iop_Add16x8 : Iop_Add16x4; 4010 op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4; 4011 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; 4012 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; 4013 break; 4014 case 2: 4015 op = Q ? Iop_QSal32x4 : Iop_QSal32x2; 4016 op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2; 4017 op_add = Q ? Iop_Add32x4 : Iop_Add32x2; 4018 op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2; 4019 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; 4020 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; 4021 break; 4022 case 3: 4023 op = Q ? Iop_QSal64x2 : Iop_QSal64x1; 4024 op_sub = Q ? Iop_Sub64x2 : Iop_Sub64; 4025 op_add = Q ? Iop_Add64x2 : Iop_Add64; 4026 op_rev = Q ? Iop_Sar64x2 : Iop_Sar64; 4027 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64; 4028 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64; 4029 break; 4030 default: 4031 vassert(0); 4032 } 4033 } 4034 if (Q) { 4035 tmp = newTemp(Ity_V128); 4036 shval = newTemp(Ity_V128); 4037 mask = newTemp(Ity_V128); 4038 } else { 4039 tmp = newTemp(Ity_I64); 4040 shval = newTemp(Ity_I64); 4041 mask = newTemp(Ity_I64); 4042 } 4043 /* Only least significant byte from second argument is used. 4044 Copy this byte to the whole vector element. 
*/ 4045 assign(shval, binop(op_shrn, 4046 binop(op_shln, 4047 mkexpr(arg_n), 4048 mkU8((8 << size) - 8)), 4049 mkU8((8 << size) - 8))); 4050 for (i = 0; i < size; i++) { 4051 old_shval = shval; 4052 shval = newTemp(Q ? Ity_V128 : Ity_I64); 4053 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64, 4054 mkexpr(old_shval), 4055 binop(op_shln, 4056 mkexpr(old_shval), 4057 mkU8(8 << i)))); 4058 } 4059 /* Compute the result */ 4060 assign(round, binop(Q ? Iop_AndV128 : Iop_And64, 4061 binop(op, 4062 mkexpr(arg_m), 4063 binop(op_add, 4064 mkexpr(arg_n), 4065 mkexpr(imm_val))), 4066 binop(Q ? Iop_AndV128 : Iop_And64, 4067 mkexpr(imm_val), 4068 binop(cmp_gt, 4069 Q ? mkU128(0) : mkU64(0), 4070 mkexpr(arg_n))))); 4071 assign(res, binop(op_add, 4072 binop(op, mkexpr(arg_m), mkexpr(arg_n)), 4073 mkexpr(round))); 4074 #ifndef DISABLE_QC_FLAG 4075 /* If shift is greater or equal to the element size and element is 4076 non-zero, then QC flag should be set. */ 4077 esize = (8 << size) - 1; 4078 esize = (esize << 8) | esize; 4079 esize = (esize << 16) | esize; 4080 esize = (esize << 32) | esize; 4081 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64, 4082 binop(cmp_gt, mkexpr(shval), 4083 Q ? mkU128(esize) : mkU64(esize)), 4084 unop(cmp_neq, mkexpr(arg_m))), 4085 Q ? mkU128(0) : mkU64(0), 4086 Q, condT); 4087 /* Othervise QC flag should be set if shift value is positive and 4088 result beign rightshifted the same value is not equal to left 4089 argument. */ 4090 assign(mask, binop(cmp_gt, mkexpr(shval), 4091 Q ? mkU128(0) : mkU64(0))); 4092 if (!Q && size == 3) 4093 assign(tmp, binop(op_rev, mkexpr(res), 4094 unop(Iop_64to8, mkexpr(arg_n)))); 4095 else 4096 assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n))); 4097 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64, 4098 mkexpr(tmp), mkexpr(mask)), 4099 binop(Q ? Iop_AndV128 : Iop_And64, 4100 mkexpr(arg_m), mkexpr(mask)), 4101 Q, condT); 4102 #endif 4103 DIP("vqrshl.%c%u %c%u, %c%u, %c%u\n", 4104 U ? 'u' : 's', 8 << size, 4105 Q ? 
'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd', 4106 nreg); 4107 } 4108 break; 4109 case 6: 4110 /* VMAX, VMIN */ 4111 if (B == 0) { 4112 /* VMAX */ 4113 IROp op; 4114 if (U == 0) { 4115 switch (size) { 4116 case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break; 4117 case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break; 4118 case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break; 4119 case 3: return False; 4120 default: vassert(0); 4121 } 4122 } else { 4123 switch (size) { 4124 case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break; 4125 case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break; 4126 case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break; 4127 case 3: return False; 4128 default: vassert(0); 4129 } 4130 } 4131 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); 4132 DIP("vmax.%c%u %c%u, %c%u, %c%u\n", 4133 U ? 'u' : 's', 8 << size, 4134 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 4135 mreg); 4136 } else { 4137 /* VMIN */ 4138 IROp op; 4139 if (U == 0) { 4140 switch (size) { 4141 case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break; 4142 case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break; 4143 case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break; 4144 case 3: return False; 4145 default: vassert(0); 4146 } 4147 } else { 4148 switch (size) { 4149 case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break; 4150 case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break; 4151 case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break; 4152 case 3: return False; 4153 default: vassert(0); 4154 } 4155 } 4156 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m))); 4157 DIP("vmin.%c%u %c%u, %c%u, %c%u\n", 4158 U ? 'u' : 's', 8 << size, 4159 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 4160 mreg); 4161 } 4162 break; 4163 case 7: 4164 if (B == 0) { 4165 /* VABD */ 4166 IROp op_cmp, op_sub; 4167 IRTemp cond; 4168 if ((theInstr >> 23) & 1) { 4169 vpanic("VABDL should not be in dis_neon_data_3same\n"); 4170 } 4171 if (Q) { 4172 switch (size) { 4173 case 0: 4174 op_cmp = U ? 
Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; 4175 op_sub = Iop_Sub8x16; 4176 break; 4177 case 1: 4178 op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; 4179 op_sub = Iop_Sub16x8; 4180 break; 4181 case 2: 4182 op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; 4183 op_sub = Iop_Sub32x4; 4184 break; 4185 case 3: 4186 return False; 4187 default: 4188 vassert(0); 4189 } 4190 } else { 4191 switch (size) { 4192 case 0: 4193 op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; 4194 op_sub = Iop_Sub8x8; 4195 break; 4196 case 1: 4197 op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; 4198 op_sub = Iop_Sub16x4; 4199 break; 4200 case 2: 4201 op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2; 4202 op_sub = Iop_Sub32x2; 4203 break; 4204 case 3: 4205 return False; 4206 default: 4207 vassert(0); 4208 } 4209 } 4210 if (Q) { 4211 cond = newTemp(Ity_V128); 4212 } else { 4213 cond = newTemp(Ity_I64); 4214 } 4215 assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m))); 4216 assign(res, binop(Q ? Iop_OrV128 : Iop_Or64, 4217 binop(Q ? Iop_AndV128 : Iop_And64, 4218 binop(op_sub, mkexpr(arg_n), 4219 mkexpr(arg_m)), 4220 mkexpr(cond)), 4221 binop(Q ? Iop_AndV128 : Iop_And64, 4222 binop(op_sub, mkexpr(arg_m), 4223 mkexpr(arg_n)), 4224 unop(Q ? Iop_NotV128 : Iop_Not64, 4225 mkexpr(cond))))); 4226 DIP("vabd.%c%u %c%u, %c%u, %c%u\n", 4227 U ? 'u' : 's', 8 << size, 4228 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', 4229 mreg); 4230 } else { 4231 /* VABA */ 4232 IROp op_cmp, op_sub, op_add; 4233 IRTemp cond, acc, tmp; 4234 if ((theInstr >> 23) & 1) { 4235 vpanic("VABAL should not be in dis_neon_data_3same"); 4236 } 4237 if (Q) { 4238 switch (size) { 4239 case 0: 4240 op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; 4241 op_sub = Iop_Sub8x16; 4242 op_add = Iop_Add8x16; 4243 break; 4244 case 1: 4245 op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; 4246 op_sub = Iop_Sub16x8; 4247 op_add = Iop_Add16x8; 4248 break; 4249 case 2: 4250 op_cmp = U ? 
Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; 4251 op_sub = Iop_Sub32x4; 4252 op_add = Iop_Add32x4; 4253 break; 4254 case 3: 4255 return False; 4256 default: 4257 vassert(0); 4258 } 4259 } else { 4260 switch (size) { 4261 case 0: 4262 op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; 4263 op_sub = Iop_Sub8x8; 4264 op_add = Iop_Add8x8; 4265 break; 4266 case 1: 4267 op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; 4268 op_sub = Iop_Sub16x4; 4269 op_add = Iop_Add16x4; 4270 break; 4271 case 2: 4272 op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2; 4273 op_sub = Iop_Sub32x2; 4274 op_add = Iop_Add32x2; 4275 break; 4276 case 3: 4277 return False; 4278 default: 4279 vassert(0); 4280 } 4281 } 4282 if (Q) { 4283 cond = newTemp(Ity_V128); 4284 acc = newTemp(Ity_V128); 4285 tmp = newTemp(Ity_V128); 4286 assign(acc, getQReg(dreg)); 4287 } else { 4288 cond = newTemp(Ity_I64); 4289 acc = newTemp(Ity_I64); 4290 tmp = newTemp(Ity_I64); 4291 assign(acc, getDRegI64(dreg)); 4292 } 4293 assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));