      1 /* -*- mode: C; c-basic-offset: 3; -*- */
      2 
      3 /*--------------------------------------------------------------------*/
      4 /*--- begin                                     guest_arm64_toIR.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2013-2013 OpenWorks
     12       info (at) open-works.net
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     27    02110-1301, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 //ZZ /* XXXX thumb to check:
     33 //ZZ    that all cases where putIRegT writes r15, we generate a jump.
     34 //ZZ
     35 //ZZ    All uses of newTemp assign to an IRTemp and not a UInt
     36 //ZZ
     37 //ZZ    For all thumb loads and stores, including VFP ones, new-ITSTATE is
     38 //ZZ    backed out before the memory op, and restored afterwards.  This
     39 //ZZ    needs to happen even after we go uncond.  (and for sure it doesn't
     40 //ZZ    happen for VFP loads/stores right now).
     41 //ZZ
     42 //ZZ    VFP on thumb: check that we exclude all r13/r15 cases that we
     43 //ZZ    should.
     44 //ZZ
     45 //ZZ    XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
     46 //ZZ    taking into account the number of insns guarded by an IT.
     47 //ZZ
     48 //ZZ    remove the nasty hack, in the spechelper, of looking for Or32(...,
      49 //ZZ    0xE0) as the first arg to armg_calculate_condition, and instead
     50 //ZZ    use Slice44 as specified in comments in the spechelper.
     51 //ZZ
     52 //ZZ    add specialisations for armg_calculate_flag_c and _v, as they
     53 //ZZ    are moderately often needed in Thumb code.
     54 //ZZ
     55 //ZZ    Correctness: ITSTATE handling in Thumb SVCs is wrong.
     56 //ZZ
     57 //ZZ    Correctness (obscure): in m_transtab, when invalidating code
     58 //ZZ    address ranges, invalidate up to 18 bytes after the end of the
     59 //ZZ    range.  This is because the ITSTATE optimisation at the top of
     60 //ZZ    _THUMB_WRK below analyses up to 18 bytes before the start of any
     61 //ZZ    given instruction, and so might depend on the invalidated area.
     62 //ZZ */
     63 //ZZ
     64 //ZZ /* Limitations, etc
     65 //ZZ
     66 //ZZ    - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
     67 //ZZ      These instructions are non-restartable in the case where the
     68 //ZZ      transfer(s) fault.
     69 //ZZ
     70 //ZZ    - SWP: the restart jump back is Ijk_Boring; it should be
     71 //ZZ      Ijk_NoRedir but that's expensive.  See comments on casLE() in
     72 //ZZ      guest_x86_toIR.c.
     73 //ZZ */
     74 
     75 /* "Special" instructions.
     76 
     77    This instruction decoder can decode four special instructions
     78    which mean nothing natively (are no-ops as far as regs/mem are
     79    concerned) but have meaning for supporting Valgrind.  A special
     80    instruction is flagged by a 16-byte preamble:
     81 
     82       93CC0D8C 93CC358C 93CCCD8C 93CCF58C
     83       (ror x12, x12, #3;   ror x12, x12, #13
     84        ror x12, x12, #51;  ror x12, x12, #61)
     85 
      86    Following that, one of the following 4 is allowed
     87    (standard interpretation in parentheses):
     88 
     89       AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
     90       AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
     91       AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
     92       AA090129 (orr x9,x9,x9)      IR injection
     93 
     94    Any other bytes following the 16-byte preamble are illegal and
     95    constitute a failure in instruction decoding.  This all assumes
     96    that the preamble will never occur except in specific code
     97    fragments designed for Valgrind to catch.
     98 */
     99 
    100 /* Translates ARM64 code to IR. */
    101 
    102 #include "libvex_basictypes.h"
    103 #include "libvex_ir.h"
    104 #include "libvex.h"
    105 #include "libvex_guest_arm64.h"
    106 
    107 #include "main_util.h"
    108 #include "main_globals.h"
    109 #include "guest_generic_bb_to_IR.h"
    110 #include "guest_arm64_defs.h"
    111 
    112 
    113 /*------------------------------------------------------------*/
    114 /*--- Globals                                              ---*/
    115 /*------------------------------------------------------------*/
    116 
     117 /* These are set at the start of the translation of an instruction, so
     118    that we don't have to pass them around endlessly.  CONST means the
     119    value does not change during translation of the instruction.
    120 */
    121 
    122 /* CONST: is the host bigendian?  We need to know this in order to do
    123    sub-register accesses to the SIMD/FP registers correctly. */
    124 static Bool host_is_bigendian;
    125 
    126 /* CONST: The guest address for the instruction currently being
    127    translated.  */
    128 static Addr64 guest_PC_curr_instr;
    129 
    130 /* MOD: The IRSB* into which we're generating code. */
    131 static IRSB* irsb;
    132 
    133 
    134 /*------------------------------------------------------------*/
    135 /*--- Debugging output                                     ---*/
    136 /*------------------------------------------------------------*/
    137 
    138 #define DIP(format, args...)           \
    139    if (vex_traceflags & VEX_TRACE_FE)  \
    140       vex_printf(format, ## args)
    141 
    142 #define DIS(buf, format, args...)      \
    143    if (vex_traceflags & VEX_TRACE_FE)  \
    144       vex_sprintf(buf, format, ## args)
    145 
    146 
    147 /*------------------------------------------------------------*/
    148 /*--- Helper bits and pieces for deconstructing the        ---*/
    149 /*--- arm insn stream.                                     ---*/
    150 /*------------------------------------------------------------*/
    151 
    152 /* Do a little-endian load of a 32-bit word, regardless of the
    153    endianness of the underlying host. */
    154 static inline UInt getUIntLittleEndianly ( UChar* p )
    155 {
    156    UInt w = 0;
    157    w = (w << 8) | p[3];
    158    w = (w << 8) | p[2];
    159    w = (w << 8) | p[1];
    160    w = (w << 8) | p[0];
    161    return w;
    162 }
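
/* Illustrative sketch, not part of the original decoder: the 16-byte
   "special instruction" preamble documented at the top of this file
   (93CC0D8C 93CC358C 93CCCD8C 93CCF58C) could be recognised like this.
   The function name is hypothetical. */
static inline Bool example_isSpecialPreamble ( UChar* code )
{
   return getUIntLittleEndianly(code +  0) == 0x93CC0D8CU
          && getUIntLittleEndianly(code +  4) == 0x93CC358CU
          && getUIntLittleEndianly(code +  8) == 0x93CCCD8CU
          && getUIntLittleEndianly(code + 12) == 0x93CCF58CU;
}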
    163 
     164 /* Sign extend an N-bit value up to 64 bits, by copying
    165    bit N-1 into all higher positions. */
    166 static ULong sx_to_64 ( ULong x, UInt n )
    167 {
    168    vassert(n > 1 && n < 64);
    169    Long r = (Long)x;
    170    r = (r << (64-n)) >> (64-n);
    171    return (ULong)r;
    172 }
    173 
    174 //ZZ /* Do a little-endian load of a 16-bit word, regardless of the
    175 //ZZ    endianness of the underlying host. */
    176 //ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
    177 //ZZ {
    178 //ZZ    UShort w = 0;
    179 //ZZ    w = (w << 8) | p[1];
    180 //ZZ    w = (w << 8) | p[0];
    181 //ZZ    return w;
    182 //ZZ }
    183 //ZZ
    184 //ZZ static UInt ROR32 ( UInt x, UInt sh ) {
    185 //ZZ    vassert(sh >= 0 && sh < 32);
    186 //ZZ    if (sh == 0)
    187 //ZZ       return x;
    188 //ZZ    else
    189 //ZZ       return (x << (32-sh)) | (x >> sh);
    190 //ZZ }
    191 //ZZ
    192 //ZZ static Int popcount32 ( UInt x )
    193 //ZZ {
    194 //ZZ    Int res = 0, i;
    195 //ZZ    for (i = 0; i < 32; i++) {
    196 //ZZ       res += (x & 1);
    197 //ZZ       x >>= 1;
    198 //ZZ    }
    199 //ZZ    return res;
    200 //ZZ }
    201 //ZZ
    202 //ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
    203 //ZZ {
    204 //ZZ    UInt mask = 1 << ix;
    205 //ZZ    x &= ~mask;
    206 //ZZ    x |= ((b << ix) & mask);
    207 //ZZ    return x;
    208 //ZZ }
    209 
    210 #define BITS2(_b1,_b0)  \
    211    (((_b1) << 1) | (_b0))
    212 
    213 #define BITS3(_b2,_b1,_b0)  \
    214   (((_b2) << 2) | ((_b1) << 1) | (_b0))
    215 
    216 #define BITS4(_b3,_b2,_b1,_b0)  \
    217    (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
    218 
    219 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    220    ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)  \
    221     | BITS4((_b3),(_b2),(_b1),(_b0)))
    222 
    223 #define BITS5(_b4,_b3,_b2,_b1,_b0)  \
    224    (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
    225 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
    226    (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    227 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    228    (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    229 
    230 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    231    (((_b8) << 8)  \
    232     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    233 
    234 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    235    (((_b9) << 9) | ((_b8) << 8)  \
    236     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
    237 
    238 #define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
    239    (((_b10) << 10)  \
    240     | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
    241 
    242 #define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
    243    (((_b11) << 11)  \
    244     | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
    245 
    246 // produces _uint[_bMax:_bMin]
    247 #define SLICE_UInt(_uint,_bMax,_bMin)  \
    248    (( ((UInt)(_uint)) >> (_bMin))  \
    249     & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
    250 
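/* Usage sketch (illustrative only; the field positions are an arbitrary
   example, not a claim about any particular encoding): for a 32-bit
   instruction word 'insn', a 5-bit field at bits 25..21 can be extracted
   and compared against a pattern built with the BITSn macros:

      if (SLICE_UInt(insn, 25, 21) == BITS5(1,1,0,1,0)) { ... }

   Here SLICE_UInt shifts 'insn' right by 21 and masks with
   (1 << (25-21+1)) - 1 = 0x1F, leaving bits 25..21 in the low 5 bits. */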
    251 
    252 /*------------------------------------------------------------*/
    253 /*--- Helper bits and pieces for creating IR fragments.    ---*/
    254 /*------------------------------------------------------------*/
    255 
    256 static IRExpr* mkV128 ( UShort w )
    257 {
    258    return IRExpr_Const(IRConst_V128(w));
    259 }
    260 
    261 static IRExpr* mkU64 ( ULong i )
    262 {
    263    return IRExpr_Const(IRConst_U64(i));
    264 }
    265 
    266 static IRExpr* mkU32 ( UInt i )
    267 {
    268    return IRExpr_Const(IRConst_U32(i));
    269 }
    270 
    271 static IRExpr* mkU8 ( UInt i )
    272 {
    273    vassert(i < 256);
    274    return IRExpr_Const(IRConst_U8( (UChar)i ));
    275 }
    276 
    277 static IRExpr* mkexpr ( IRTemp tmp )
    278 {
    279    return IRExpr_RdTmp(tmp);
    280 }
    281 
    282 static IRExpr* unop ( IROp op, IRExpr* a )
    283 {
    284    return IRExpr_Unop(op, a);
    285 }
    286 
    287 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
    288 {
    289    return IRExpr_Binop(op, a1, a2);
    290 }
    291 
    292 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
    293 {
    294    return IRExpr_Triop(op, a1, a2, a3);
    295 }
    296 
    297 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
    298 {
    299    return IRExpr_Load(Iend_LE, ty, addr);
    300 }
    301 
     302 /* Add a statement to the list held by "irsb". */
    303 static void stmt ( IRStmt* st )
    304 {
    305    addStmtToIRSB( irsb, st );
    306 }
    307 
    308 static void assign ( IRTemp dst, IRExpr* e )
    309 {
    310    stmt( IRStmt_WrTmp(dst, e) );
    311 }
    312 
    313 static void storeLE ( IRExpr* addr, IRExpr* data )
    314 {
    315    stmt( IRStmt_Store(Iend_LE, addr, data) );
    316 }
    317 
    318 //ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
    319 //ZZ {
    320 //ZZ    if (guardT == IRTemp_INVALID) {
    321 //ZZ       /* unconditional */
    322 //ZZ       storeLE(addr, data);
    323 //ZZ    } else {
    324 //ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
    325 //ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
    326 //ZZ    }
    327 //ZZ }
    328 //ZZ
    329 //ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
    330 //ZZ                             IRExpr* addr, IRExpr* alt,
    331 //ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
    332 //ZZ {
    333 //ZZ    if (guardT == IRTemp_INVALID) {
    334 //ZZ       /* unconditional */
    335 //ZZ       IRExpr* loaded = NULL;
    336 //ZZ       switch (cvt) {
    337 //ZZ          case ILGop_Ident32:
    338 //ZZ             loaded = loadLE(Ity_I32, addr); break;
    339 //ZZ          case ILGop_8Uto32:
    340 //ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
    341 //ZZ          case ILGop_8Sto32:
    342 //ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
    343 //ZZ          case ILGop_16Uto32:
    344 //ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
    345 //ZZ          case ILGop_16Sto32:
    346 //ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
    347 //ZZ          default:
    348 //ZZ             vassert(0);
    349 //ZZ       }
    350 //ZZ       vassert(loaded != NULL);
    351 //ZZ       assign(dst, loaded);
    352 //ZZ    } else {
    353 //ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
    354 //ZZ          loaded data before putting the data in 'dst'.  If the load
    355 //ZZ          does not take place, 'alt' is placed directly in 'dst'. */
    356 //ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
    357 //ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
    358 //ZZ    }
    359 //ZZ }
    360 
    361 /* Generate a new temporary of the given type. */
    362 static IRTemp newTemp ( IRType ty )
    363 {
    364    vassert(isPlausibleIRType(ty));
    365    return newIRTemp( irsb->tyenv, ty );
    366 }
    367 
    368 //ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
    369 //ZZ    IRRoundingMode. */
    370 //ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
    371 //ZZ {
    372 //ZZ    return mkU32(Irrm_NEAREST);
    373 //ZZ }
    374 //ZZ
    375 //ZZ /* Generate an expression for SRC rotated right by ROT. */
    376 //ZZ static IRExpr* genROR32( IRTemp src, Int rot )
    377 //ZZ {
    378 //ZZ    vassert(rot >= 0 && rot < 32);
    379 //ZZ    if (rot == 0)
    380 //ZZ       return mkexpr(src);
    381 //ZZ    return
    382 //ZZ       binop(Iop_Or32,
    383 //ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
    384 //ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
    385 //ZZ }
    386 //ZZ
    387 //ZZ static IRExpr* mkU128 ( ULong i )
    388 //ZZ {
    389 //ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
    390 //ZZ }
    391 //ZZ
    392 //ZZ /* Generate a 4-aligned version of the given expression if
    393 //ZZ    the given condition is true.  Else return it unchanged. */
    394 //ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
    395 //ZZ {
    396 //ZZ    if (b)
    397 //ZZ       return binop(Iop_And32, e, mkU32(~3));
    398 //ZZ    else
    399 //ZZ       return e;
    400 //ZZ }
    401 
    402 /* Other IR construction helpers. */
    403 static IROp mkAND ( IRType ty ) {
    404    switch (ty) {
    405       case Ity_I32: return Iop_And32;
    406       case Ity_I64: return Iop_And64;
    407       default: vpanic("mkAND");
    408    }
    409 }
    410 
    411 static IROp mkOR ( IRType ty ) {
    412    switch (ty) {
    413       case Ity_I32: return Iop_Or32;
    414       case Ity_I64: return Iop_Or64;
    415       default: vpanic("mkOR");
    416    }
    417 }
    418 
    419 static IROp mkXOR ( IRType ty ) {
    420    switch (ty) {
    421       case Ity_I32: return Iop_Xor32;
    422       case Ity_I64: return Iop_Xor64;
    423       default: vpanic("mkXOR");
    424    }
    425 }
    426 
    427 static IROp mkSHL ( IRType ty ) {
    428    switch (ty) {
    429       case Ity_I32: return Iop_Shl32;
    430       case Ity_I64: return Iop_Shl64;
    431       default: vpanic("mkSHL");
    432    }
    433 }
    434 
    435 static IROp mkSHR ( IRType ty ) {
    436    switch (ty) {
    437       case Ity_I32: return Iop_Shr32;
    438       case Ity_I64: return Iop_Shr64;
    439       default: vpanic("mkSHR");
    440    }
    441 }
    442 
    443 static IROp mkSAR ( IRType ty ) {
    444    switch (ty) {
    445       case Ity_I32: return Iop_Sar32;
    446       case Ity_I64: return Iop_Sar64;
    447       default: vpanic("mkSAR");
    448    }
    449 }
    450 
    451 static IROp mkNOT ( IRType ty ) {
    452    switch (ty) {
    453       case Ity_I32: return Iop_Not32;
    454       case Ity_I64: return Iop_Not64;
    455       default: vpanic("mkNOT");
    456    }
    457 }
    458 
    459 static IROp mkADD ( IRType ty ) {
    460    switch (ty) {
    461       case Ity_I32: return Iop_Add32;
    462       case Ity_I64: return Iop_Add64;
    463       default: vpanic("mkADD");
    464    }
    465 }
    466 
    467 static IROp mkSUB ( IRType ty ) {
    468    switch (ty) {
    469       case Ity_I32: return Iop_Sub32;
    470       case Ity_I64: return Iop_Sub64;
    471       default: vpanic("mkSUB");
    472    }
    473 }
    474 
    475 static IROp mkADDF ( IRType ty ) {
    476    switch (ty) {
    477       case Ity_F32: return Iop_AddF32;
    478       case Ity_F64: return Iop_AddF64;
    479       default: vpanic("mkADDF");
    480    }
    481 }
    482 
    483 static IROp mkSUBF ( IRType ty ) {
    484    switch (ty) {
    485       case Ity_F32: return Iop_SubF32;
    486       case Ity_F64: return Iop_SubF64;
    487       default: vpanic("mkSUBF");
    488    }
    489 }
    490 
    491 static IROp mkMULF ( IRType ty ) {
    492    switch (ty) {
    493       case Ity_F32: return Iop_MulF32;
    494       case Ity_F64: return Iop_MulF64;
    495       default: vpanic("mkMULF");
    496    }
    497 }
    498 
    499 static IROp mkDIVF ( IRType ty ) {
    500    switch (ty) {
    501       case Ity_F32: return Iop_DivF32;
    502       case Ity_F64: return Iop_DivF64;
     503       default: vpanic("mkDIVF");
    504    }
    505 }
    506 
    507 static IROp mkNEGF ( IRType ty ) {
    508    switch (ty) {
    509       case Ity_F32: return Iop_NegF32;
    510       case Ity_F64: return Iop_NegF64;
    511       default: vpanic("mkNEGF");
    512    }
    513 }
    514 
    515 static IROp mkABSF ( IRType ty ) {
    516    switch (ty) {
    517       case Ity_F32: return Iop_AbsF32;
    518       case Ity_F64: return Iop_AbsF64;
     519       default: vpanic("mkABSF");
    520    }
    521 }
    522 
    523 static IROp mkSQRTF ( IRType ty ) {
    524    switch (ty) {
    525       case Ity_F32: return Iop_SqrtF32;
    526       case Ity_F64: return Iop_SqrtF64;
     527       default: vpanic("mkSQRTF");
    528    }
    529 }
    530 
    531 static IRExpr* mkU ( IRType ty, ULong imm ) {
    532    switch (ty) {
    533       case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
    534       case Ity_I64: return mkU64(imm);
    535       default: vpanic("mkU");
    536    }
    537 }
    538 
    539 /* Generate IR to create 'arg rotated right by imm', for sane values
    540    of 'ty' and 'imm'. */
    541 static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
    542 {
    543    UInt w = 0;
    544    if (ty == Ity_I64) {
    545       w = 64;
    546    } else {
    547       vassert(ty == Ity_I32);
    548       w = 32;
    549    }
    550    vassert(w != 0);
    551    vassert(imm < w);
    552    if (imm == 0) {
    553       return arg;
    554    }
    555    IRTemp res = newTemp(ty);
    556    assign(res, binop(mkOR(ty),
    557                      binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
    558                      binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
    559    return res;
    560 }
    561 
    562 /* Generate IR to set the returned temp to either all-zeroes or
    563    all ones, as a copy of arg<imm>. */
    564 static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
    565 {
    566    UInt w = 0;
    567    if (ty == Ity_I64) {
    568       w = 64;
    569    } else {
    570       vassert(ty == Ity_I32);
    571       w = 32;
    572    }
    573    vassert(w != 0);
    574    vassert(imm < w);
    575    IRTemp res = newTemp(ty);
    576    assign(res, binop(mkSAR(ty),
    577                      binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
    578                      mkU8(w - 1)));
    579    return res;
    580 }
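
/* Worked example (illustrative only): if 'arg' :: Ity_I32 holds 0x00008000,
   then mathREPLICATE(Ity_I32, arg, 15) computes
      (0x00008000 << (32-1-15)) >>signed (32-1)  =  0x80000000 >>s 31
   which is 0xFFFFFFFF, whereas mathREPLICATE(Ity_I32, arg, 14) yields
   0x00000000, since bit 14 of the argument is zero. */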
    581 
    582 /* U-widen 8/16/32/64 bit int expr to 64. */
    583 static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
    584 {
    585    switch (srcTy) {
    586       case Ity_I64: return e;
    587       case Ity_I32: return unop(Iop_32Uto64, e);
    588       case Ity_I16: return unop(Iop_16Uto64, e);
    589       case Ity_I8:  return unop(Iop_8Uto64, e);
    590       default: vpanic("widenUto64(arm64)");
    591    }
    592 }
    593 
    594 /* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
    595    of these combinations make sense. */
    596 static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
    597 {
    598    switch (dstTy) {
    599       case Ity_I64: return e;
    600       case Ity_I32: return unop(Iop_64to32, e);
    601       case Ity_I16: return unop(Iop_64to16, e);
    602       case Ity_I8:  return unop(Iop_64to8, e);
    603       default: vpanic("narrowFrom64(arm64)");
    604    }
    605 }
    606 
    607 
    608 /*------------------------------------------------------------*/
    609 /*--- Helpers for accessing guest registers.               ---*/
    610 /*------------------------------------------------------------*/
    611 
    612 #define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
    613 #define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
    614 #define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
    615 #define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
    616 #define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
    617 #define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
    618 #define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
    619 #define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
    620 #define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
    621 #define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
    622 #define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
    623 #define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
    624 #define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
    625 #define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
    626 #define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
    627 #define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
    628 #define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
    629 #define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
    630 #define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
    631 #define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
    632 #define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
    633 #define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
    634 #define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
    635 #define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
    636 #define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
    637 #define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
    638 #define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
    639 #define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
    640 #define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
    641 #define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
    642 #define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)
    643 
    644 #define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
    645 #define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)
    646 
    647 #define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
    648 #define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
    649 #define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
    650 #define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)
    651 
    652 #define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
    653 #define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)
    654 
    655 #define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
    656 #define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
    657 #define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
    658 #define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
    659 #define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
    660 #define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
    661 #define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
    662 #define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
    663 #define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
    664 #define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
    665 #define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
    666 #define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
    667 #define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
    668 #define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
    669 #define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
    670 #define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
    671 #define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
    672 #define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
    673 #define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
    674 #define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
    675 #define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
    676 #define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
    677 #define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
    678 #define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
    679 #define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
    680 #define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
    681 #define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
    682 #define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
    683 #define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
    684 #define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
    685 #define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
    686 #define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)
    687 
    688 #define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
    689 #define OFFB_FPSR     offsetof(VexGuestARM64State,guest_FPSR)
    690 //ZZ #define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
    691 //ZZ #define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
    692 //ZZ #define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
    693 //ZZ #define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
    694 //ZZ #define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
    695 //ZZ #define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
    696 //ZZ #define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)
    697 
    698 #define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
    699 #define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)
    700 
    701 
    702 /* ---------------- Integer registers ---------------- */
    703 
    704 static Int offsetIReg64 ( UInt iregNo )
    705 {
    706    /* Do we care about endianness here?  We do if sub-parts of integer
    707       registers are accessed. */
    708    switch (iregNo) {
    709       case 0:  return OFFB_X0;
    710       case 1:  return OFFB_X1;
    711       case 2:  return OFFB_X2;
    712       case 3:  return OFFB_X3;
    713       case 4:  return OFFB_X4;
    714       case 5:  return OFFB_X5;
    715       case 6:  return OFFB_X6;
    716       case 7:  return OFFB_X7;
    717       case 8:  return OFFB_X8;
    718       case 9:  return OFFB_X9;
    719       case 10: return OFFB_X10;
    720       case 11: return OFFB_X11;
    721       case 12: return OFFB_X12;
    722       case 13: return OFFB_X13;
    723       case 14: return OFFB_X14;
    724       case 15: return OFFB_X15;
    725       case 16: return OFFB_X16;
    726       case 17: return OFFB_X17;
    727       case 18: return OFFB_X18;
    728       case 19: return OFFB_X19;
    729       case 20: return OFFB_X20;
    730       case 21: return OFFB_X21;
    731       case 22: return OFFB_X22;
    732       case 23: return OFFB_X23;
    733       case 24: return OFFB_X24;
    734       case 25: return OFFB_X25;
    735       case 26: return OFFB_X26;
    736       case 27: return OFFB_X27;
    737       case 28: return OFFB_X28;
    738       case 29: return OFFB_X29;
    739       case 30: return OFFB_X30;
    740       /* but not 31 */
    741       default: vassert(0);
    742    }
    743 }
    744 
    745 static Int offsetIReg64orSP ( UInt iregNo )
    746 {
    747    return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
    748 }
    749 
    750 static const HChar* nameIReg64orZR ( UInt iregNo )
    751 {
    752    vassert(iregNo < 32);
    753    static const HChar* names[32]
    754       = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
    755           "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
    756           "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    757           "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
    758    return names[iregNo];
    759 }
    760 
    761 static const HChar* nameIReg64orSP ( UInt iregNo )
    762 {
    763    if (iregNo == 31) {
    764       return "sp";
    765    }
    766    vassert(iregNo < 31);
    767    return nameIReg64orZR(iregNo);
    768 }
    769 
    770 static IRExpr* getIReg64orSP ( UInt iregNo )
    771 {
    772    vassert(iregNo < 32);
    773    return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
    774 }
    775 
    776 static IRExpr* getIReg64orZR ( UInt iregNo )
    777 {
    778    if (iregNo == 31) {
    779       return mkU64(0);
    780    }
    781    vassert(iregNo < 31);
    782    return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
    783 }
    784 
    785 static void putIReg64orSP ( UInt iregNo, IRExpr* e )
    786 {
    787    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
    788    stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
    789 }
    790 
    791 static void putIReg64orZR ( UInt iregNo, IRExpr* e )
    792 {
    793    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
    794    if (iregNo == 31) {
    795       return;
    796    }
    797    vassert(iregNo < 31);
    798    stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
    799 }
    800 
    801 static const HChar* nameIReg32orZR ( UInt iregNo )
    802 {
    803    vassert(iregNo < 32);
    804    static const HChar* names[32]
    805       = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
    806           "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
    807           "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
    808           "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
    809    return names[iregNo];
    810 }
    811 
    812 static const HChar* nameIReg32orSP ( UInt iregNo )
    813 {
    814    if (iregNo == 31) {
    815       return "wsp";
    816    }
    817    vassert(iregNo < 31);
    818    return nameIReg32orZR(iregNo);
    819 }
    820 
    821 static IRExpr* getIReg32orSP ( UInt iregNo )
    822 {
    823    vassert(iregNo < 32);
    824    return unop(Iop_64to32,
    825                IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
    826 }
    827 
    828 static IRExpr* getIReg32orZR ( UInt iregNo )
    829 {
    830    if (iregNo == 31) {
    831       return mkU32(0);
    832    }
    833    vassert(iregNo < 31);
    834    return unop(Iop_64to32,
    835                IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
    836 }
    837 
    838 static void putIReg32orSP ( UInt iregNo, IRExpr* e )
    839 {
    840    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
    841    stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
    842 }
    843 
    844 static void putIReg32orZR ( UInt iregNo, IRExpr* e )
    845 {
    846    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
    847    if (iregNo == 31) {
    848       return;
    849    }
    850    vassert(iregNo < 31);
    851    stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
    852 }
    853 
    854 static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
    855 {
    856    vassert(is64 == True || is64 == False);
    857    return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
    858 }
    859 
    860 static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
    861 {
    862    vassert(is64 == True || is64 == False);
    863    return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
    864 }
    865 
    866 static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
    867 {
    868    vassert(is64 == True || is64 == False);
    869    return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
    870 }
    871 
    872 static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
    873 {
    874    vassert(is64 == True || is64 == False);
    875    if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
    876 }
    877 
    878 static void putPC ( IRExpr* e )
    879 {
    880    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
    881    stmt( IRStmt_Put(OFFB_PC, e) );
    882 }
    883 
    884 
    885 /* ---------------- Vector (Q) registers ---------------- */
    886 
    887 static Int offsetQReg128 ( UInt qregNo )
    888 {
    889    /* We don't care about endianness at this point.  It only becomes
     890       relevant when dealing with sections of these registers. */
    891    switch (qregNo) {
    892       case 0:  return OFFB_Q0;
    893       case 1:  return OFFB_Q1;
    894       case 2:  return OFFB_Q2;
    895       case 3:  return OFFB_Q3;
    896       case 4:  return OFFB_Q4;
    897       case 5:  return OFFB_Q5;
    898       case 6:  return OFFB_Q6;
    899       case 7:  return OFFB_Q7;
    900       case 8:  return OFFB_Q8;
    901       case 9:  return OFFB_Q9;
    902       case 10: return OFFB_Q10;
    903       case 11: return OFFB_Q11;
    904       case 12: return OFFB_Q12;
    905       case 13: return OFFB_Q13;
    906       case 14: return OFFB_Q14;
    907       case 15: return OFFB_Q15;
    908       case 16: return OFFB_Q16;
    909       case 17: return OFFB_Q17;
    910       case 18: return OFFB_Q18;
    911       case 19: return OFFB_Q19;
    912       case 20: return OFFB_Q20;
    913       case 21: return OFFB_Q21;
    914       case 22: return OFFB_Q22;
    915       case 23: return OFFB_Q23;
    916       case 24: return OFFB_Q24;
    917       case 25: return OFFB_Q25;
    918       case 26: return OFFB_Q26;
    919       case 27: return OFFB_Q27;
    920       case 28: return OFFB_Q28;
    921       case 29: return OFFB_Q29;
    922       case 30: return OFFB_Q30;
    923       case 31: return OFFB_Q31;
    924       default: vassert(0);
    925    }
    926 }
    927 
    928 /* Write to a complete Qreg. */
    929 static void putQReg128 ( UInt qregNo, IRExpr* e )
    930 {
    931    vassert(qregNo < 32);
    932    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
    933    stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
    934 }
    935 
    936 /* Read a complete Qreg. */
    937 static IRExpr* getQReg128 ( UInt qregNo )
    938 {
    939    vassert(qregNo < 32);
    940    return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
    941 }
    942 
    943 /* Produce the IR type for some sub-part of a vector.  For 32- and 64-
    944    bit sub-parts we can choose either integer or float types, and
    945    choose float on the basis that that is the common use case and so
    946    will give least interference with Put-to-Get forwarding later
    947    on. */
    948 static IRType preferredVectorSubTypeFromSize ( UInt szB )
    949 {
    950    switch (szB) {
    951       case 1:  return Ity_I8;
    952       case 2:  return Ity_I16;
    953       case 4:  return Ity_I32; //Ity_F32;
    954       case 8:  return Ity_F64;
    955       case 16: return Ity_V128;
    956       default: vassert(0);
    957    }
    958 }
    959 
    960 /* Find the offset of the laneNo'th lane of type laneTy in the given
    961    Qreg.  Since the host is little-endian, the least significant lane
    962    has the lowest offset. */
    963 static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
    964 {
    965    vassert(!host_is_bigendian);
    966    Int base = offsetQReg128(qregNo);
    967    /* Since the host is little-endian, the least significant lane
    968       will be at the lowest address. */
    969    /* Restrict this to known types, so as to avoid silently accepting
    970       stupid types. */
    971    UInt laneSzB = 0;
    972    switch (laneTy) {
    973       case Ity_I8:                 laneSzB = 1;  break;
    974       case Ity_I16:                laneSzB = 2;  break;
    975       case Ity_F32: case Ity_I32:  laneSzB = 4;  break;
    976       case Ity_F64: case Ity_I64:  laneSzB = 8;  break;
    977       case Ity_V128:               laneSzB = 16; break;
    978       default: break;
    979    }
    980    vassert(laneSzB > 0);
    981    UInt minOff = laneNo * laneSzB;
    982    UInt maxOff = minOff + laneSzB - 1;
    983    vassert(maxOff < 16);
    984    return base + minOff;
    985 }
    986 
    987 /* Put to the least significant lane of a Qreg. */
    988 static void putQRegLO ( UInt qregNo, IRExpr* e )
    989 {
    990    IRType ty  = typeOfIRExpr(irsb->tyenv, e);
    991    Int    off = offsetQRegLane(qregNo, ty, 0);
    992    switch (ty) {
    993       case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
    994       case Ity_F32: case Ity_F64: case Ity_V128:
    995          break;
    996       default:
    997          vassert(0); // Other cases are probably invalid
    998    }
    999    stmt(IRStmt_Put(off, e));
   1000 }
   1001 
   1002 /* Get from the least significant lane of a Qreg. */
   1003 static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
   1004 {
   1005    Int off = offsetQRegLane(qregNo, ty, 0);
   1006    switch (ty) {
   1007       case Ity_I8:
   1008       case Ity_I16:
   1009       case Ity_I32: case Ity_I64:
   1010       case Ity_F32: case Ity_F64: case Ity_V128:
   1011          break;
   1012       default:
   1013          vassert(0); // Other cases are ATC
   1014    }
   1015    return IRExpr_Get(off, ty);
   1016 }
   1017 
   1018 static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
   1019 {
   1020    static const HChar* namesQ[32]
   1021       = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
   1022           "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
   1023           "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
   1024           "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   1025    static const HChar* namesD[32]
   1026       = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
   1027           "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
   1028           "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
   1029           "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   1030    static const HChar* namesS[32]
   1031       = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
   1032           "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
   1033           "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
   1034           "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   1035    static const HChar* namesH[32]
   1036       = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
   1037           "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
   1038           "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
   1039           "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   1040    static const HChar* namesB[32]
   1041       = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
   1042           "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
   1043           "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
   1044           "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   1045    vassert(qregNo < 32);
   1046    switch (sizeofIRType(laneTy)) {
   1047       case 1:  return namesB[qregNo];
   1048       case 2:  return namesH[qregNo];
   1049       case 4:  return namesS[qregNo];
   1050       case 8:  return namesD[qregNo];
   1051       case 16: return namesQ[qregNo];
   1052       default: vassert(0);
   1053    }
   1054    /*NOTREACHED*/
   1055 }
   1056 
   1057 static const HChar* nameQReg128 ( UInt qregNo )
   1058 {
   1059    return nameQRegLO(qregNo, Ity_V128);
   1060 }
   1061 
   1062 /* Find the offset of the most significant half (8 bytes) of the given
   1063    Qreg.  This requires knowing the endianness of the host. */
   1064 static Int offsetQRegHI64 ( UInt qregNo )
   1065 {
   1066    return offsetQRegLane(qregNo, Ity_I64, 1);
   1067 }
   1068 
   1069 static IRExpr* getQRegHI64 ( UInt qregNo )
   1070 {
   1071    return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
   1072 }
   1073 
   1074 static void putQRegHI64 ( UInt qregNo, IRExpr* e )
   1075 {
   1076    IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   1077    Int    off = offsetQRegHI64(qregNo);
   1078    switch (ty) {
   1079       case Ity_I64: case Ity_F64:
   1080          break;
   1081       default:
   1082          vassert(0); // Other cases are plain wrong
   1083    }
   1084    stmt(IRStmt_Put(off, e));
   1085 }
   1086 
   1087 /* Put to a specified lane of a Qreg. */
   1088 static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
   1089 {
   1090    IRType laneTy  = typeOfIRExpr(irsb->tyenv, e);
   1091    Int    off     = offsetQRegLane(qregNo, laneTy, laneNo);
   1092    switch (laneTy) {
   1093       case Ity_F64: case Ity_I64:
   1094       case Ity_I32: case Ity_F32:
   1095       case Ity_I16:
   1096       case Ity_I8:
   1097          break;
   1098       default:
   1099          vassert(0); // Other cases are ATC
   1100    }
   1101    stmt(IRStmt_Put(off, e));
   1102 }
   1103 
   1104 /* Get from a specified lane of a Qreg. */
   1105 static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
   1106 {
   1107    Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   1108    switch (laneTy) {
   1109       case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
   1110       case Ity_F64:
   1111          break;
   1112       default:
   1113          vassert(0); // Other cases are ATC
   1114    }
   1115    return IRExpr_Get(off, laneTy);
   1116 }
   1117 
   1118 
   1119 //ZZ /* ---------------- Misc registers ---------------- */
   1120 //ZZ
   1121 //ZZ static void putMiscReg32 ( UInt    gsoffset,
   1122 //ZZ                            IRExpr* e, /* :: Ity_I32 */
   1123 //ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
   1124 //ZZ {
   1125 //ZZ    switch (gsoffset) {
   1126 //ZZ       case OFFB_FPSCR:   break;
   1127 //ZZ       case OFFB_QFLAG32: break;
   1128 //ZZ       case OFFB_GEFLAG0: break;
   1129 //ZZ       case OFFB_GEFLAG1: break;
   1130 //ZZ       case OFFB_GEFLAG2: break;
   1131 //ZZ       case OFFB_GEFLAG3: break;
   1132 //ZZ       default: vassert(0); /* awaiting more cases */
   1133 //ZZ    }
   1134 //ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   1135 //ZZ
   1136 //ZZ    if (guardT == IRTemp_INVALID) {
   1137 //ZZ       /* unconditional write */
   1138 //ZZ       stmt(IRStmt_Put(gsoffset, e));
   1139 //ZZ    } else {
   1140 //ZZ       stmt(IRStmt_Put(
   1141 //ZZ          gsoffset,
   1142 //ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
   1143 //ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
   1144 //ZZ       ));
   1145 //ZZ    }
   1146 //ZZ }
   1147 //ZZ
   1148 //ZZ static IRTemp get_ITSTATE ( void )
   1149 //ZZ {
   1150 //ZZ    ASSERT_IS_THUMB;
   1151 //ZZ    IRTemp t = newTemp(Ity_I32);
   1152 //ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
   1153 //ZZ    return t;
   1154 //ZZ }
   1155 //ZZ
   1156 //ZZ static void put_ITSTATE ( IRTemp t )
   1157 //ZZ {
   1158 //ZZ    ASSERT_IS_THUMB;
   1159 //ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
   1160 //ZZ }
   1161 //ZZ
   1162 //ZZ static IRTemp get_QFLAG32 ( void )
   1163 //ZZ {
   1164 //ZZ    IRTemp t = newTemp(Ity_I32);
   1165 //ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
   1166 //ZZ    return t;
   1167 //ZZ }
   1168 //ZZ
   1169 //ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
   1170 //ZZ {
   1171 //ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
   1172 //ZZ }
   1173 //ZZ
   1174 //ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
   1175 //ZZ    Status Register) to indicate that overflow or saturation occurred.
   1176 //ZZ    Nb: t must be zero to denote no saturation, and any nonzero
   1177 //ZZ    value to indicate saturation. */
   1178 //ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
   1179 //ZZ {
   1180 //ZZ    IRTemp old = get_QFLAG32();
   1181 //ZZ    IRTemp nyu = newTemp(Ity_I32);
   1182 //ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
   1183 //ZZ    put_QFLAG32(nyu, condT);
   1184 //ZZ }
   1185 
   1186 
   1187 /* ---------------- FPCR stuff ---------------- */
   1188 
   1189 /* Generate IR to get hold of the rounding mode bits in FPCR, and
   1190    convert them to IR format.  Bind the final result to the
   1191    returned temp. */
   1192 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
   1193 {
   1194    /* The ARMvfp encoding for rounding mode bits is:
   1195          00  to nearest
   1196          01  to +infinity
   1197          10  to -infinity
   1198          11  to zero
   1199       We need to convert that to the IR encoding:
   1200          00  to nearest (the default)
   1201          10  to +infinity
   1202          01  to -infinity
   1203          11  to zero
   1204       Which can be done by swapping bits 0 and 1.
    1205       The rmode bits are at 23:22 in FPCR.
   1206    */
   1207    IRTemp armEncd = newTemp(Ity_I32);
   1208    IRTemp swapped = newTemp(Ity_I32);
   1209    /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
   1210       we don't zero out bits 24 and above, since the assignment to
   1211       'swapped' will mask them out anyway. */
   1212    assign(armEncd,
   1213           binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
   1214    /* Now swap them. */
   1215    assign(swapped,
   1216           binop(Iop_Or32,
   1217                 binop(Iop_And32,
   1218                       binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
   1219                       mkU32(2)),
   1220                 binop(Iop_And32,
   1221                       binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
   1222                       mkU32(1))
   1223          ));
   1224    return swapped;
   1225 }
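
/* Worked example (illustrative only): if FPCR[23:22] == 10, the ARM
   encoding for round-to-minus-infinity, then the low bits of 'armEncd'
   are 10, and 'swapped' is ((10b << 1) & 2) | ((10b >> 1) & 1) = 0 | 1
   = 01b, which is the IR encoding (Irrm_NegINF) for the same mode. */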
   1226 
   1227 
   1228 /*------------------------------------------------------------*/
   1229 /*--- Helpers for flag handling and conditional insns      ---*/
   1230 /*------------------------------------------------------------*/
   1231 
   1232 static const HChar* nameARM64Condcode ( ARM64Condcode cond )
   1233 {
   1234    switch (cond) {
   1235       case ARM64CondEQ:  return "eq";
   1236       case ARM64CondNE:  return "ne";
   1237       case ARM64CondCS:  return "cs";  // or 'hs'
   1238       case ARM64CondCC:  return "cc";  // or 'lo'
   1239       case ARM64CondMI:  return "mi";
   1240       case ARM64CondPL:  return "pl";
   1241       case ARM64CondVS:  return "vs";
   1242       case ARM64CondVC:  return "vc";
   1243       case ARM64CondHI:  return "hi";
   1244       case ARM64CondLS:  return "ls";
   1245       case ARM64CondGE:  return "ge";
   1246       case ARM64CondLT:  return "lt";
   1247       case ARM64CondGT:  return "gt";
   1248       case ARM64CondLE:  return "le";
   1249       case ARM64CondAL:  return "al";
   1250       case ARM64CondNV:  return "nv";
   1251       default: vpanic("name_ARM64Condcode");
   1252    }
   1253 }
   1254 
   1255 /* and a handy shorthand for it */
   1256 static const HChar* nameCC ( ARM64Condcode cond ) {
   1257    return nameARM64Condcode(cond);
   1258 }
   1259 
   1260 
   1261 /* Build IR to calculate some particular condition from stored
   1262    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   1263    Ity_I64, suitable for narrowing.  Although the return type is
   1264    Ity_I64, the returned value is either 0 or 1.  'cond' must be
   1265    :: Ity_I64 and must denote the condition to compute in
   1266    bits 7:4, and be zero everywhere else.
   1267 */
   1268 static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
   1269 {
   1270    vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   1271    /* And 'cond' had better produce a value in which only bits 7:4 are
   1272       nonzero.  However, obviously we can't assert for that. */
   1273 
   1274    /* So what we're constructing for the first argument is
   1275       "(cond << 4) | stored-operation".
   1276       However, as per comments above, 'cond' must be supplied
   1277       pre-shifted to this function.
   1278 
   1279       This pairing scheme requires that the ARM64_CC_OP_ values all fit
   1280       in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
   1281       8 bits of the first argument. */
   1282    IRExpr** args
   1283       = mkIRExprVec_4(
   1284            binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
   1285            IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
   1286            IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
   1287            IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
   1288         );
   1289    IRExpr* call
   1290       = mkIRExprCCall(
   1291            Ity_I64,
   1292            0/*regparm*/,
   1293            "arm64g_calculate_condition", &arm64g_calculate_condition,
   1294            args
   1295         );
   1296 
   1297    /* Exclude the requested condition, OP and NDEP from definedness
   1298       checking.  We're only interested in DEP1 and DEP2. */
   1299    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1300    return call;
   1301 }
   1302 
   1303 
   1304 /* Build IR to calculate some particular condition from stored
   1305    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   1306    Ity_I64, suitable for narrowing.  Although the return type is
   1307    Ity_I64, the returned value is either 0 or 1.
   1308 */
   1309 static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
   1310 {
    1311   /* First arg is "(cond << 4) | stored-operation".  This requires that the
   1312      ARM64_CC_OP_ values all fit in 4 bits.  Hence we are passing a
   1313      (COND, OP) pair in the lowest 8 bits of the first argument. */
   1314    vassert(cond >= 0 && cond <= 15);
   1315    return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
   1316 }
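
/* Illustrative use (a sketch, not taken verbatim from the decoder): the
   0-or-1 result can be narrowed to Ity_I1 and used as a guard, e.g. for
   condition EQ:

      IRExpr* guard
         = unop(Iop_64to1, mk_arm64g_calculate_condition(ARM64CondEQ));

   and 'guard' then drives an IRExpr_ITE or a conditional exit. */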
   1317 
   1318 
   1319 /* Build IR to calculate just the carry flag from stored
   1320    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1321    Ity_I64. */
   1322 static IRExpr* mk_arm64g_calculate_flag_c ( void )
   1323 {
   1324    IRExpr** args
   1325       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
   1326                        IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
   1327                        IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
   1328                        IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   1329    IRExpr* call
   1330       = mkIRExprCCall(
   1331            Ity_I64,
   1332            0/*regparm*/,
   1333            "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
   1334            args
   1335         );
   1336    /* Exclude OP and NDEP from definedness checking.  We're only
   1337       interested in DEP1 and DEP2. */
   1338    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1339    return call;
   1340 }
   1341 
   1342 
   1343 //ZZ /* Build IR to calculate just the overflow flag from stored
   1344 //ZZ    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1345 //ZZ    Ity_I32. */
   1346 //ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
   1347 //ZZ {
   1348 //ZZ    IRExpr** args
   1349 //ZZ       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1350 //ZZ                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1351 //ZZ                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1352 //ZZ                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1353 //ZZ    IRExpr* call
   1354 //ZZ       = mkIRExprCCall(
   1355 //ZZ            Ity_I32,
   1356 //ZZ            0/*regparm*/,
   1357 //ZZ            "armg_calculate_flag_v", &armg_calculate_flag_v,
   1358 //ZZ            args
   1359 //ZZ         );
   1360 //ZZ    /* Exclude OP and NDEP from definedness checking.  We're only
   1361 //ZZ       interested in DEP1 and DEP2. */
   1362 //ZZ    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1363 //ZZ    return call;
   1364 //ZZ }
   1365 
   1366 
   1367 /* Build IR to calculate N Z C V in bits 31:28 of the
   1368    returned word. */
   1369 static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
   1370 {
   1371    IRExpr** args
   1372       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
   1373                        IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
   1374                        IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
   1375                        IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   1376    IRExpr* call
   1377       = mkIRExprCCall(
   1378            Ity_I64,
   1379            0/*regparm*/,
   1380            "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
   1381            args
   1382         );
   1383    /* Exclude OP and NDEP from definedness checking.  We're only
   1384       interested in DEP1 and DEP2. */
   1385    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1386    return call;
   1387 }
   1388 
   1389 
   1390 /* Build IR to set the flags thunk, in the most general case. */
   1391 static
   1392 void setFlags_D1_D2_ND ( UInt cc_op,
   1393                          IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
   1394 {
    1395    vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
    1396    vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
    1397    vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
   1398    vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
   1399    stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(cc_op) ));
   1400    stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   1401    stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   1402    stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
   1403 }
   1404 
   1405 /* Build IR to set the flags thunk after ADD or SUB. */
   1406 static
   1407 void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
   1408 {
   1409    IRTemp argL64 = IRTemp_INVALID;
   1410    IRTemp argR64 = IRTemp_INVALID;
   1411    IRTemp z64    = newTemp(Ity_I64);
   1412    if (is64) {
   1413       argL64 = argL;
   1414       argR64 = argR;
   1415    } else {
   1416       argL64 = newTemp(Ity_I64);
   1417       argR64 = newTemp(Ity_I64);
   1418       assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
   1419       assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
   1420    }
   1421    assign(z64, mkU64(0));
   1422    UInt cc_op = ARM64G_CC_OP_NUMBER;
   1423    /**/ if ( isSUB &&  is64) { cc_op = ARM64G_CC_OP_SUB64; }
   1424    else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
   1425    else if (!isSUB &&  is64) { cc_op = ARM64G_CC_OP_ADD64; }
   1426    else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
   1427    else                      { vassert(0); }
   1428    setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
   1429 }
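
        /* After a 64-bit SUBS, for example, the thunk holds
           OP=ARM64G_CC_OP_SUB64, DEP1=argL, DEP2=argR and NDEP=0; the NZCV
           bits themselves are only computed later, when
           mk_arm64g_calculate_flags_nzcv or the condition helper reads the
           thunk back.  The 32-bit case is the same except that both
           operands are zero-extended first and OP is ARM64G_CC_OP_SUB32. */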
   1430 
        /* Build IR to set the flags thunk after ADC or SBC. */
   1431 static
   1432 void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
                                IRTemp argL, IRTemp argR, IRTemp oldC )
   1433 {
   1434    IRTemp argL64 = IRTemp_INVALID;
   1435    IRTemp argR64 = IRTemp_INVALID;
   1436    IRTemp oldC64 = IRTemp_INVALID;
   1437    if (is64) {
   1438       argL64 = argL;
   1439       argR64 = argR;
   1440       oldC64 = oldC;
   1441    } else {
   1442       argL64 = newTemp(Ity_I64);
   1443       argR64 = newTemp(Ity_I64);
   1444       oldC64 = newTemp(Ity_I64);
   1445       assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
   1446       assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
   1447       assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
   1448    }
   1449    UInt cc_op = ARM64G_CC_OP_NUMBER;
   1450    /**/ if ( isSBC &&  is64) { cc_op = ARM64G_CC_OP_SBC64; }
   1451    else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
   1452    else if (!isSBC &&  is64) { cc_op = ARM64G_CC_OP_ADC64; }
   1453    else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
   1454    else                      { vassert(0); }
   1455    setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
   1456 }
   1457 
   1458 /* Build IR to set the flags thunk after ADD or SUB, if the given
   1459    condition evaluates to True at run time.  If not, the flags are set
   1460    to the specified NZCV value. */
   1461 static
   1462 void setFlags_ADD_SUB_conditionally (
   1463         Bool is64, Bool isSUB,
   1464         IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
   1465      )
   1466 {
   1467    /* Generate IR as follows:
   1468         CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
   1469         CC_DEP1 = ITE(cond, argL64, nzcv << 28)
   1470         CC_DEP2 = ITE(cond, argR64, 0)
   1471         CC_NDEP = 0
   1472    */
   1473 
   1474    IRTemp z64 = newTemp(Ity_I64);
   1475    assign(z64, mkU64(0));
   1476 
   1477    /* Establish the operation and operands for the True case. */
   1478    IRTemp t_dep1 = IRTemp_INVALID;
   1479    IRTemp t_dep2 = IRTemp_INVALID;
   1480    UInt   t_op   = ARM64G_CC_OP_NUMBER;
   1481    /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
   1482    else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
   1483    else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
   1484    else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
   1485    else                      { vassert(0); }
   1486    /* */
   1487    if (is64) {
   1488       t_dep1 = argL;
   1489       t_dep2 = argR;
   1490    } else {
   1491       t_dep1 = newTemp(Ity_I64);
   1492       t_dep2 = newTemp(Ity_I64);
   1493       assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
   1494       assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
   1495    }
   1496 
   1497    /* Establish the operation and operands for the False case. */
   1498    IRTemp f_dep1 = newTemp(Ity_I64);
   1499    IRTemp f_dep2 = z64;
   1500    UInt   f_op   = ARM64G_CC_OP_COPY;
   1501    assign(f_dep1, mkU64(nzcv << 28));
   1502 
   1503    /* Final thunk values */
   1504    IRTemp dep1 = newTemp(Ity_I64);
   1505    IRTemp dep2 = newTemp(Ity_I64);
   1506    IRTemp op   = newTemp(Ity_I64);
   1507 
   1508    assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
   1509    assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
   1510    assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
   1511 
   1512    /* finally .. */
   1513    stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
   1514    stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
   1515    stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
   1516    stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
   1517 }
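
        /* This is the workhorse for the CCMP/CCMN cases in
           dis_ARM64_data_processing_register below: they pass the decoded
           condition, the two comparison operands and the literal nzcv
           field, so the thunk describes either the SUB/ADD the comparison
           would have performed (condition true) or a COPY of nzcv << 28
           (condition false). */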
   1518 
   1519 /* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
   1520 static
   1521 void setFlags_LOGIC ( Bool is64, IRTemp res )
   1522 {
   1523    IRTemp res64 = IRTemp_INVALID;
   1524    IRTemp z64   = newTemp(Ity_I64);
   1525    UInt   cc_op = ARM64G_CC_OP_NUMBER;
   1526    if (is64) {
   1527       res64 = res;
   1528       cc_op = ARM64G_CC_OP_LOGIC64;
   1529    } else {
   1530       res64 = newTemp(Ity_I64);
   1531       assign(res64, unop(Iop_32Uto64, mkexpr(res)));
   1532       cc_op = ARM64G_CC_OP_LOGIC32;
   1533    }
   1534    assign(z64, mkU64(0));
   1535    setFlags_D1_D2_ND(cc_op, res64, z64, z64);
   1536 }
   1537 
   1538 /* Build IR to set the flags thunk to a given NZCV value.  NZCV is
   1539    located in bits 31:28 of the supplied value. */
   1540 static
   1541 void setFlags_COPY ( IRTemp nzcv_28x0 )
   1542 {
   1543    IRTemp z64 = newTemp(Ity_I64);
   1544    assign(z64, mkU64(0));
   1545    setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
   1546 }
   1547 
   1548 
   1549 //ZZ /* Minor variant of the above that sets NDEP to zero (if it
   1550 //ZZ    sets it at all) */
   1551 //ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
   1552 //ZZ                              IRTemp t_dep2,
   1553 //ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1554 //ZZ {
   1555 //ZZ    IRTemp z32 = newTemp(Ity_I32);
   1556 //ZZ    assign( z32, mkU32(0) );
   1557 //ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
   1558 //ZZ }
   1559 //ZZ
   1560 //ZZ
   1561 //ZZ /* Minor variant of the above that sets DEP2 to zero (if it
   1562 //ZZ    sets it at all) */
   1563 //ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
   1564 //ZZ                              IRTemp t_ndep,
   1565 //ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1566 //ZZ {
   1567 //ZZ    IRTemp z32 = newTemp(Ity_I32);
   1568 //ZZ    assign( z32, mkU32(0) );
   1569 //ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
   1570 //ZZ }
   1571 //ZZ
   1572 //ZZ
   1573 //ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
   1574 //ZZ    sets them at all) */
   1575 //ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
   1576 //ZZ                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   1577 //ZZ {
   1578 //ZZ    IRTemp z32 = newTemp(Ity_I32);
   1579 //ZZ    assign( z32, mkU32(0) );
   1580 //ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
   1581 //ZZ }
   1582 
   1583 
   1584 /*------------------------------------------------------------*/
   1585 /*--- Misc math helpers                                    ---*/
   1586 /*------------------------------------------------------------*/
   1587 
   1588 /* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
   1589 static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
   1590 {
   1591    IRTemp maskT = newTemp(Ity_I64);
   1592    IRTemp res   = newTemp(Ity_I64);
   1593    vassert(sh >= 1 && sh <= 63);
   1594    assign(maskT, mkU64(mask));
   1595    assign( res,
   1596            binop(Iop_Or64,
   1597                  binop(Iop_Shr64,
   1598                        binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
   1599                        mkU8(sh)),
   1600                  binop(Iop_And64,
   1601                        binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
   1602                        mkexpr(maskT))
   1603                  )
   1604            );
   1605    return res;
   1606 }
   1607 
   1608 /* Generates byte swaps within 32-bit lanes. */
   1609 static IRTemp math_UINTSWAP64 ( IRTemp src )
   1610 {
   1611    IRTemp res;
   1612    res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   1613    res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   1614    return res;
   1615 }
   1616 
   1617 /* Generates byte swaps within 16-bit lanes. */
   1618 static IRTemp math_USHORTSWAP64 ( IRTemp src )
   1619 {
   1620    IRTemp res;
   1621    res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   1622    return res;
   1623 }
   1624 
   1625 /* Generates a 64-bit byte swap. */
   1626 static IRTemp math_BYTESWAP64 ( IRTemp src )
   1627 {
   1628    IRTemp res;
   1629    res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   1630    res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   1631    res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
   1632    return res;
   1633 }
   1634 
   1635 /* Generates a 64-bit bit swap. */
   1636 static IRTemp math_BITSWAP64 ( IRTemp src )
   1637 {
   1638    IRTemp res;
   1639    res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
   1640    res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
   1641    res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
   1642    return math_BYTESWAP64(res);
   1643 }
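
        /* Worked examples: math_BYTESWAP64 maps 0x0102030405060708 to
           0x0807060504030201, and math_BITSWAP64 maps 0x0000000000000001
           to 0x8000000000000000 -- the three mask-and-shift passes reverse
           the bits within each byte, and the final BYTESWAP64 then
           reverses the byte order. */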
   1644 
   1645 /* Duplicates the bits at the bottom of the given word to fill the
   1646    whole word.  src :: Ity_I64 is assumed to have zeroes everywhere
   1647    except for the bottom bits. */
   1648 static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
   1649 {
   1650    if (srcTy == Ity_I8) {
   1651       IRTemp t16 = newTemp(Ity_I64);
   1652       assign(t16, binop(Iop_Or64, mkexpr(src),
   1653                                   binop(Iop_Shl64, mkexpr(src), mkU8(8))));
   1654       IRTemp t32 = newTemp(Ity_I64);
   1655       assign(t32, binop(Iop_Or64, mkexpr(t16),
   1656                                   binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
   1657       IRTemp t64 = newTemp(Ity_I64);
   1658       assign(t64, binop(Iop_Or64, mkexpr(t32),
   1659                                   binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
   1660       return t64;
   1661    }
   1662    if (srcTy == Ity_I16) {
   1663       IRTemp t32 = newTemp(Ity_I64);
   1664       assign(t32, binop(Iop_Or64, mkexpr(src),
   1665                                   binop(Iop_Shl64, mkexpr(src), mkU8(16))));
   1666       IRTemp t64 = newTemp(Ity_I64);
   1667       assign(t64, binop(Iop_Or64, mkexpr(t32),
   1668                                   binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
   1669       return t64;
   1670    }
   1671    if (srcTy == Ity_I32) {
   1672       IRTemp t64 = newTemp(Ity_I64);
   1673       assign(t64, binop(Iop_Or64, mkexpr(src),
   1674                                   binop(Iop_Shl64, mkexpr(src), mkU8(32))));
   1675       return t64;
   1676    }
   1677    if (srcTy == Ity_I64) {
   1678       return src;
   1679    }
   1680    vassert(0);
   1681 }
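
        /* Worked example: with srcTy == Ity_I8 and src holding 0xAB (upper
           bits zero, as required), the successive or-with-shift steps
           produce 0xABAB, then 0xABABABAB, then 0xABABABABABABABAB. */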
   1682 
   1683 
   1684 /*------------------------------------------------------------*/
   1685 /*--- FP comparison helpers                                ---*/
   1686 /*------------------------------------------------------------*/
   1687 
   1688 /* irRes :: Ity_I32 holds a floating point comparison result encoded
   1689    as an IRCmpF64Result.  Generate code to convert it to an
   1690    ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   1691    Assign a new temp to hold that value, and return the temp. */
   1692 static
   1693 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
   1694 {
   1695    IRTemp ix       = newTemp(Ity_I64);
   1696    IRTemp termL    = newTemp(Ity_I64);
   1697    IRTemp termR    = newTemp(Ity_I64);
   1698    IRTemp nzcv     = newTemp(Ity_I64);
   1699    IRTemp irRes    = newTemp(Ity_I64);
   1700 
   1701    /* This is where the fun starts.  We have to convert 'irRes' from
   1702       an IR-convention return result (IRCmpF64Result) to an
   1703       ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
   1704       4 bits of 'nzcv'. */
   1705    /* Map compare result from IR to ARM(nzcv) */
   1706    /*
   1707       FP cmp result | IR   | ARM(nzcv)
   1708       --------------------------------
   1709       UN              0x45   0011
   1710       LT              0x01   1000
   1711       GT              0x00   0010
   1712       EQ              0x40   0110
   1713    */
   1714    /* Now since you're probably wondering WTF ..
   1715 
   1716       ix fishes the useful bits out of the IR value, bits 6 and 0, and
   1717       places them side by side, giving a number which is 0, 1, 2 or 3.
   1718 
   1719       termL is a sequence cooked up by GNU superopt.  It converts ix
   1720          into an almost correct NZCV value (incredibly), except
   1721          for the case of UN, where it produces 0100 instead of the
   1722          required 0011.
   1723 
   1724       termR is therefore a correction term, also computed from ix.  It
   1725          is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
   1726          the final correct value, we subtract termR from termL.
   1727 
   1728       Don't take my word for it.  There's a test program at the bottom
   1729       of guest_arm_toIR.c, to try this out with.
   1730    */
   1731    assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
   1732 
   1733    assign(
   1734       ix,
   1735       binop(Iop_Or64,
   1736             binop(Iop_And64,
   1737                   binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
   1738                   mkU64(3)),
   1739             binop(Iop_And64, mkexpr(irRes), mkU64(1))));
   1740 
   1741    assign(
   1742       termL,
   1743       binop(Iop_Add64,
   1744             binop(Iop_Shr64,
   1745                   binop(Iop_Sub64,
   1746                         binop(Iop_Shl64,
   1747                               binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
   1748                               mkU8(62)),
   1749                         mkU64(1)),
   1750                   mkU8(61)),
   1751             mkU64(1)));
   1752 
   1753    assign(
   1754       termR,
   1755       binop(Iop_And64,
   1756             binop(Iop_And64,
   1757                   mkexpr(ix),
   1758                   binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
   1759             mkU64(1)));
   1760 
   1761    assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   1762    return nzcv;
   1763 }
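
        /* Cross-check of the recipe above, tracing each possible ix value
           through termL and termR:

              irRes   ix   termL   termR   termL-termR (NZCV)
              0x00     0       2       0   0010  (GT)
              0x01     1       8       0   1000  (LT)
              0x40     2       6       0   0110  (EQ)
              0x45     3       4       1   0011  (UN)
        */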
   1764 
   1765 
   1766 /*------------------------------------------------------------*/
   1767 /*--- Data processing (immediate)                          ---*/
   1768 /*------------------------------------------------------------*/
   1769 
   1770 /* Helper functions for supporting "DecodeBitMasks" */
   1771 
   1772 static ULong dbm_ROR ( Int width, ULong x, Int rot )
   1773 {
   1774    vassert(width > 0 && width <= 64);
   1775    vassert(rot >= 0 && rot < width);
   1776    if (rot == 0) return x;
   1777    ULong res = x >> rot;
   1778    res |= (x << (width - rot));
   1779    if (width < 64)
   1780      res &= ((1ULL << width) - 1);
   1781    return res;
   1782 }
   1783 
   1784 static ULong dbm_RepTo64( Int esize, ULong x )
   1785 {
   1786    switch (esize) {
   1787       case 64:
   1788          return x;
   1789       case 32:
   1790          x &= 0xFFFFFFFF; x |= (x << 32);
   1791          return x;
   1792       case 16:
   1793          x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
   1794          return x;
   1795       case 8:
   1796          x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
   1797          return x;
   1798       case 4:
   1799          x &= 0xF; x |= (x << 4); x |= (x << 8);
   1800          x |= (x << 16); x |= (x << 32);
   1801          return x;
   1802       case 2:
   1803          x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
   1804          x |= (x << 16); x |= (x << 32);
   1805          return x;
   1806       default:
   1807          break;
   1808    }
   1809    vpanic("dbm_RepTo64");
   1810    /*NOTREACHED*/
   1811    return 0;
   1812 }
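
        /* For example, dbm_RepTo64(8, 0x5A) gives 0x5A5A5A5A5A5A5A5A and
           dbm_RepTo64(32, 0x1) gives 0x0000000100000001. */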
   1813 
   1814 static Int dbm_highestSetBit ( ULong x )
   1815 {
   1816    Int i;
   1817    for (i = 63; i >= 0; i--) {
   1818       if (x & (1ULL << i))
   1819          return i;
   1820    }
   1821    vassert(x == 0);
   1822    return -1;
   1823 }
   1824 
   1825 static
   1826 Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
   1827                           ULong immN, ULong imms, ULong immr, Bool immediate,
   1828                           UInt M /*32 or 64*/)
   1829 {
   1830    vassert(immN < (1ULL << 1));
   1831    vassert(imms < (1ULL << 6));
   1832    vassert(immr < (1ULL << 6));
   1833    vassert(immediate == False || immediate == True);
   1834    vassert(M == 32 || M == 64);
   1835 
   1836    Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   1837    if (len < 1) { /* printf("fail1\n"); */ return False; }
   1838    vassert(len <= 6);
   1839    vassert(M >= (1 << len));
   1840 
   1841    vassert(len >= 1 && len <= 6);
   1842    ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
   1843                   (1 << len) - 1;
   1844    vassert(levels >= 1 && levels <= 63);
   1845 
   1846    if (immediate && ((imms & levels) == levels)) {
   1847       /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
   1848       return False;
   1849    }
   1850 
   1851    ULong S = imms & levels;
   1852    ULong R = immr & levels;
   1853    Int   diff = S - R;
   1854    diff &= 63;
   1855    Int esize = 1 << len;
   1856    vassert(2 <= esize && esize <= 64);
   1857 
   1858    /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
   1859       same below with d.  S can be 63 in which case we have an out of
   1860       range and hence undefined shift. */
   1861    vassert(S >= 0 && S <= 63);
   1862    vassert(esize >= (S+1));
   1863    ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
   1864                   //(1ULL << (S+1)) - 1;
   1865                   ((1ULL << S) - 1) + (1ULL << S);
   1866 
   1867    Int d = // diff<len-1:0>
   1868            diff & ((1 << len)-1);
   1869    vassert(esize >= (d+1));
   1870    vassert(d >= 0 && d <= 63);
   1871 
   1872    ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
   1873                   //(1ULL << (d+1)) - 1;
   1874                   ((1ULL << d) - 1) + (1ULL << d);
   1875 
   1876    if (esize != 64) vassert(elem_s < (1ULL << esize));
   1877    if (esize != 64) vassert(elem_d < (1ULL << esize));
   1878 
   1879    if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   1880    if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
   1881 
   1882    return True;
   1883 }
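
        /* Worked examples: a 64-bit logical immediate with immN=1, immr=0,
           imms=0 gives len=6, esize=64, S=R=0, elem_s=1 and hence
           wmask=0x1, i.e. the encoding of #1.  A 32-bit immediate with
           immN=0, immr=0, imms=0b011110 gives len=5, esize=32, S=30,
           elem_s=0x7FFFFFFF; the low 32 bits of the replicated wmask,
           0x7FFFFFFF, are what the 32-bit logical-immediate cases use. */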
   1884 
   1885 
   1886 static
   1887 Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
   1888                                          UInt insn)
   1889 {
   1890 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   1891 
   1892    /* insn[28:23]
   1893       10000x PC-rel addressing
   1894       10001x Add/subtract (immediate)
   1895       100100 Logical (immediate)
   1896       100101 Move Wide (immediate)
   1897       100110 Bitfield
   1898       100111 Extract
   1899    */
   1900 
   1901    /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   1902    if (INSN(28,24) == BITS5(1,0,0,0,1)) {
   1903       Bool is64   = INSN(31,31) == 1;
   1904       Bool isSub  = INSN(30,30) == 1;
   1905       Bool setCC  = INSN(29,29) == 1;
   1906       UInt sh     = INSN(23,22);
   1907       UInt uimm12 = INSN(21,10);
   1908       UInt nn     = INSN(9,5);
   1909       UInt dd     = INSN(4,0);
   1910       const HChar* nm = isSub ? "sub" : "add";
   1911       if (sh >= 2) {
   1912          /* Invalid; fall through */
   1913       } else {
   1914          vassert(sh <= 1);
   1915          uimm12 <<= (12 * sh);
   1916          if (is64) {
   1917             IRTemp argL  = newTemp(Ity_I64);
   1918             IRTemp argR  = newTemp(Ity_I64);
   1919             IRTemp res   = newTemp(Ity_I64);
   1920             assign(argL, getIReg64orSP(nn));
   1921             assign(argR, mkU64(uimm12));
   1922             assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
   1923                                mkexpr(argL), mkexpr(argR)));
   1924             if (setCC) {
   1925                putIReg64orZR(dd, mkexpr(res));
   1926                setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
   1927                DIP("%ss %s, %s, 0x%x\n",
   1928                    nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
   1929             } else {
   1930                putIReg64orSP(dd, mkexpr(res));
   1931                DIP("%s %s, %s, 0x%x\n",
   1932                    nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
   1933             }
   1934          } else {
   1935             IRTemp argL  = newTemp(Ity_I32);
   1936             IRTemp argR  = newTemp(Ity_I32);
   1937             IRTemp res   = newTemp(Ity_I32);
   1938             assign(argL, getIReg32orSP(nn));
   1939             assign(argR, mkU32(uimm12));
   1940             assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
   1941                                mkexpr(argL), mkexpr(argR)));
   1942             if (setCC) {
   1943                putIReg32orZR(dd, mkexpr(res));
   1944                setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
   1945                DIP("%ss %s, %s, 0x%x\n",
   1946                    nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
   1947             } else {
   1948                putIReg32orSP(dd, mkexpr(res));
   1949                DIP("%s %s, %s, 0x%x\n",
   1950                    nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
   1951             }
   1952          }
   1953          return True;
   1954       }
   1955    }
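
           /* Worked decode: the word 0x91000421 has INSN(28,24) == 10001,
              sf=1, op=0, S=0, sh=0, uimm12=1, nn=1, dd=1, so the 64-bit
              path above binds argL to X1 and argR to 0x1, writes
              Add64(argL,argR) back to X1, and prints "add x1, x1, 0x1". */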
   1956 
   1957    /* -------------------- ADR/ADRP -------------------- */
   1958    if (INSN(28,24) == BITS5(1,0,0,0,0)) {
   1959       UInt  bP    = INSN(31,31);
   1960       UInt  immLo = INSN(30,29);
   1961       UInt  immHi = INSN(23,5);
   1962       UInt  rD    = INSN(4,0);
   1963       ULong uimm  = (immHi << 2) | immLo;
   1964       ULong simm  = sx_to_64(uimm, 21);
   1965       ULong val;
   1966       if (bP) {
   1967          val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
   1968       } else {
   1969          val = guest_PC_curr_instr + simm;
   1970       }
   1971       putIReg64orZR(rD, mkU64(val));
   1972       DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
   1973       return True;
   1974    }
   1975 
   1976    /* -------------------- LOGIC(imm) -------------------- */
   1977    if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
   1978       /* 31 30 28     22 21   15   9  4
   1979          sf op 100100 N  immr imms Rn Rd
   1980            op=00: AND  Rd|SP, Rn, #imm
   1981            op=01: ORR  Rd|SP, Rn, #imm
   1982            op=10: EOR  Rd|SP, Rn, #imm
   1983            op=11: ANDS Rd|ZR, Rn, #imm
   1984       */
   1985       Bool  is64 = INSN(31,31) == 1;
   1986       UInt  op   = INSN(30,29);
   1987       UInt  N    = INSN(22,22);
   1988       UInt  immR = INSN(21,16);
   1989       UInt  immS = INSN(15,10);
   1990       UInt  nn   = INSN(9,5);
   1991       UInt  dd   = INSN(4,0);
   1992       ULong imm  = 0;
   1993       Bool  ok;
   1994       if (N == 1 && !is64)
   1995          goto after_logic_imm; /* not allowed; fall through */
   1996       ok = dbm_DecodeBitMasks(&imm, NULL,
   1997                               N, immS, immR, True, is64 ? 64 : 32);
   1998       if (!ok)
   1999          goto after_logic_imm;
   2000 
   2001       const HChar* names[4] = { "and", "orr", "eor", "ands" };
   2002       const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
   2003       const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
   2004 
   2005       vassert(op < 4);
   2006       if (is64) {
   2007          IRExpr* argL = getIReg64orZR(nn);
   2008          IRExpr* argR = mkU64(imm);
   2009          IRTemp  res  = newTemp(Ity_I64);
   2010          assign(res, binop(ops64[op], argL, argR));
   2011          if (op < 3) {
   2012             putIReg64orSP(dd, mkexpr(res));
   2013             DIP("%s %s, %s, 0x%llx\n", names[op],
   2014                 nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
   2015          } else {
   2016             putIReg64orZR(dd, mkexpr(res));
   2017             setFlags_LOGIC(True/*is64*/, res);
   2018             DIP("%s %s, %s, 0x%llx\n", names[op],
   2019                 nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
   2020          }
   2021       } else {
   2022          IRExpr* argL = getIReg32orZR(nn);
   2023          IRExpr* argR = mkU32((UInt)imm);
   2024          IRTemp  res  = newTemp(Ity_I32);
   2025          assign(res, binop(ops32[op], argL, argR));
   2026          if (op < 3) {
   2027             putIReg32orSP(dd, mkexpr(res));
   2028             DIP("%s %s, %s, 0x%x\n", names[op],
   2029                 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
   2030          } else {
   2031             putIReg32orZR(dd, mkexpr(res));
   2032             setFlags_LOGIC(False/*!is64*/, res);
   2033             DIP("%s %s, %s, 0x%x\n", names[op],
   2034                 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
   2035          }
   2036       }
   2037       return True;
   2038    }
   2039    after_logic_imm:
   2040 
   2041    /* -------------------- MOV{Z,N,K} -------------------- */
   2042    if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
   2043       /* 31 30 28      22 20    4
   2044          |  |  |       |  |     |
   2045          sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
   2046          sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
   2047          sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
   2048       */
   2049       Bool is64   = INSN(31,31) == 1;
   2050       UInt subopc = INSN(30,29);
   2051       UInt hw     = INSN(22,21);
   2052       UInt imm16  = INSN(20,5);
   2053       UInt dd     = INSN(4,0);
   2054       if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
   2055          /* invalid; fall through */
   2056       } else {
   2057          ULong imm64 = ((ULong)imm16) << (16 * hw);
   2058          if (!is64)
   2059             vassert(imm64 < 0x100000000ULL);
   2060          switch (subopc) {
   2061             case BITS2(1,0): // MOVZ
   2062                putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
   2063                DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
   2064                break;
   2065             case BITS2(0,0): // MOVN
   2066                imm64 = ~imm64;
   2067                if (!is64)
   2068                   imm64 &= 0xFFFFFFFFULL;
   2069                putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
   2070                DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
   2071                break;
   2072             case BITS2(1,1): // MOVK
   2073                /* This is more complex.  We are inserting a slice into
   2074                   the destination register, so we need to have the old
   2075                   value of it. */
   2076                if (is64) {
   2077                   IRTemp old = newTemp(Ity_I64);
   2078                   assign(old, getIReg64orZR(dd));
   2079                   ULong mask = 0xFFFFULL << (16 * hw);
   2080                   IRExpr* res
   2081                      = binop(Iop_Or64,
   2082                              binop(Iop_And64, mkexpr(old), mkU64(~mask)),
   2083                              mkU64(imm64));
   2084                   putIReg64orZR(dd, res);
   2085                   DIP("movk %s, 0x%x, lsl %u\n",
   2086                       nameIReg64orZR(dd), imm16, 16*hw);
   2087                } else {
   2088                   IRTemp old = newTemp(Ity_I32);
   2089                   assign(old, getIReg32orZR(dd));
   2090                   vassert(hw <= 1);
   2091                   UInt mask = 0xFFFF << (16 * hw);
   2092                   IRExpr* res
   2093                      = binop(Iop_Or32,
   2094                              binop(Iop_And32, mkexpr(old), mkU32(~mask)),
   2095                              mkU32((UInt)imm64));
   2096                   putIReg32orZR(dd, res);
   2097                   DIP("movk %s, 0x%x, lsl %u\n",
   2098                       nameIReg32orZR(dd), imm16, 16*hw);
   2099                }
   2100                break;
   2101             default:
   2102                vassert(0);
   2103          }
   2104          return True;
   2105       }
   2106    }
   2107 
   2108    /* -------------------- {U,S,}BFM -------------------- */
   2109    /*    30 28     22 21   15   9  4
   2110 
   2111       sf 10 100110 N  immr imms nn dd
   2112          UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
   2113          UBFM Xd, Xn, #immr, #imms   when sf=1, N=1
   2114 
   2115       sf 00 100110 N  immr imms nn dd
   2116          SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
   2117          SBFM Xd, Xn, #immr, #imms   when sf=1, N=1
   2118 
   2119       sf 01 100110 N  immr imms nn dd
   2120          BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
   2121          BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   2122    */
   2123    if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
   2124       UInt sf     = INSN(31,31);
   2125       UInt opc    = INSN(30,29);
   2126       UInt N      = INSN(22,22);
   2127       UInt immR   = INSN(21,16);
   2128       UInt immS   = INSN(15,10);
   2129       UInt nn     = INSN(9,5);
   2130       UInt dd     = INSN(4,0);
   2131       Bool inZero = False;
   2132       Bool extend = False;
   2133       const HChar* nm = "???";
   2134       /* skip invalid combinations */
   2135       switch (opc) {
   2136          case BITS2(0,0):
   2137             inZero = True; extend = True; nm = "sbfm"; break;
   2138          case BITS2(0,1):
   2139             inZero = False; extend = False; nm = "bfm"; break;
   2140          case BITS2(1,0):
   2141             inZero = True; extend = False; nm = "ubfm"; break;
   2142          case BITS2(1,1):
   2143             goto after_bfm; /* invalid */
   2144          default:
   2145             vassert(0);
   2146       }
   2147       if (sf == 1 && N != 1) goto after_bfm;
   2148       if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
   2149                              || ((immS >> 5) & 1) != 0)) goto after_bfm;
   2150       ULong wmask = 0, tmask = 0;
   2151       Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
   2152                                    N, immS, immR, False, sf == 1 ? 64 : 32);
   2153       if (!ok) goto after_bfm; /* hmmm */
   2154 
   2155       Bool   is64 = sf == 1;
   2156       IRType ty   = is64 ? Ity_I64 : Ity_I32;
   2157 
   2158       IRTemp dst = newTemp(ty);
   2159       IRTemp src = newTemp(ty);
   2160       IRTemp bot = newTemp(ty);
   2161       IRTemp top = newTemp(ty);
   2162       IRTemp res = newTemp(ty);
   2163       assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
   2164       assign(src, getIRegOrZR(is64, nn));
   2165       /* perform bitfield move on low bits */
   2166       assign(bot, binop(mkOR(ty),
   2167                         binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
   2168                         binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
   2169                                          mkU(ty, wmask))));
   2170       /* determine extension bits (sign, zero or dest register) */
   2171       assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
   2172       /* combine extension bits and result bits */
   2173       assign(res, binop(mkOR(ty),
   2174                         binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
   2175                         binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
   2176       putIRegOrZR(is64, dd, mkexpr(res));
   2177       DIP("%s %s, %s, immR=%u, immS=%u\n",
   2178           nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
   2179       return True;
   2180    }
   2181    after_bfm:
   2182 
   2183    /* ---------------------- EXTR ---------------------- */
   2184    /*   30 28     22 20 15   9 4
   2185       1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
   2186       0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   2187    */
   2188    if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
   2189       Bool is64  = INSN(31,31) == 1;
   2190       UInt mm    = INSN(20,16);
   2191       UInt imm6  = INSN(15,10);
   2192       UInt nn    = INSN(9,5);
   2193       UInt dd    = INSN(4,0);
   2194       Bool valid = True;
   2195       if (INSN(31,31) != INSN(22,22))
   2196         valid = False;
   2197       if (!is64 && imm6 >= 32)
   2198         valid = False;
   2199       if (!valid) goto after_extr;
   2200       IRType ty    = is64 ? Ity_I64 : Ity_I32;
   2201       IRTemp srcHi = newTemp(ty);
   2202       IRTemp srcLo = newTemp(ty);
   2203       IRTemp res   = newTemp(ty);
   2204       assign(srcHi, getIRegOrZR(is64, nn));
   2205       assign(srcLo, getIRegOrZR(is64, mm));
   2206       if (imm6 == 0) {
   2207         assign(res, mkexpr(srcLo));
   2208       } else {
   2209         UInt szBits = 8 * sizeofIRType(ty);
   2210         vassert(imm6 > 0 && imm6 < szBits);
   2211         assign(res, binop(mkOR(ty),
   2212                           binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
   2213                           binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
   2214       }
   2215       putIRegOrZR(is64, dd, mkexpr(res));
   2216       DIP("extr %s, %s, %s, #%u\n",
   2217           nameIRegOrZR(is64,dd),
   2218           nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
   2219       return True;
   2220    }
   2221   after_extr:
   2222 
   2223    vex_printf("ARM64 front end: data_processing_immediate\n");
   2224    return False;
   2225 #  undef INSN
   2226 }
   2227 
   2228 
   2229 /*------------------------------------------------------------*/
   2230 /*--- Data processing (register) instructions              ---*/
   2231 /*------------------------------------------------------------*/
   2232 
   2233 static const HChar* nameSH ( UInt sh ) {
   2234    switch (sh) {
   2235       case 0: return "lsl";
   2236       case 1: return "lsr";
   2237       case 2: return "asr";
   2238       case 3: return "ror";
   2239       default: vassert(0);
   2240    }
   2241 }
   2242 
   2243 /* Generate IR to get a register value, possibly shifted by an
   2244    immediate.  Returns either a 32- or 64-bit temporary holding the
   2245    result.  After the shift, the value can optionally be NOT-ed
   2246    too.
   2247 
   2248    sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
   2249    in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
   2250    isn't allowed, but it's the job of the caller to check that.
   2251 */
   2252 static IRTemp getShiftedIRegOrZR ( Bool is64,
   2253                                    UInt sh_how, UInt sh_amt, UInt regNo,
   2254                                    Bool invert )
   2255 {
   2256    vassert(sh_how < 4);
   2257    vassert(sh_amt < (is64 ? 64 : 32));
   2258    IRType ty = is64 ? Ity_I64 : Ity_I32;
   2259    IRTemp t0 = newTemp(ty);
   2260    assign(t0, getIRegOrZR(is64, regNo));
   2261    IRTemp t1 = newTemp(ty);
   2262    switch (sh_how) {
   2263       case BITS2(0,0):
   2264          assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
   2265          break;
   2266       case BITS2(0,1):
   2267          assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
   2268          break;
   2269       case BITS2(1,0):
   2270          assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
   2271          break;
   2272       case BITS2(1,1):
   2273          assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
   2274          break;
   2275       default:
   2276          vassert(0);
   2277    }
   2278    if (invert) {
   2279       IRTemp t2 = newTemp(ty);
   2280       assign(t2, unop(mkNOT(ty), mkexpr(t1)));
   2281       return t2;
   2282    } else {
   2283       return t1;
   2284    }
   2285 }
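
        /* For example, the LOGIC(reg) decoder below calls this with
           invert == (bN == 1), so BIC Xd, Xn, Xm, lsr #4 ends up with an
           argR temp holding Not64(Shr64(Xm, 4)), which is then ANDed with
           Xn. */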
   2286 
   2287 
   2288 static
   2289 Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
   2290                                         UInt insn)
   2291 {
   2292 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   2293 
   2294    /* ------------------- ADD/SUB(reg) ------------------- */
   2295    /* x==0 => 32 bit op      x==1 => 64 bit op
   2296       sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
   2297 
   2298       31 30 29 28    23 21 20 15   9  4
   2299       |  |  |  |     |  |  |  |    |  |
   2300       x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
   2301       x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
   2302       x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
   2303       x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
   2304    */
   2305    if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
   2306       UInt   bX    = INSN(31,31);
   2307       UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
   2308       UInt   bS    = INSN(29, 29); /* set flags? */
   2309       UInt   sh    = INSN(23,22);
   2310       UInt   rM    = INSN(20,16);
   2311       UInt   imm6  = INSN(15,10);
   2312       UInt   rN    = INSN(9,5);
   2313       UInt   rD    = INSN(4,0);
   2314       Bool   isSUB = bOP == 1;
   2315       Bool   is64  = bX == 1;
   2316       IRType ty    = is64 ? Ity_I64 : Ity_I32;
   2317       if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
   2318          /* invalid; fall through */
   2319       } else {
   2320          IRTemp argL = newTemp(ty);
   2321          assign(argL, getIRegOrZR(is64, rN));
   2322          IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
   2323          IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
   2324          IRTemp res  = newTemp(ty);
   2325          assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
   2326          if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
   2327          if (bS) {
   2328             setFlags_ADD_SUB(is64, isSUB, argL, argR);
   2329          }
   2330          DIP("%s%s %s, %s, %s, %s #%u\n",
   2331              bOP ? "sub" : "add", bS ? "s" : "",
   2332              nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
   2333              nameIRegOrZR(is64, rM), nameSH(sh), imm6);
   2334          return True;
   2335       }
   2336    }
   2337 
   2338    /* ------------------- ADC/SBC(reg) ------------------- */
   2339    /* x==0 => 32 bit op      x==1 => 64 bit op
   2340 
   2341       31 30 29 28    23 21 20 15     9  4
   2342       |  |  |  |     |  |  |  |      |  |
   2343       x  0  0  11010 00 0  Rm 000000 Rn Rd   ADC  Rd,Rn,Rm
   2344       x  0  1  11010 00 0  Rm 000000 Rn Rd   ADCS Rd,Rn,Rm
   2345       x  1  0  11010 00 0  Rm 000000 Rn Rd   SBC  Rd,Rn,Rm
   2346       x  1  1  11010 00 0  Rm 000000 Rn Rd   SBCS Rd,Rn,Rm
   2347    */
   2348 
   2349    if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
   2350       UInt   bX    = INSN(31,31);
   2351       UInt   bOP   = INSN(30,30); /* 0: ADC, 1: SBC */
   2352       UInt   bS    = INSN(29,29); /* set flags */
   2353       UInt   rM    = INSN(20,16);
   2354       UInt   rN    = INSN(9,5);
   2355       UInt   rD    = INSN(4,0);
   2356 
   2357       Bool   isSUB = bOP == 1;
   2358       Bool   is64  = bX == 1;
   2359       IRType ty    = is64 ? Ity_I64 : Ity_I32;
   2360 
   2361       IRTemp oldC = newTemp(ty);
   2362       assign(oldC,
   2363              is64 ? mk_arm64g_calculate_flag_c()
   2364                   : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
   2365 
   2366       IRTemp argL = newTemp(ty);
   2367       assign(argL, getIRegOrZR(is64, rN));
   2368       IRTemp argR = newTemp(ty);
   2369       assign(argR, getIRegOrZR(is64, rM));
   2370 
   2371       IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
   2372       IRTemp res  = newTemp(ty);
   2373       if (isSUB) {
   2374          IRExpr* one = is64 ? mkU64(1) : mkU32(1);
   2375          IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
   2376          assign(res,
   2377                 binop(op,
   2378                       binop(op, mkexpr(argL), mkexpr(argR)),
   2379                       binop(xorOp, mkexpr(oldC), one)));
   2380       } else {
   2381          assign(res,
   2382                 binop(op,
   2383                       binop(op, mkexpr(argL), mkexpr(argR)),
   2384                       mkexpr(oldC)));
   2385       }
   2386 
   2387       if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
   2388 
   2389       if (bS) {
   2390          setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
   2391       }
   2392 
   2393       DIP("%s%s %s, %s, %s\n",
   2394           bOP ? "sbc" : "adc", bS ? "s" : "",
   2395           nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
   2396           nameIRegOrZR(is64, rM));
   2397       return True;
   2398    }
   2399 
   2400 
   2401 
   2402    /* -------------------- LOGIC(reg) -------------------- */
   2403    /* x==0 => 32 bit op      x==1 => 64 bit op
   2404       N==0 => inv? is no-op (no inversion)
   2405       N==1 => inv? is NOT
   2406       sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
   2407 
   2408       31 30 28    23 21 20 15   9  4
   2409       |  |  |     |  |  |  |    |  |
   2410       x  00 01010 sh N  Rm imm6 Rn Rd  AND  Rd,Rn, inv?(sh(Rm,imm6))
   2411       x  01 01010 sh N  Rm imm6 Rn Rd  ORR  Rd,Rn, inv?(sh(Rm,imm6))
   2412       x  10 01010 sh N  Rm imm6 Rn Rd  EOR  Rd,Rn, inv?(sh(Rm,imm6))
   2413       x  11 01010 sh N  Rm imm6 Rn Rd  ANDS Rd,Rn, inv?(sh(Rm,imm6))
   2414       With N=1, the names are: BIC ORN EON BICS
   2415    */
   2416    if (INSN(28,24) == BITS5(0,1,0,1,0)) {
   2417       UInt   bX   = INSN(31,31);
   2418       UInt   sh   = INSN(23,22);
   2419       UInt   bN   = INSN(21,21);
   2420       UInt   rM   = INSN(20,16);
   2421       UInt   imm6 = INSN(15,10);
   2422       UInt   rN   = INSN(9,5);
   2423       UInt   rD   = INSN(4,0);
   2424       Bool   is64 = bX == 1;
   2425       IRType ty   = is64 ? Ity_I64 : Ity_I32;
   2426       if (!is64 && imm6 > 31) {
   2427          /* invalid; fall through */
   2428       } else {
   2429          IRTemp argL = newTemp(ty);
   2430          assign(argL, getIRegOrZR(is64, rN));
   2431          IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
   2432          IROp   op   = Iop_INVALID;
   2433          switch (INSN(30,29)) {
   2434             case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
   2435             case BITS2(0,1):                  op = mkOR(ty);  break;
   2436             case BITS2(1,0):                  op = mkXOR(ty); break;
   2437             default: vassert(0);
   2438          }
   2439          IRTemp res = newTemp(ty);
   2440          assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
   2441          if (INSN(30,29) == BITS2(1,1)) {
   2442             setFlags_LOGIC(is64, res);
   2443          }
   2444          putIRegOrZR(is64, rD, mkexpr(res));
   2445 
   2446          static const HChar* names_op[8]
   2447             = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
   2448          vassert(((bN << 2) | INSN(30,29)) < 8);
   2449          const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
   2450          /* Special-case the printing of "MOV" */
   2451          if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
   2452             DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
   2453                                 nameIRegOrZR(is64, rM));
   2454          } else {
   2455             DIP("%s %s, %s, %s, %s #%u\n", nm_op,
   2456                 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
   2457                 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
   2458          }
   2459          return True;
   2460       }
   2461    }
   2462 
   2463    /* -------------------- {U,S}MULH -------------------- */
   2464    /* 31       23 22 20 15     9   4
   2465       10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
   2466       10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
   2467    */
   2468    if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
   2469        && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
   2470       Bool isU = INSN(23,23) == 1;
   2471       UInt mm  = INSN(20,16);
   2472       UInt nn  = INSN(9,5);
   2473       UInt dd  = INSN(4,0);
   2474       putIReg64orZR(dd, unop(Iop_128HIto64,
   2475                              binop(isU ? Iop_MullU64 : Iop_MullS64,
   2476                                    getIReg64orZR(nn), getIReg64orZR(mm))));
   2477       DIP("%cmulh %s, %s, %s\n",
   2478           isU ? 'u' : 's',
   2479           nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
   2480       return True;
   2481    }
   2482 
   2483    /* -------------------- M{ADD,SUB} -------------------- */
   2484    /* 31 30           20 15 14 9 4
   2485       sf 00 11011 000 m  0  a  n r   MADD Rd,Rn,Rm,Ra  d = a+m*n
   2486       sf 00 11011 000 m  1  a  n r   MSUB Rd,Rn,Rm,Ra  d = a-m*n
   2487    */
   2488    if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
   2489       Bool is64  = INSN(31,31) == 1;
   2490       UInt mm    = INSN(20,16);
   2491       Bool isAdd = INSN(15,15) == 0;
   2492       UInt aa    = INSN(14,10);
   2493       UInt nn    = INSN(9,5);
   2494       UInt dd    = INSN(4,0);
   2495       if (is64) {
   2496          putIReg64orZR(
   2497             dd,
   2498             binop(isAdd ? Iop_Add64 : Iop_Sub64,
   2499                   getIReg64orZR(aa),
   2500                   binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
   2501       } else {
   2502          putIReg32orZR(
   2503             dd,
   2504             binop(isAdd ? Iop_Add32 : Iop_Sub32,
   2505                   getIReg32orZR(aa),
   2506                   binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
   2507       }
   2508       DIP("%s %s, %s, %s, %s\n",
   2509           isAdd ? "madd" : "msub",
   2510           nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
   2511           nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
   2512       return True;
   2513    }
   2514 
   2515    /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
   2516    /* 31 30 28        20 15   11 9  4
   2517       sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
   2518       sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
   2519       sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
   2520       sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
   2521       In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
   2522    */
   2523    if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
   2524       Bool    is64 = INSN(31,31) == 1;
   2525       UInt    b30  = INSN(30,30);
   2526       UInt    mm   = INSN(20,16);
   2527       UInt    cond = INSN(15,12);
   2528       UInt    b10  = INSN(10,10);
   2529       UInt    nn   = INSN(9,5);
   2530       UInt    dd   = INSN(4,0);
   2531       UInt    op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
   2532       IRType  ty   = is64 ? Ity_I64 : Ity_I32;
   2533       IRExpr* argL = getIRegOrZR(is64, nn);
   2534       IRExpr* argR = getIRegOrZR(is64, mm);
   2535       switch (op) {
   2536          case BITS2(0,0):
   2537             break;
   2538          case BITS2(0,1):
   2539             argR = binop(mkADD(ty), argR, mkU(ty,1));
   2540             break;
   2541          case BITS2(1,0):
   2542             argR = unop(mkNOT(ty), argR);
   2543             break;
   2544          case BITS2(1,1):
   2545             argR = binop(mkSUB(ty), mkU(ty,0), argR);
   2546             break;
   2547          default:
   2548             vassert(0);
   2549       }
   2550       putIRegOrZR(
   2551          is64, dd,
   2552          IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
   2553                     argL, argR)
   2554       );
   2555       const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
   2556       DIP("%s %s, %s, %s, %s\n", op_nm[op],
   2557           nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
   2558           nameIRegOrZR(is64, mm), nameCC(cond));
   2559       return True;
   2560    }
   2561 
   2562    /* -------------- ADD/SUB(extended reg) -------------- */
   2563    /*     28         20 15  12   9 4
   2564       000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
   2565       100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld
   2566 
   2567       001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
   2568       101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld
   2569 
   2570       010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
   2571       110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld
   2572 
   2573       011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
   2574       111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld
   2575 
   2576       The 'm' operand is extended per opt, thusly:
   2577 
   2578         000   Xm & 0xFF           UXTB
   2579         001   Xm & 0xFFFF         UXTH
   2580         010   Xm & (2^32)-1       UXTW
   2581         011   Xm                  UXTX
   2582 
   2583         100   Xm sx from bit 7    SXTB
   2584         101   Xm sx from bit 15   SXTH
   2585         110   Xm sx from bit 31   SXTW
   2586         111   Xm                  SXTX
   2587 
   2588       In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
   2589       operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
   2590       are the identity operation on Wm.
   2591 
   2592       After extension, the value is shifted left by imm3 bits, which
   2593       may only be in the range 0 .. 4 inclusive.
   2594    */
   2595    if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
   2596       Bool is64  = INSN(31,31) == 1;
   2597       Bool isSub = INSN(30,30) == 1;
   2598       Bool setCC = INSN(29,29) == 1;
   2599       UInt mm    = INSN(20,16);
   2600       UInt opt   = INSN(15,13);
   2601       UInt imm3  = INSN(12,10);
   2602       UInt nn    = INSN(9,5);
   2603       UInt dd    = INSN(4,0);
   2604       const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
   2605                                   "sxtb", "sxth", "sxtw", "sxtx" };
   2606       /* Do almost the same thing in the 32- and 64-bit cases. */
   2607       IRTemp xN = newTemp(Ity_I64);
   2608       IRTemp xM = newTemp(Ity_I64);
   2609       assign(xN, getIReg64orSP(nn));
   2610       assign(xM, getIReg64orZR(mm));
   2611       IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
   2612       Int     shSX = 0;
   2613       /* widen Xm .. */
   2614       switch (opt) {
   2615          case BITS3(0,0,0): // UXTB
   2616             xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
   2617          case BITS3(0,0,1): // UXTH
   2618             xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
   2619          case BITS3(0,1,0): // UXTW -- noop for the 32bit case
   2620             if (is64) {
   2621                xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
   2622             }
   2623             break;
   2624          case BITS3(0,1,1): // UXTX -- always a noop
   2625             break;
   2626          case BITS3(1,0,0): // SXTB
   2627             shSX = 56; goto sxTo64;
   2628          case BITS3(1,0,1): // SXTH
   2629             shSX = 48; goto sxTo64;
   2630          case BITS3(1,1,0): // SXTW -- noop for the 32bit case
   2631             if (is64) {
   2632                shSX = 32; goto sxTo64;
   2633             }
   2634             break;
   2635          case BITS3(1,1,1): // SXTX -- always a noop
   2636             break;
   2637          sxTo64:
   2638             vassert(shSX >= 32);
   2639             xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
   2640                         mkU8(shSX));
   2641             break;
   2642          default:
   2643             vassert(0);
   2644       }
   2645       /* and now shift */
   2646       IRTemp argL = xN;
   2647       IRTemp argR = newTemp(Ity_I64);
   2648       assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
   2649       IRTemp res = newTemp(Ity_I64);
   2650       assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
   2651                         mkexpr(argL), mkexpr(argR)));
   2652       if (is64) {
   2653          if (setCC) {
   2654             putIReg64orZR(dd, mkexpr(res));
   2655             setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
   2656          } else {
   2657             putIReg64orSP(dd, mkexpr(res));
   2658          }
   2659       } else {
   2660          if (setCC) {
   2661             IRTemp argL32 = newTemp(Ity_I32);
   2662             IRTemp argR32 = newTemp(Ity_I32);
   2663             putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
   2664             assign(argL32, unop(Iop_64to32, mkexpr(argL)));
   2665             assign(argR32, unop(Iop_64to32, mkexpr(argR)));
   2666             setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
   2667          } else {
   2668             putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
   2669          }
   2670       }
   2671       DIP("%s%s %s, %s, %s %s lsl %u\n",
   2672           isSub ? "sub" : "add", setCC ? "s" : "",
   2673           setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
   2674           nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
   2675           nameExt[opt], imm3);
   2676       return True;
   2677    }
   2678 
   2679    /* ---------------- CCMP/CCMN(imm) ---------------- */
   2680    /* Bizarrely, these appear in the "data processing register"
   2681       category, even though they are operations against an
   2682       immediate. */
   2683    /* 31   29        20   15   11 9    3
   2684       sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
   2685       sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond
   2686 
   2687       Operation is:
   2688          (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
   2689          (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
   2690    */
   2691    if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
   2692        && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
   2693       Bool is64  = INSN(31,31) == 1;
   2694       Bool isSUB = INSN(30,30) == 1;
   2695       UInt imm5  = INSN(20,16);
   2696       UInt cond  = INSN(15,12);
   2697       UInt nn    = INSN(9,5);
   2698       UInt nzcv  = INSN(3,0);
   2699 
   2700       IRTemp condT = newTemp(Ity_I1);
   2701       assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
   2702 
   2703       IRType ty   = is64 ? Ity_I64 : Ity_I32;
   2704       IRTemp argL = newTemp(ty);
   2705       IRTemp argR = newTemp(ty);
   2706 
   2707       if (is64) {
   2708          assign(argL, getIReg64orZR(nn));
   2709          assign(argR, mkU64(imm5));
   2710       } else {
   2711          assign(argL, getIReg32orZR(nn));
   2712          assign(argR, mkU32(imm5));
   2713       }
   2714       setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
   2715 
   2716       DIP("ccm%c %s, #%u, #%u, %s\n",
   2717           isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
   2718           imm5, nzcv, nameCC(cond));
   2719       return True;
   2720    }
   2721 
   2722    /* ---------------- CCMP/CCMN(reg) ---------------- */
   2723    /* 31   29        20 15   11 9    3
   2724       sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
   2725       sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond
   2726       Operation is:
   2727          (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
   2728          (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
   2729    */
   2730    if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
   2731        && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
   2732       Bool is64  = INSN(31,31) == 1;
   2733       Bool isSUB = INSN(30,30) == 1;
   2734       UInt mm    = INSN(20,16);
   2735       UInt cond  = INSN(15,12);
   2736       UInt nn    = INSN(9,5);
   2737       UInt nzcv  = INSN(3,0);
   2738 
   2739       IRTemp condT = newTemp(Ity_I1);
   2740       assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
   2741 
   2742       IRType ty   = is64 ? Ity_I64 : Ity_I32;
   2743       IRTemp argL = newTemp(ty);
   2744       IRTemp argR = newTemp(ty);
   2745 
   2746       if (is64) {
   2747          assign(argL, getIReg64orZR(nn));
   2748          assign(argR, getIReg64orZR(mm));
   2749       } else {
   2750          assign(argL, getIReg32orZR(nn));
   2751          assign(argR, getIReg32orZR(mm));
   2752       }
   2753       setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
   2754 
   2755       DIP("ccm%c %s, %s, #%u, %s\n",
   2756           isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
   2757           nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
   2758       return True;
   2759    }
   2760 
   2761 
   2762    /* -------------- REV/REV16/REV32/RBIT -------------- */
   2763    /* 31 30 28       20    15   11 9 4
   2764 
   2765       1  10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
   2766       0  10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn
   2767 
   2768       1  10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
   2769       0  10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn
   2770 
   2771       1  10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
   2772       0  10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn
   2773 
   2774       1  10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
   2775    */
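   /* Note on the 32-bit forms handled below (an observation about this
      code, not about the encodings): the W-register variants are fed to
      the 64-bit swap helpers by first shifting Wn into the top half of a
      64-bit value.  E.g. for REV Wd with Wn = 0xAABBCCDD the helper sees
      0xAABBCCDD00000000, byte-reverses it to 0x00000000DDCCBBAA, and the
      low 32 bits give the right answer.  REV16 Wd (case 6) is the one
      form for which that trick fails, hence its special case. */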
   2776    if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
   2777        && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
   2778       UInt b31 = INSN(31,31);
   2779       UInt opc = INSN(11,10);
   2780 
   2781       UInt ix = 0;
   2782       /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
   2783       else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
   2784       else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
   2785       else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
   2786       else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
   2787       else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
   2788       else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
   2789       if (ix >= 1 && ix <= 7) {
   2790          Bool   is64  = ix == 1 || ix == 3 || ix == 5 || ix == 7;
   2791          UInt   nn    = INSN(9,5);
   2792          UInt   dd    = INSN(4,0);
   2793          IRTemp src   = newTemp(Ity_I64);
   2794          IRTemp dst   = IRTemp_INVALID;
   2795          IRTemp (*math)(IRTemp) = NULL;
   2796          switch (ix) {
   2797             case 1: case 2: math = math_BYTESWAP64;   break;
   2798             case 3: case 4: math = math_BITSWAP64;    break;
   2799             case 5: case 6: math = math_USHORTSWAP64; break;
   2800             case 7:         math = math_UINTSWAP64;   break;
   2801             default: vassert(0);
   2802          }
   2803          const HChar* names[7]
   2804            = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
   2805          const HChar* nm = names[ix-1];
   2806          vassert(math);
   2807          if (ix == 6) {
   2808             /* This has to be special cased, since the logic below doesn't
   2809                handle it correctly. */
   2810             assign(src, getIReg64orZR(nn));
   2811             dst = math(src);
   2812             putIReg64orZR(dd,
   2813                           unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
   2814          } else if (is64) {
   2815             assign(src, getIReg64orZR(nn));
   2816             dst = math(src);
   2817             putIReg64orZR(dd, mkexpr(dst));
   2818          } else {
   2819             assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
   2820             dst = math(src);
   2821             putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
   2822          }
   2823          DIP("%s %s, %s\n", nm,
   2824              nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
   2825          return True;
   2826       }
   2827       /* else fall through */
   2828    }
   2829 
   2830    /* -------------------- CLZ/CLS -------------------- */
   2831    /*    30 28   24   20    15      9 4
   2832       sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
   2833       sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
   2834    */
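   /* Note on the IR below (not the encoding): the IRExpr_ITE guards the
      zero-source case, for which the architectural result is the register
      width (64 or 32) and for which Iop_Clz64 alone cannot be relied on.
      The 32-bit form shifts Wn into the top half of a 64-bit value so the
      64-bit leading-zero count equals the 32-bit one; e.g. (illustrative)
      Wn = 0x00010000 gives clz64(0x0001000000000000) = 15, the correct
      CLZ Wd result. */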
   2835    if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
   2836        && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
   2837       Bool   is64  = INSN(31,31) == 1;
   2838       Bool   isCLS = INSN(10,10) == 1;
   2839       UInt   nn    = INSN(9,5);
   2840       UInt   dd    = INSN(4,0);
   2841       IRTemp src   = newTemp(Ity_I64);
   2842       IRTemp dst   = newTemp(Ity_I64);
   2843       if (!isCLS) { // CLS not yet supported
   2844          if (is64) {
   2845             assign(src, getIReg64orZR(nn));
   2846             assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
   2847                                    mkU64(64),
   2848                                    unop(Iop_Clz64, mkexpr(src))));
   2849             putIReg64orZR(dd, mkexpr(dst));
   2850          } else {
   2851             assign(src, binop(Iop_Shl64,
   2852                               unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
   2853             assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
   2854                                    mkU64(32),
   2855                                    unop(Iop_Clz64, mkexpr(src))));
   2856             putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
   2857          }
   2858          DIP("cl%c %s, %s\n",
   2859              isCLS ? 's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
   2860          return True;
   2861       }
   2862    }
   2863 
   2864    /* -------------------- LSLV/LSRV/ASRV -------------------- */
   2865    /*    30 28        20 15   11 9 4
   2866       sf 00 1101 0110 m  0010 00 n d   LSLV Rd,Rn,Rm
   2867       sf 00 1101 0110 m  0010 01 n d   LSRV Rd,Rn,Rm
   2868       sf 00 1101 0110 m  0010 10 n d   ASRV Rd,Rn,Rm
   2869    */
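   /* The And64 with 63 (or 31) below implements the architectural rule
      that the shift amount is Rm taken modulo the data size.  For example
      (illustrative), "lslv x0, x1, x2" with x2 = 65 shifts x1 left by 1,
      and "lsrv w0, w1, w2" with w2 = 32 shifts by 0. */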
   2870    if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
   2871        && INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) {
   2872       Bool   is64 = INSN(31,31) == 1;
   2873       UInt   mm   = INSN(20,16);
   2874       UInt   op   = INSN(11,10);
   2875       UInt   nn   = INSN(9,5);
   2876       UInt   dd   = INSN(4,0);
   2877       IRType ty   = is64 ? Ity_I64 : Ity_I32;
   2878       IRTemp srcL = newTemp(ty);
   2879       IRTemp srcR = newTemp(Ity_I8);
   2880       IRTemp res  = newTemp(ty);
   2881       IROp   iop  = Iop_INVALID;
   2882       assign(srcL, getIRegOrZR(is64, nn));
   2883       assign(srcR,
   2884              unop(Iop_64to8,
   2885                   binop(Iop_And64,
   2886                         getIReg64orZR(mm), mkU64(is64 ? 63 : 31))));
   2887       switch (op) {
   2888          case BITS2(0,0): iop = mkSHL(ty); break;
   2889          case BITS2(0,1): iop = mkSHR(ty); break;
   2890          case BITS2(1,0): iop = mkSAR(ty); break;
   2891          default: vassert(0);
   2892       }
   2893       assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR)));
   2894       putIRegOrZR(is64, dd, mkexpr(res));
   2895       vassert(op < 3);
   2896       const HChar* names[3] = { "lslv", "lsrv", "asrv" };
   2897       DIP("%s %s, %s, %s\n",
   2898           names[op], nameIRegOrZR(is64,dd),
   2899                      nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
   2900       return True;
   2901    }
   2902 
   2903    /* -------------------- SDIV/UDIV -------------------- */
   2904    /*    30 28        20 15    10 9 4
   2905       sf 00 1101 0110 m  00001  1 n d  SDIV Rd,Rn,Rm
   2906       sf 00 1101 0110 m  00001  0 n d  UDIV Rd,Rn,Rm
   2907    */
   2908    if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
   2909        && INSN(15,11) == BITS5(0,0,0,0,1)) {
   2910       Bool is64 = INSN(31,31) == 1;
   2911       UInt mm   = INSN(20,16);
   2912       Bool isS  = INSN(10,10) == 1;
   2913       UInt nn   = INSN(9,5);
   2914       UInt dd   = INSN(4,0);
   2915       if (isS) {
   2916          putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
   2917                                      getIRegOrZR(is64, nn),
   2918                                      getIRegOrZR(is64, mm)));
   2919       } else {
   2920          putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
   2921                                      getIRegOrZR(is64, nn),
   2922                                      getIRegOrZR(is64, mm)));
   2923       }
   2924       DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
   2925           nameIRegOrZR(is64, dd),
   2926           nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
   2927       return True;
   2928    }
   2929 
   2930    /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
   2931    /* 31        23  20 15 14 9 4
   2932       1001 1011 101 m  0  a  n d   UMADDL Xd,Wn,Wm,Xa
   2933       1001 1011 001 m  0  a  n d   SMADDL Xd,Wn,Wm,Xa
   2934       1001 1011 101 m  1  a  n d   UMSUBL Xd,Wn,Wm,Xa
   2935       1001 1011 001 m  1  a  n d   SMSUBL Xd,Wn,Wm,Xa
   2936       with operation
   2937          Xd = Xa +/- (Wn *u/s Wm)
   2938    */
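   /* For example (illustrative): "umaddl x0, w1, w2, x3" computes
      x0 = x3 + (zero-extended w1) * (zero-extended w2), which is what the
      Iop_MullU32 / Iop_Add64 combination below produces; the S forms use
      Iop_MullS32 for a sign-extended product instead. */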
   2939    if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
   2940       Bool   isU   = INSN(23,23) == 1;
   2941       UInt   mm    = INSN(20,16);
   2942       Bool   isAdd = INSN(15,15) == 0;
   2943       UInt   aa    = INSN(14,10);
   2944       UInt   nn    = INSN(9,5);
   2945       UInt   dd    = INSN(4,0);
   2946       IRTemp wN    = newTemp(Ity_I32);
   2947       IRTemp wM    = newTemp(Ity_I32);
   2948       IRTemp xA    = newTemp(Ity_I64);
   2949       IRTemp muld  = newTemp(Ity_I64);
   2950       IRTemp res   = newTemp(Ity_I64);
   2951       assign(wN, getIReg32orZR(nn));
   2952       assign(wM, getIReg32orZR(mm));
   2953       assign(xA, getIReg64orZR(aa));
   2954       assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
   2955                          mkexpr(wN), mkexpr(wM)));
   2956       assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
   2957                         mkexpr(xA), mkexpr(muld)));
   2958       putIReg64orZR(dd, mkexpr(res));
   2959       DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
   2960           nameIReg64orZR(dd), nameIReg32orZR(nn),
   2961           nameIReg32orZR(mm), nameIReg64orZR(aa));
   2962       return True;
   2963    }
   2964    vex_printf("ARM64 front end: data_processing_register\n");
   2965    return False;
   2966 #  undef INSN
   2967 }
   2968 
   2969 
   2970 /*------------------------------------------------------------*/
   2971 /*--- Load and Store instructions                          ---*/
   2972 /*------------------------------------------------------------*/
   2973 
   2974 /* Generate the EA for a "reg + reg" style amode.  This is done from
    2975    parts of the insn, but for sanity checking's sake it takes the whole
   2976    insn.  This appears to depend on insn[15:12], with opt=insn[15:13]
   2977    and S=insn[12]:
   2978 
   2979    The possible forms, along with their opt:S values, are:
   2980       011:0   Xn|SP + Xm
   2981       111:0   Xn|SP + Xm
   2982       011:1   Xn|SP + Xm * transfer_szB
   2983       111:1   Xn|SP + Xm * transfer_szB
   2984       010:0   Xn|SP + 32Uto64(Wm)
   2985       010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
   2986       110:0   Xn|SP + 32Sto64(Wm)
   2987       110:1   Xn|SP + 32Sto64(Wm) * transfer_szB
   2988 
   2989    Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
   2990    the transfer size is insn[23,31,30].  For integer loads/stores,
   2991    insn[23] is zero, hence szLg2 can be at most 3 in such cases.
   2992 
   2993    If the decoding fails, it returns IRTemp_INVALID.
   2994 
    2995    isInt is True iff this decoding is for transfers to/from integer
   2996    registers.  If False it is for transfers to/from vector registers.
   2997 */
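/* Worked example (illustrative): for "ldr x1, [x2, w3, sxtw #3]" the
   option:S field is 110:1, so the code below selects the
   Iop_32Sto64-then-shift form; the returned EA is x2 + (sxTo64(w3) << 3)
   and the matching "[x2, w3 sxtx, lsl 3]" text is placed in buf. */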
   2998 static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
   2999 {
   3000    UInt    optS  = SLICE_UInt(insn, 15, 12);
   3001    UInt    mm    = SLICE_UInt(insn, 20, 16);
   3002    UInt    nn    = SLICE_UInt(insn, 9, 5);
   3003    UInt    szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
   3004                    | SLICE_UInt(insn, 31, 30); // Log2 of the size
   3005 
   3006    buf[0] = 0;
   3007 
   3008    /* Sanity checks, that this really is a load/store insn. */
   3009    if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
   3010       goto fail;
   3011 
   3012    if (isInt
   3013        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
   3014        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
   3015        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
   3016        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
   3017       goto fail;
   3018 
   3019    if (!isInt
   3020        && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
   3021       goto fail;
   3022 
   3023    /* Throw out non-verified but possibly valid cases. */
   3024    switch (szLg2) {
   3025       case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
   3026       case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
   3027       case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
   3028       case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
    3029       case BITS3(1,0,0): // 128 bit; only ever valid for the vector case,
    3030                          goto fail; // but not yet handled even then
   3031       case BITS3(1,0,1): // these sizes are never valid
   3032       case BITS3(1,1,0):
   3033       case BITS3(1,1,1): goto fail;
   3034 
   3035       default: vassert(0);
   3036    }
   3037 
   3038    IRExpr* rhs  = NULL;
   3039    switch (optS) {
   3040       case BITS4(1,1,1,0): goto fail; //ATC
   3041       case BITS4(0,1,1,0):
   3042          rhs = getIReg64orZR(mm);
   3043          vex_sprintf(buf, "[%s, %s]",
   3044                      nameIReg64orZR(nn), nameIReg64orZR(mm));
   3045          break;
   3046       case BITS4(1,1,1,1): goto fail; //ATC
   3047       case BITS4(0,1,1,1):
   3048          rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
   3049          vex_sprintf(buf, "[%s, %s lsl %u]",
   3050                      nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
   3051          break;
   3052       case BITS4(0,1,0,0):
   3053          rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
   3054          vex_sprintf(buf, "[%s, %s uxtx]",
   3055                      nameIReg64orZR(nn), nameIReg32orZR(mm));
   3056          break;
   3057       case BITS4(0,1,0,1):
   3058          rhs = binop(Iop_Shl64,
   3059                      unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
   3060          vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
   3061                      nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
   3062          break;
   3063       case BITS4(1,1,0,0):
   3064          rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
   3065          vex_sprintf(buf, "[%s, %s sxtx]",
   3066                      nameIReg64orZR(nn), nameIReg32orZR(mm));
   3067          break;
   3068       case BITS4(1,1,0,1):
   3069          rhs = binop(Iop_Shl64,
   3070                      unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
   3071          vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
   3072                      nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
   3073          break;
   3074       default:
   3075          /* The rest appear to be genuinely invalid */
   3076          goto fail;
   3077    }
   3078 
   3079    vassert(rhs);
   3080    IRTemp res = newTemp(Ity_I64);
   3081    assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
   3082    return res;
   3083 
   3084   fail:
   3085    vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
   3086    return IRTemp_INVALID;
   3087 }
   3088 
   3089 
   3090 /* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
   3091    bits of DATAE :: Ity_I64. */
   3092 static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
   3093 {
   3094    IRExpr* addrE = mkexpr(addr);
   3095    switch (szB) {
   3096       case 8:
   3097          storeLE(addrE, dataE);
   3098          break;
   3099       case 4:
   3100          storeLE(addrE, unop(Iop_64to32, dataE));
   3101          break;
   3102       case 2:
   3103          storeLE(addrE, unop(Iop_64to16, dataE));
   3104          break;
   3105       case 1:
   3106          storeLE(addrE, unop(Iop_64to8, dataE));
   3107          break;
   3108       default:
   3109          vassert(0);
   3110    }
   3111 }
   3112 
   3113 
   3114 /* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
   3115    placing the result in an Ity_I64 temporary. */
   3116 static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
   3117 {
   3118    IRTemp  res   = newTemp(Ity_I64);
   3119    IRExpr* addrE = mkexpr(addr);
   3120    switch (szB) {
   3121       case 8:
   3122          assign(res, loadLE(Ity_I64,addrE));
   3123          break;
   3124       case 4:
   3125          assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
   3126          break;
   3127       case 2:
   3128          assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
   3129          break;
   3130       case 1:
   3131          assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
   3132          break;
   3133       default:
   3134          vassert(0);
   3135    }
   3136    return res;
   3137 }
   3138 
   3139 
   3140 static
   3141 Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
   3142 {
   3143 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   3144 
   3145    /* ------------ LDR,STR (immediate, uimm12) ----------- */
   3146    /* uimm12 is scaled by the transfer size
   3147 
   3148       31 29  26    21    9  4
   3149       |  |   |     |     |  |
   3150       11 111 00100 imm12 nn tt    STR  Xt, [Xn|SP, #imm12 * 8]
   3151       11 111 00101 imm12 nn tt    LDR  Xt, [Xn|SP, #imm12 * 8]
   3152 
   3153       10 111 00100 imm12 nn tt    STR  Wt, [Xn|SP, #imm12 * 4]
   3154       10 111 00101 imm12 nn tt    LDR  Wt, [Xn|SP, #imm12 * 4]
   3155 
   3156       01 111 00100 imm12 nn tt    STRH Wt, [Xn|SP, #imm12 * 2]
   3157       01 111 00101 imm12 nn tt    LDRH Wt, [Xn|SP, #imm12 * 2]
   3158 
   3159       00 111 00100 imm12 nn tt    STRB Wt, [Xn|SP, #imm12 * 1]
   3160       00 111 00101 imm12 nn tt    LDRB Wt, [Xn|SP, #imm12 * 1]
   3161    */
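   /* Worked example (illustrative): "str w5, [sp, #64]" is the 32-bit
      form, so the instruction's offset field holds imm12 = 64/4 = 16 and
      the code below reconstructs offs = 16 * 4 = 64 before adding it to
      SP. */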
   3162    if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
   3163       UInt   szLg2 = INSN(31,30);
   3164       UInt   szB   = 1 << szLg2;
   3165       Bool   isLD  = INSN(22,22) == 1;
   3166       UInt   offs  = INSN(21,10) * szB;
   3167       UInt   nn    = INSN(9,5);
   3168       UInt   tt    = INSN(4,0);
   3169       IRTemp ta    = newTemp(Ity_I64);
   3170       assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
   3171       if (nn == 31) { /* FIXME generate stack alignment check */ }
   3172       vassert(szLg2 < 4);
   3173       if (isLD) {
   3174          putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
   3175       } else {
   3176          gen_narrowing_store(szB, ta, getIReg64orZR(tt));
   3177       }
   3178       const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
   3179       const HChar* st_name[4] = { "strb", "strh", "str", "str" };
   3180       DIP("%s %s, [%s, #%u]\n",
   3181           (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
   3182           nameIReg64orSP(nn), offs);
   3183       return True;
   3184    }
   3185 
   3186    /* ------------ LDUR,STUR (immediate, simm9) ----------- */
   3187    /*
   3188       31 29  26      20   11 9  4
   3189       |  |   |       |    |  |  |
   3190       (at-Rn-then-Rn=EA)  |  |  |
   3191       sz 111 00000 0 imm9 01 Rn Rt   STR Rt, [Xn|SP], #simm9
   3192       sz 111 00001 0 imm9 01 Rn Rt   LDR Rt, [Xn|SP], #simm9
   3193 
   3194       (at-EA-then-Rn=EA)
   3195       sz 111 00000 0 imm9 11 Rn Rt   STR Rt, [Xn|SP, #simm9]!
   3196       sz 111 00001 0 imm9 11 Rn Rt   LDR Rt, [Xn|SP, #simm9]!
   3197 
   3198       (at-EA)
   3199       sz 111 00000 0 imm9 00 Rn Rt   STR Rt, [Xn|SP, #simm9]
   3200       sz 111 00001 0 imm9 00 Rn Rt   LDR Rt, [Xn|SP, #simm9]
   3201 
   3202       simm9 is unscaled.
   3203 
   3204       The case 'wback && Rn == Rt && Rt != 31' is disallowed.  In the
    3205       load case this is because it would create two competing values for
   3206       Rt.  In the store case the reason is unclear, but the spec
   3207       disallows it anyway.
   3208 
   3209       Stores are narrowing, loads are unsigned widening.  sz encodes
   3210       the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
   3211    */
   3212    if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
   3213        == BITS9(1,1,1, 0,0,0,0,0, 0)) {
   3214       UInt szLg2  = INSN(31,30);
   3215       UInt szB    = 1 << szLg2;
   3216       Bool isLoad = INSN(22,22) == 1;
   3217       UInt imm9   = INSN(20,12);
   3218       UInt nn     = INSN(9,5);
   3219       UInt tt     = INSN(4,0);
   3220       Bool wBack  = INSN(10,10) == 1;
   3221       UInt how    = INSN(11,10);
   3222       if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
   3223          /* undecodable; fall through */
   3224       } else {
   3225          if (nn == 31) { /* FIXME generate stack alignment check */ }
   3226 
   3227          // Compute the transfer address TA and the writeback address WA.
   3228          IRTemp tRN = newTemp(Ity_I64);
   3229          assign(tRN, getIReg64orSP(nn));
   3230          IRTemp tEA = newTemp(Ity_I64);
   3231          Long simm9 = (Long)sx_to_64(imm9, 9);
   3232          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
   3233 
   3234          IRTemp tTA = newTemp(Ity_I64);
   3235          IRTemp tWA = newTemp(Ity_I64);
   3236          switch (how) {
   3237             case BITS2(0,1):
   3238                assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
   3239             case BITS2(1,1):
   3240                assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
   3241             case BITS2(0,0):
   3242                assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
   3243             default:
   3244                vassert(0); /* NOTREACHED */
   3245          }
   3246 
   3247          /* Normally rN would be updated after the transfer.  However, in
    3248             the special case typified by
   3249                str x30, [sp,#-16]!
   3250             it is necessary to update SP before the transfer, (1)
   3251             because Memcheck will otherwise complain about a write
   3252             below the stack pointer, and (2) because the segfault
   3253             stack extension mechanism will otherwise extend the stack
   3254             only down to SP before the instruction, which might not be
    3255             far enough, if the -16 offset takes the actual access
   3256             address to the next page.
   3257          */
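         /* Concretely (illustrative): for "str x30, [sp, #-16]!" with SP
            near a page boundary, storing first and updating SP afterwards
            would make the store appear to be below the stack pointer at
            the moment it happens; doing the SP update first keeps both
            Memcheck and the stack-extension machinery happy. */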
   3258          Bool earlyWBack
   3259            = wBack && simm9 < 0 && szB == 8
   3260              && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
   3261 
   3262          if (wBack && earlyWBack)
   3263             putIReg64orSP(nn, mkexpr(tEA));
   3264 
   3265          if (isLoad) {
   3266             putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
   3267          } else {
   3268             gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
   3269          }
   3270 
   3271          if (wBack && !earlyWBack)
   3272             putIReg64orSP(nn, mkexpr(tEA));
   3273 
   3274          const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
   3275          const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
   3276          const HChar* fmt_str = NULL;
   3277          switch (how) {
   3278             case BITS2(0,1):
   3279                fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
   3280                break;
   3281             case BITS2(1,1):
   3282                fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
   3283                break;
   3284             case BITS2(0,0):
    3285                fmt_str = "%s %s, [%s, #%lld] (at-EA)\n";
   3286                break;
   3287             default:
   3288                vassert(0);
   3289          }
   3290          DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
   3291                       nameIRegOrZR(szB == 8, tt),
   3292                       nameIReg64orSP(nn), simm9);
   3293          return True;
   3294       }
   3295    }
   3296 
   3297    /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
   3298    /* L==1 => mm==LD
   3299       L==0 => mm==ST
   3300       x==0 => 32 bit transfers, and zero extended loads
   3301       x==1 => 64 bit transfers
   3302       simm7 is scaled by the (single-register) transfer size
   3303 
   3304       (at-Rn-then-Rn=EA)
   3305       x0 101 0001 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP], #imm
   3306 
   3307       (at-EA-then-Rn=EA)
   3308       x0 101 0011 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]!
   3309 
   3310       (at-EA)
   3311       x0 101 0010 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]
   3312    */
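   /* Worked example (illustrative): "ldp x29, x30, [sp], #16" is the
      post-index (at-Rn-then-Rn=EA) form; imm7 holds 16/8 = 2, and the
      code below rescales it (simm7 * 8) before forming the writeback
      address. */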
   3313 
   3314    UInt insn_30_23 = INSN(30,23);
   3315    if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
   3316        || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
   3317        || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
   3318       UInt bL     = INSN(22,22);
   3319       UInt bX     = INSN(31,31);
   3320       UInt bWBack = INSN(23,23);
   3321       UInt rT1    = INSN(4,0);
   3322       UInt rN     = INSN(9,5);
   3323       UInt rT2    = INSN(14,10);
   3324       Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
   3325       if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
   3326           || (bL && rT1 == rT2)) {
   3327          /* undecodable; fall through */
   3328       } else {
   3329          if (rN == 31) { /* FIXME generate stack alignment check */ }
   3330 
   3331          // Compute the transfer address TA and the writeback address WA.
   3332          IRTemp tRN = newTemp(Ity_I64);
   3333          assign(tRN, getIReg64orSP(rN));
   3334          IRTemp tEA = newTemp(Ity_I64);
   3335          simm7 = (bX ? 8 : 4) * simm7;
   3336          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
   3337 
   3338          IRTemp tTA = newTemp(Ity_I64);
   3339          IRTemp tWA = newTemp(Ity_I64);
   3340          switch (INSN(24,23)) {
   3341             case BITS2(0,1):
   3342                assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
   3343             case BITS2(1,1):
   3344                assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
   3345             case BITS2(1,0):
   3346                assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
   3347             default:
   3348                vassert(0); /* NOTREACHED */
   3349          }
   3350 
   3351          /* Normally rN would be updated after the transfer.  However, in
    3352             the special case typified by
   3353                stp x29, x30, [sp,#-112]!
   3354             it is necessary to update SP before the transfer, (1)
   3355             because Memcheck will otherwise complain about a write
   3356             below the stack pointer, and (2) because the segfault
   3357             stack extension mechanism will otherwise extend the stack
   3358             only down to SP before the instruction, which might not be
    3359             far enough, if the -112 offset takes the actual access
   3360             address to the next page.
   3361          */
   3362          Bool earlyWBack
   3363            = bWBack && simm7 < 0
   3364              && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
   3365 
   3366          if (bWBack && earlyWBack)
   3367             putIReg64orSP(rN, mkexpr(tEA));
   3368 
   3369          /**/ if (bL == 1 && bX == 1) {
   3370             // 64 bit load
   3371             putIReg64orZR(rT1, loadLE(Ity_I64,
   3372                                       binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
   3373             putIReg64orZR(rT2, loadLE(Ity_I64,
   3374                                       binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
   3375          } else if (bL == 1 && bX == 0) {
   3376             // 32 bit load
   3377             putIReg32orZR(rT1, loadLE(Ity_I32,
   3378                                       binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
   3379             putIReg32orZR(rT2, loadLE(Ity_I32,
   3380                                       binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
   3381          } else if (bL == 0 && bX == 1) {
   3382             // 64 bit store
   3383             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
   3384                     getIReg64orZR(rT1));
   3385             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
   3386                     getIReg64orZR(rT2));
   3387          } else {
   3388             vassert(bL == 0 && bX == 0);
   3389             // 32 bit store
   3390             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
   3391                     getIReg32orZR(rT1));
   3392             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
   3393                     getIReg32orZR(rT2));
   3394          }
   3395 
   3396          if (bWBack && !earlyWBack)
   3397             putIReg64orSP(rN, mkexpr(tEA));
   3398 
   3399          const HChar* fmt_str = NULL;
   3400          switch (INSN(24,23)) {
   3401             case BITS2(0,1):
   3402                fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
   3403                break;
   3404             case BITS2(1,1):
   3405                fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
   3406                break;
   3407             case BITS2(1,0):
    3408                fmt_str = "%sp %s, %s, [%s, #%lld] (at-EA)\n";
   3409                break;
   3410             default:
   3411                vassert(0);
   3412          }
   3413          DIP(fmt_str, bL == 0 ? "st" : "ld",
   3414                       nameIRegOrZR(bX == 1, rT1),
   3415                       nameIRegOrZR(bX == 1, rT2),
   3416                       nameIReg64orSP(rN), simm7);
   3417          return True;
   3418       }
   3419    }
   3420 
   3421    /* ---------------- LDR (literal, int reg) ---------------- */
   3422    /* 31 29      23    4
   3423       00 011 000 imm19 Rt   LDR   Wt, [PC + sxTo64(imm19 << 2)]
   3424       01 011 000 imm19 Rt   LDR   Xt, [PC + sxTo64(imm19 << 2)]
   3425       10 011 000 imm19 Rt   LDRSW Xt, [PC + sxTo64(imm19 << 2)]
   3426       11 011 000 imm19 Rt   prefetch  [PC + sxTo64(imm19 << 2)]
   3427       Just handles the first two cases for now.
   3428    */
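   /* Since the load address is PC-relative and the PC of the current
      instruction is known at translation time, the EA below is folded
      into a constant (mkU64(ea)) rather than computed at run time. */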
   3429    if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
   3430       UInt  imm19 = INSN(23,5);
   3431       UInt  rT    = INSN(4,0);
   3432       UInt  bX    = INSN(30,30);
   3433       ULong ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
   3434       if (bX) {
   3435          putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
   3436       } else {
   3437          putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
   3438       }
   3439       DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
   3440       return True;
   3441    }
   3442 
   3443    /* -------------- {LD,ST}R (integer register) --------------- */
   3444    /* 31 29        20 15     12 11 9  4
   3445       |  |         |  |      |  |  |  |
   3446       11 111000011 Rm option S  10 Rn Rt  LDR  Xt, [Xn|SP, R<m>{ext/sh}]
   3447       10 111000011 Rm option S  10 Rn Rt  LDR  Wt, [Xn|SP, R<m>{ext/sh}]
   3448       01 111000011 Rm option S  10 Rn Rt  LDRH Wt, [Xn|SP, R<m>{ext/sh}]
   3449       00 111000011 Rm option S  10 Rn Rt  LDRB Wt, [Xn|SP, R<m>{ext/sh}]
   3450 
   3451       11 111000001 Rm option S  10 Rn Rt  STR  Xt, [Xn|SP, R<m>{ext/sh}]
   3452       10 111000001 Rm option S  10 Rn Rt  STR  Wt, [Xn|SP, R<m>{ext/sh}]
   3453       01 111000001 Rm option S  10 Rn Rt  STRH Wt, [Xn|SP, R<m>{ext/sh}]
   3454       00 111000001 Rm option S  10 Rn Rt  STRB Wt, [Xn|SP, R<m>{ext/sh}]
   3455    */
   3456    if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
   3457        && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
   3458       HChar  dis_buf[64];
   3459       UInt   szLg2 = INSN(31,30);
   3460       Bool   isLD  = INSN(22,22) == 1;
   3461       UInt   tt    = INSN(4,0);
   3462       IRTemp ea    = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
   3463       if (ea != IRTemp_INVALID) {
   3464          switch (szLg2) {
   3465             case 3: /* 64 bit */
   3466                if (isLD) {
   3467                   putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
   3468                   DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
   3469                } else {
   3470                   storeLE(mkexpr(ea), getIReg64orZR(tt));
   3471                   DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
   3472                }
   3473                break;
   3474             case 2: /* 32 bit */
   3475                if (isLD) {
   3476                   putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
   3477                   DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
   3478                } else {
   3479                   storeLE(mkexpr(ea), getIReg32orZR(tt));
   3480                   DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
   3481                }
   3482                break;
   3483             case 1: /* 16 bit */
   3484                if (isLD) {
   3485                   putIReg64orZR(tt, unop(Iop_16Uto64,
   3486                                          loadLE(Ity_I16, mkexpr(ea))));
    3487                   DIP("ldrh %s, %s\n", nameIReg32orZR(tt), dis_buf);
   3488                } else {
   3489                   storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
   3490                   DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
   3491                }
   3492                break;
   3493             case 0: /* 8 bit */
   3494                if (isLD) {
   3495                   putIReg64orZR(tt, unop(Iop_8Uto64,
   3496                                          loadLE(Ity_I8, mkexpr(ea))));
    3497                   DIP("ldrb %s, %s\n", nameIReg32orZR(tt), dis_buf);
   3498                } else {
   3499                   storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
   3500                   DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
   3501                }
   3502                break;
   3503             default:
   3504                vassert(0);
   3505          }
   3506          return True;
   3507       }
   3508    }
   3509 
   3510    /* -------------- LDRS{B,H,W} (uimm12) -------------- */
   3511    /* 31 29  26  23 21    9 4
   3512       10 111 001 10 imm12 n t   LDRSW Xt, [Xn|SP, #pimm12 * 4]
   3513       01 111 001 1x imm12 n t   LDRSH Rt, [Xn|SP, #pimm12 * 2]
   3514       00 111 001 1x imm12 n t   LDRSB Rt, [Xn|SP, #pimm12 * 1]
   3515       where
   3516          Rt is Wt when x==1, Xt when x==0
   3517    */
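   /* The x bit picks the destination width: e.g. (illustrative) if the
      addressed byte is 0x80, "ldrsb x0, [x1]" leaves 0xFFFFFFFFFFFFFF80
      in x0, whereas "ldrsb w0, [x1]" sign-extends only to 32 bits, so
      the full 64-bit view of the register becomes 0x00000000FFFFFF80. */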
   3518    if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
   3519       /* Further checks on bits 31:30 and 22 */
   3520       Bool valid = False;
   3521       switch ((INSN(31,30) << 1) | INSN(22,22)) {
   3522          case BITS3(1,0,0):
   3523          case BITS3(0,1,0): case BITS3(0,1,1):
   3524          case BITS3(0,0,0): case BITS3(0,0,1):
   3525             valid = True;
   3526             break;
   3527       }
   3528       if (valid) {
   3529          UInt    szLg2 = INSN(31,30);
   3530          UInt    bitX  = INSN(22,22);
   3531          UInt    imm12 = INSN(21,10);
   3532          UInt    nn    = INSN(9,5);
   3533          UInt    tt    = INSN(4,0);
   3534          UInt    szB   = 1 << szLg2;
   3535          IRExpr* ea    = binop(Iop_Add64,
   3536                                getIReg64orSP(nn), mkU64(imm12 * szB));
   3537          switch (szB) {
   3538             case 4:
   3539                vassert(bitX == 0);
   3540                putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
   3541                DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
   3542                    nameIReg64orSP(nn), imm12 * szB);
   3543                break;
   3544             case 2:
   3545                if (bitX == 1) {
   3546                   putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
   3547                } else {
   3548                   putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
   3549                }
   3550                DIP("ldrsh %s, [%s, #%u]\n",
   3551                    nameIRegOrZR(bitX == 0, tt),
   3552                    nameIReg64orSP(nn), imm12 * szB);
   3553                break;
   3554             case 1:
   3555                if (bitX == 1) {
   3556                   putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
   3557                } else {
   3558                   putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
   3559                }
   3560                DIP("ldrsb %s, [%s, #%u]\n",
   3561                    nameIRegOrZR(bitX == 0, tt),
   3562                    nameIReg64orSP(nn), imm12 * szB);
   3563                break;
   3564             default:
   3565                vassert(0);
   3566          }
   3567          return True;
   3568       }
   3569       /* else fall through */
   3570    }
   3571 
   3572    /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
   3573    /* (at-Rn-then-Rn=EA)
   3574       31 29      23 21 20   11 9 4
   3575       00 111 000 1x 0  imm9 01 n t  LDRSB Rt, [Xn|SP], #simm9
   3576       01 111 000 1x 0  imm9 01 n t  LDRSH Rt, [Xn|SP], #simm9
   3577       10 111 000 10 0  imm9 01 n t  LDRSW Xt, [Xn|SP], #simm9
   3578 
   3579       (at-EA-then-Rn=EA)
   3580       00 111 000 1x 0  imm9 11 n t  LDRSB Rt, [Xn|SP, #simm9]!
   3581       01 111 000 1x 0  imm9 11 n t  LDRSH Rt, [Xn|SP, #simm9]!
   3582       10 111 000 10 0  imm9 11 n t  LDRSW Xt, [Xn|SP, #simm9]!
   3583       where
   3584          Rt is Wt when x==1, Xt when x==0
   3585          transfer-at-Rn when [11]==0, at EA when [11]==1
   3586    */
   3587    if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
   3588        && INSN(21,21) == 0 && INSN(10,10) == 1) {
   3589       /* Further checks on bits 31:30 and 22 */
   3590       Bool valid = False;
   3591       switch ((INSN(31,30) << 1) | INSN(22,22)) {
   3592          case BITS3(1,0,0):                    // LDRSW Xt
   3593          case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
   3594          case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
   3595             valid = True;
   3596             break;
   3597       }
   3598       if (valid) {
   3599          UInt   szLg2 = INSN(31,30);
   3600          UInt   imm9  = INSN(20,12);
   3601          Bool   atRN  = INSN(11,11) == 0;
   3602          UInt   nn    = INSN(9,5);
   3603          UInt   tt    = INSN(4,0);
   3604          IRTemp tRN   = newTemp(Ity_I64);
   3605          IRTemp tEA   = newTemp(Ity_I64);
   3606          IRTemp tTA   = IRTemp_INVALID;
   3607          ULong  simm9 = sx_to_64(imm9, 9);
   3608          Bool   is64  = INSN(22,22) == 0;
   3609          assign(tRN, getIReg64orSP(nn));
   3610          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
   3611          tTA = atRN ? tRN : tEA;
   3612          HChar ch = '?';
   3613          /* There are 5 cases:
   3614                byte     load,           SX to 64
   3615                byte     load, SX to 32, ZX to 64
   3616                halfword load,           SX to 64
   3617                halfword load, SX to 32, ZX to 64
   3618                word     load,           SX to 64
   3619             The ifs below handle them in the listed order.
   3620          */
   3621          if (szLg2 == 0) {
   3622             ch = 'b';
   3623             if (is64) {
   3624                putIReg64orZR(tt, unop(Iop_8Sto64,
   3625                                       loadLE(Ity_I8, mkexpr(tTA))));
   3626             } else {
   3627                putIReg32orZR(tt, unop(Iop_8Sto32,
   3628                                       loadLE(Ity_I8, mkexpr(tTA))));
   3629             }
   3630          }
   3631          else if (szLg2 == 1) {
   3632             ch = 'h';
   3633             if (is64) {
   3634                putIReg64orZR(tt, unop(Iop_16Sto64,
   3635                                       loadLE(Ity_I16, mkexpr(tTA))));
   3636             } else {
   3637                putIReg32orZR(tt, unop(Iop_16Sto32,
   3638                                       loadLE(Ity_I16, mkexpr(tTA))));
   3639             }
   3640          }
   3641          else if (szLg2 == 2 && is64) {
   3642             ch = 'w';
   3643             putIReg64orZR(tt, unop(Iop_32Sto64,
   3644                                    loadLE(Ity_I32, mkexpr(tTA))));
   3645          }
   3646          else {
   3647             vassert(0);
   3648          }
   3649          putIReg64orSP(nn, mkexpr(tEA));
    3650          DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!\n",
   3651              ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
   3652          return True;
   3653       }
   3654       /* else fall through */
   3655    }
   3656 
   3657    /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
   3658    /* 31 29      23 21 20   11 9 4
   3659       00 111 000 1x 0  imm9 00 n t  LDURSB Rt, [Xn|SP, #simm9]
   3660       01 111 000 1x 0  imm9 00 n t  LDURSH Rt, [Xn|SP, #simm9]
   3661       10 111 000 10 0  imm9 00 n t  LDURSW Xt, [Xn|SP, #simm9]
   3662       where
   3663          Rt is Wt when x==1, Xt when x==0
   3664    */
   3665    if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
   3666        && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
   3667       /* Further checks on bits 31:30 and 22 */
   3668       Bool valid = False;
   3669       switch ((INSN(31,30) << 1) | INSN(22,22)) {
   3670          case BITS3(1,0,0):                    // LDURSW Xt
   3671          case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
   3672          case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
   3673             valid = True;
   3674             break;
   3675       }
   3676       if (valid) {
   3677          UInt   szLg2 = INSN(31,30);
   3678          UInt   imm9  = INSN(20,12);
   3679          UInt   nn    = INSN(9,5);
   3680          UInt   tt    = INSN(4,0);
   3681          IRTemp tRN   = newTemp(Ity_I64);
   3682          IRTemp tEA   = newTemp(Ity_I64);
   3683          ULong  simm9 = sx_to_64(imm9, 9);
   3684          Bool   is64  = INSN(22,22) == 0;
   3685          assign(tRN, getIReg64orSP(nn));
   3686          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
   3687          HChar ch = '?';
   3688          /* There are 5 cases:
   3689                byte     load,           SX to 64
   3690                byte     load, SX to 32, ZX to 64
   3691                halfword load,           SX to 64
   3692                halfword load, SX to 32, ZX to 64
   3693                word     load,           SX to 64
   3694             The ifs below handle them in the listed order.
   3695          */
   3696          if (szLg2 == 0) {
   3697             ch = 'b';
   3698             if (is64) {
   3699                putIReg64orZR(tt, unop(Iop_8Sto64,
   3700                                       loadLE(Ity_I8, mkexpr(tEA))));
   3701             } else {
   3702                putIReg32orZR(tt, unop(Iop_8Sto32,
   3703                                       loadLE(Ity_I8, mkexpr(tEA))));
   3704             }
   3705          }
   3706          else if (szLg2 == 1) {
   3707             ch = 'h';
   3708             if (is64) {
   3709                putIReg64orZR(tt, unop(Iop_16Sto64,
   3710                                       loadLE(Ity_I16, mkexpr(tEA))));
   3711             } else {
   3712                putIReg32orZR(tt, unop(Iop_16Sto32,
   3713                                       loadLE(Ity_I16, mkexpr(tEA))));
   3714             }
   3715          }
   3716          else if (szLg2 == 2 && is64) {
   3717             ch = 'w';
   3718             putIReg64orZR(tt, unop(Iop_32Sto64,
   3719                                    loadLE(Ity_I32, mkexpr(tEA))));
   3720          }
   3721          else {
   3722             vassert(0);
   3723          }
    3724          DIP("ldurs%c %s, [%s, #%lld]\n",
   3725              ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
   3726          return True;
   3727       }
   3728       /* else fall through */
   3729    }
   3730 
   3731    /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
   3732    /* L==1    => mm==LD
   3733       L==0    => mm==ST
   3734       sz==00  => 32 bit (S) transfers
   3735       sz==01  => 64 bit (D) transfers
   3736       sz==10  => 128 bit (Q) transfers
   3737       sz==11  isn't allowed
   3738       simm7 is scaled by the (single-register) transfer size
   3739 
   3740       31 29       22 21   14 9 4
   3741       sz 101 1001 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP], #imm
   3742       (at-Rn-then-Rn=EA)
   3743 
   3744       sz 101 1011 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]!
   3745       (at-EA-then-Rn=EA)
   3746 
   3747       sz 101 1010 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]
   3748       (at-EA)
   3749    */
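   /* Worked example (illustrative): "stp q0, q1, [sp, #-32]!" has sz = 10
      (128-bit registers), so szB = 16 and imm7 = -32/16 = -2.  Note also
      that the load forms below first zero the whole Q register
      (putQReg128) when the transfer is narrower than 128 bits, so the
      unwritten lanes read back as zero. */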
   3750 
   3751    UInt insn_29_23 = INSN(29,23);
   3752    if (insn_29_23 == BITS7(1,0,1,1,0,0,1)
   3753        || insn_29_23 == BITS7(1,0,1,1,0,1,1)
   3754        || insn_29_23 == BITS7(1,0,1,1,0,1,0)) {
   3755       UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
   3756       Bool isLD   = INSN(22,22) == 1;
   3757       Bool wBack  = INSN(23,23) == 1;
   3758       Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
   3759       UInt tt2    = INSN(14,10);
   3760       UInt nn     = INSN(9,5);
   3761       UInt tt1    = INSN(4,0);
   3762       if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
   3763          /* undecodable; fall through */
   3764       } else {
   3765          if (nn == 31) { /* FIXME generate stack alignment check */ }
   3766 
   3767          // Compute the transfer address TA and the writeback address WA.
   3768          UInt   szB = 4 << szSlg2; /* szB is the per-register size */
   3769          IRTemp tRN = newTemp(Ity_I64);
   3770          assign(tRN, getIReg64orSP(nn));
   3771          IRTemp tEA = newTemp(Ity_I64);
   3772          simm7 = szB * simm7;
   3773          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
   3774 
   3775          IRTemp tTA = newTemp(Ity_I64);
   3776          IRTemp tWA = newTemp(Ity_I64);
   3777          switch (INSN(24,23)) {
   3778             case BITS2(0,1):
   3779                assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
   3780             case BITS2(1,1):
   3781                assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
   3782             case BITS2(1,0):
   3783                assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
   3784             default:
   3785                vassert(0); /* NOTREACHED */
   3786          }
   3787 
   3788          IRType ty = Ity_INVALID;
   3789          switch (szB) {
   3790             case 4:  ty = Ity_F32;  break;
   3791             case 8:  ty = Ity_F64;  break;
   3792             case 16: ty = Ity_V128; break;
   3793             default: vassert(0);
   3794          }
   3795 
   3796          /* Normally rN would be updated after the transfer.  However, in
    3797             the special cases typified by
   3798                stp q0, q1, [sp,#-512]!
   3799                stp d0, d1, [sp,#-512]!
   3800                stp s0, s1, [sp,#-512]!
   3801             it is necessary to update SP before the transfer, (1)
   3802             because Memcheck will otherwise complain about a write
   3803             below the stack pointer, and (2) because the segfault
   3804             stack extension mechanism will otherwise extend the stack
   3805             only down to SP before the instruction, which might not be
    3806             far enough, if the -512 offset takes the actual access
   3807             address to the next page.
   3808          */
   3809          Bool earlyWBack
   3810            = wBack && simm7 < 0
   3811              && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
   3812 
   3813          if (wBack && earlyWBack)
   3814             putIReg64orSP(nn, mkexpr(tEA));
   3815 
   3816          if (isLD) {
   3817             if (szB < 16) {
   3818                putQReg128(tt1, mkV128(0x0000));
   3819             }
   3820             putQRegLO(tt1,
   3821                       loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
   3822             if (szB < 16) {
   3823                putQReg128(tt2, mkV128(0x0000));
   3824             }
   3825             putQRegLO(tt2,
   3826                       loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
   3827          } else {
   3828             storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
   3829                     getQRegLO(tt1, ty));
   3830             storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
   3831                     getQRegLO(tt2, ty));
   3832          }
   3833 
   3834          if (wBack && !earlyWBack)
   3835             putIReg64orSP(nn, mkexpr(tEA));
   3836 
   3837          const HChar* fmt_str = NULL;
   3838          switch (INSN(24,23)) {
   3839             case BITS2(0,1):
   3840                fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
   3841                break;
   3842             case BITS2(1,1):
   3843                fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
   3844                break;
   3845             case BITS2(1,0):
    3846                fmt_str = "%sp %s, %s, [%s, #%lld] (at-EA)\n";
   3847                break;
   3848             default:
   3849                vassert(0);
   3850          }
   3851          DIP(fmt_str, isLD ? "ld" : "st",
   3852                       nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
   3853                       nameIReg64orSP(nn), simm7);
   3854          return True;
   3855       }
   3856    }
   3857 
   3858    /* -------------- {LD,ST}R (vector register) --------------- */
   3859    /* 31 29     23  20 15     12 11 9  4
   3860       |  |      |   |  |      |  |  |  |
   3861       00 111100 011 Rm option S  10 Rn Rt  LDR Bt, [Xn|SP, R<m>{ext/sh}]
   3862       01 111100 011 Rm option S  10 Rn Rt  LDR Ht, [Xn|SP, R<m>{ext/sh}]
   3863       10 111100 011 Rm option S  10 Rn Rt  LDR St, [Xn|SP, R<m>{ext/sh}]
   3864       11 111100 011 Rm option S  10 Rn Rt  LDR Dt, [Xn|SP, R<m>{ext/sh}]
   3865       00 111100 111 Rm option S  10 Rn Rt  LDR Qt, [Xn|SP, R<m>{ext/sh}]
   3866 
   3867       00 111100 001 Rm option S  10 Rn Rt  STR Bt, [Xn|SP, R<m>{ext/sh}]
   3868       01 111100 001 Rm option S  10 Rn Rt  STR Ht, [Xn|SP, R<m>{ext/sh}]
   3869       10 111100 001 Rm option S  10 Rn Rt  STR St, [Xn|SP, R<m>{ext/sh}]
   3870       11 111100 001 Rm option S  10 Rn Rt  STR Dt, [Xn|SP, R<m>{ext/sh}]
   3871       00 111100 101 Rm option S  10 Rn Rt  STR Qt, [Xn|SP, R<m>{ext/sh}]
   3872    */
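   /* Note: szLg2 here runs from 0 (Bt) to 4 (Qt).  The Qt forms are
      rejected up front (szLg2 >= 4 branches to the label below), so the
      "case 4: return False" arm of the switch is never reached. */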
   3873    if (INSN(29,24) == BITS6(1,1,1,1,0,0)
   3874        && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
   3875       HChar  dis_buf[64];
   3876       UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
   3877       Bool   isLD  = INSN(22,22) == 1;
   3878       UInt   tt    = INSN(4,0);
   3879       if (szLg2 >= 4) goto after_LDR_STR_vector_register;
   3880       IRTemp ea    = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
   3881       if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
   3882       switch (szLg2) {
   3883          case 0: /* 8 bit */
   3884             if (isLD) {
   3885                putQReg128(tt, mkV128(0x0000));
   3886                putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
   3887                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
   3888             } else {
   3889                vassert(0); //ATC
   3890                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
   3891                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
   3892             }
   3893             break;
   3894          case 1:
   3895             if (isLD) {
   3896                putQReg128(tt, mkV128(0x0000));
   3897                putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
   3898                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
   3899             } else {
   3900                vassert(0); //ATC
   3901                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
   3902                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
   3903             }
   3904             break;
   3905          case 2: /* 32 bit */
   3906             if (isLD) {
   3907                putQReg128(tt, mkV128(0x0000));
   3908                putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
   3909                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
   3910             } else {
   3911                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
   3912                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
   3913             }
   3914             break;
   3915          case 3: /* 64 bit */
   3916             if (isLD) {
   3917                putQReg128(tt, mkV128(0x0000));
   3918                putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
   3919                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
   3920             } else {
   3921                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
   3922                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
   3923             }
   3924             break;
   3925          case 4:  return False; //ATC
   3926          default: vassert(0);
   3927       }
   3928       return True;
   3929    }
   3930   after_LDR_STR_vector_register:
   3931 
   3932    /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
   3933    /* 31 29      22 20 15  12 11 9  4
   3934       |  |       |  |  |   |  |  |  |
   3935       10 1110001 01 Rm opt S 10 Rn Rt    LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
   3936 
   3937       01 1110001 01 Rm opt S 10 Rn Rt    LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
   3938       01 1110001 11 Rm opt S 10 Rn Rt    LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
   3939 
   3940       00 1110001 01 Rm opt S 10 Rn Rt    LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
   3941       00 1110001 11 Rm opt S 10 Rn Rt    LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
   3942    */
   3943    if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
   3944        && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
   3945       HChar  dis_buf[64];
   3946       UInt   szLg2  = INSN(31,30);
   3947       Bool   sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
   3948       UInt   tt     = INSN(4,0);
   3949       if (szLg2 == 3) goto after_LDRS_integer_register;
   3950       IRTemp ea     = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
   3951       if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
   3952       /* Enumerate the 5 variants explicitly. */
   3953       if (szLg2 == 2/*32 bit*/ && sxTo64) {
   3954          putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
   3955          DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
   3956          return True;
   3957       }
   3958       else
   3959       if (szLg2 == 1/*16 bit*/) {
   3960          if (sxTo64) {
   3961             putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
   3962             DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
   3963          } else {
   3964             putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
   3965             DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
   3966          }
   3967          return True;
   3968       }
   3969       else
   3970       if (szLg2 == 0/*8 bit*/) {
   3971          if (sxTo64) {
   3972             putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
   3973             DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
   3974          } else {
   3975             putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
   3976             DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
   3977          }
   3978          return True;
   3979       }
   3980       /* else it's an invalid combination */
   3981    }
   3982   after_LDRS_integer_register:
   3983 
   3984    /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
   3985    /* This is the Unsigned offset variant only.  The Post-Index and
   3986       Pre-Index variants are below.
   3987 
   3988       31 29      23 21    9 4
   3989       00 111 101 01 imm12 n t   LDR Bt, [Xn|SP + imm12 * 1]
   3990       01 111 101 01 imm12 n t   LDR Ht, [Xn|SP + imm12 * 2]
   3991       10 111 101 01 imm12 n t   LDR St, [Xn|SP + imm12 * 4]
   3992       11 111 101 01 imm12 n t   LDR Dt, [Xn|SP + imm12 * 8]
   3993       00 111 101 11 imm12 n t   LDR Qt, [Xn|SP + imm12 * 16]
   3994 
   3995       00 111 101 00 imm12 n t   STR Bt, [Xn|SP + imm12 * 1]
   3996       01 111 101 00 imm12 n t   STR Ht, [Xn|SP + imm12 * 2]
   3997       10 111 101 00 imm12 n t   STR St, [Xn|SP + imm12 * 4]
   3998       11 111 101 00 imm12 n t   STR Dt, [Xn|SP + imm12 * 8]
   3999       00 111 101 10 imm12 n t   STR Qt, [Xn|SP + imm12 * 16]
   4000    */
   4001    if (INSN(29,24) == BITS6(1,1,1,1,0,1)
   4002        && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
   4003       UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
   4004       Bool   isLD   = INSN(22,22) == 1;
   4005       UInt   pimm12 = INSN(21,10) << szLg2;
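               /* The 12-bit immediate is scaled by the transfer size, so the
                  byte offset is imm12 * (1 << szLg2). */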
   4006       UInt   nn     = INSN(9,5);
   4007       UInt   tt     = INSN(4,0);
   4008       IRTemp tEA    = newTemp(Ity_I64);
   4009       IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
   4010       assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
   4011       if (isLD) {
   4012          if (szLg2 < 4) {
   4013             putQReg128(tt, mkV128(0x0000));
   4014          }
   4015          putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
   4016       } else {
   4017          storeLE(mkexpr(tEA), getQRegLO(tt, ty));
   4018       }
   4019       DIP("%s %s, [%s, #%u]\n",
   4020           isLD ? "ldr" : "str",
   4021           nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
   4022       return True;
   4023    }
   4024 
   4025    /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
   4026    /* These are the Post-Index and Pre-Index variants.
   4027 
   4028       31 29      23   20   11 9 4
   4029       (at-Rn-then-Rn=EA)
   4030       00 111 100 01 0 imm9 01 n t   LDR Bt, [Xn|SP], #simm
   4031       01 111 100 01 0 imm9 01 n t   LDR Ht, [Xn|SP], #simm
   4032       10 111 100 01 0 imm9 01 n t   LDR St, [Xn|SP], #simm
   4033       11 111 100 01 0 imm9 01 n t   LDR Dt, [Xn|SP], #simm
   4034       00 111 100 11 0 imm9 01 n t   LDR Qt, [Xn|SP], #simm
   4035 
   4036       (at-EA-then-Rn=EA)
   4037       00 111 100 01 0 imm9 11 n t   LDR Bt, [Xn|SP, #simm]!
   4038       01 111 100 01 0 imm9 11 n t   LDR Ht, [Xn|SP, #simm]!
   4039       10 111 100 01 0 imm9 11 n t   LDR St, [Xn|SP, #simm]!
   4040       11 111 100 01 0 imm9 11 n t   LDR Dt, [Xn|SP, #simm]!
   4041       00 111 100 11 0 imm9 11 n t   LDR Qt, [Xn|SP, #simm]!
   4042 
   4043       Stores are the same except with bit 22 set to 0.
   4044    */
   4045    if (INSN(29,24) == BITS6(1,1,1,1,0,0)
   4046        && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
   4047        && INSN(21,21) == 0 && INSN(10,10) == 1) {
   4048       UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
   4049       Bool   isLD   = INSN(22,22) == 1;
   4050       UInt   imm9   = INSN(20,12);
   4051       Bool   atRN   = INSN(11,11) == 0;
   4052       UInt   nn     = INSN(9,5);
   4053       UInt   tt     = INSN(4,0);
   4054       IRTemp tRN    = newTemp(Ity_I64);
   4055       IRTemp tEA    = newTemp(Ity_I64);
   4056       IRTemp tTA    = IRTemp_INVALID;
   4057       IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
   4058       ULong  simm9  = sx_to_64(imm9, 9);
   4059       assign(tRN, getIReg64orSP(nn));
   4060       assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
   4061       tTA = atRN ? tRN : tEA;
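               /* atRN: post-index, so the transfer uses the original Rn value;
                  otherwise pre-index, using Rn + simm9.  Either way the updated
                  EA is written back to Rn below. */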
   4062       if (isLD) {
   4063          if (szLg2 < 4) {
   4064             putQReg128(tt, mkV128(0x0000));
   4065          }
   4066          putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
   4067       } else {
   4068          storeLE(mkexpr(tTA), getQRegLO(tt, ty));
   4069       }
   4070       putIReg64orSP(nn, mkexpr(tEA));
   4071       DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
   4072           isLD ? "ldr" : "str",
   4073           nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
   4074       return True;
   4075    }
   4076 
   4077    /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
   4078    /* 31 29      23   20   11 9 4
   4079       00 111 100 01 0 imm9 00 n t   LDR Bt, [Xn|SP, #simm]
   4080       01 111 100 01 0 imm9 00 n t   LDR Ht, [Xn|SP, #simm]
   4081       10 111 100 01 0 imm9 00 n t   LDR St, [Xn|SP, #simm]
   4082       11 111 100 01 0 imm9 00 n t   LDR Dt, [Xn|SP, #simm]
   4083       00 111 100 11 0 imm9 00 n t   LDR Qt, [Xn|SP, #simm]
   4084 
   4085       00 111 100 00 0 imm9 00 n t   STR Bt, [Xn|SP, #simm]
   4086       01 111 100 00 0 imm9 00 n t   STR Ht, [Xn|SP, #simm]
   4087       10 111 100 00 0 imm9 00 n t   STR St, [Xn|SP, #simm]
   4088       11 111 100 00 0 imm9 00 n t   STR Dt, [Xn|SP, #simm]
   4089       00 111 100 10 0 imm9 00 n t   STR Qt, [Xn|SP, #simm]
   4090    */
   4091    if (INSN(29,24) == BITS6(1,1,1,1,0,0)
   4092        && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
   4093        && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
   4094       UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
   4095       Bool   isLD   = INSN(22,22) == 1;
   4096       UInt   imm9   = INSN(20,12);
   4097       UInt   nn     = INSN(9,5);
   4098       UInt   tt     = INSN(4,0);
   4099       ULong  simm9  = sx_to_64(imm9, 9);
   4100       IRTemp tEA    = newTemp(Ity_I64);
   4101       IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
   4102       assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
   4103       if (isLD) {
   4104          if (szLg2 < 4) {
   4105             putQReg128(tt, mkV128(0x0000));
   4106          }
   4107          putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
   4108       } else {
   4109          storeLE(mkexpr(tEA), getQRegLO(tt, ty));
   4110       }
   4111       DIP("%s %s, [%s, #%lld]\n",
   4112           isLD ? "ldur" : "stur",
   4113           nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
   4114       return True;
   4115    }
   4116 
   4117    /* ---------------- LDR (literal, SIMD&FP) ---------------- */
   4118    /* 31 29      23    4
   4119       00 011 100 imm19 t    LDR St, [PC + sxTo64(imm19 << 2)]
   4120       01 011 100 imm19 t    LDR Dt, [PC + sxTo64(imm19 << 2)]
   4121       10 011 100 imm19 t    LDR Qt, [PC + sxTo64(imm19 << 2)]
   4122    */
   4123    if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
   4124       UInt   szB   = 4 << INSN(31,30);
   4125       UInt   imm19 = INSN(23,5);
   4126       UInt   tt    = INSN(4,0);
   4127       ULong  ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
   4128       IRType ty    = preferredVectorSubTypeFromSize(szB);
   4129       putQReg128(tt, mkV128(0x0000));
   4130       putQRegLO(tt, loadLE(ty, mkU64(ea)));
   4131       DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
   4132       return True;
   4133    }
   4134 
   4135    /* ---------- LD1/ST1 (single structure, no offset) ---------- */
   4136    /* 31        23        15
   4137       0Q00 1101 0L00 0000 xx0S sz N T
   4138                           ----
   4139                           opcode
   4140       1011 1111 1011 1111 0010 00 0 0 <- mask
   4141       0000 1101 0000 0000 0000 00 0 0 <- result
   4142 
   4143       FIXME does this assume that the host is little endian?
   4144    */
   4145 
   4146    if ((insn & 0xBFBF2000) == 0x0D000000) {
   4147       Bool   isLD = INSN(22,22) == 1;
   4148       UInt   rN   = INSN(9,5);
   4149       UInt   vT   = INSN(4,0);
   4150       UInt   q    = INSN(30, 30);
   4151       UInt   xx   = INSN(15, 14);
   4152       UInt   opcode = INSN(15, 13);
   4153       UInt   s    = INSN(12, 12);
   4154       UInt   sz   = INSN(11, 10);
   4155 
   4156       UInt   index = (q << 3) | (s << 2) | sz;
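               /* The full lane index is Q:S:size (4 bits) for the byte variant;
                  wider element sizes use progressively fewer of its low bits,
                  hence the right shifts in the size-specific cases below. */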
   4157       const HChar* name = "";
   4158       Bool   valid = False;
   4159       IRType laneTy = Ity_I8;
   4160 
   4161       if (opcode == 0x0) { // 8 bit variant
   4162          name = "b";
   4163          valid = True;
   4164       } else if (opcode == 0x2 && (sz & 1) == 0) { // 16 bit variant
   4165          name = "h";
   4166          laneTy = Ity_I16;
   4167          index >>= 1;
   4168          valid = True;
   4169       } else if (opcode == 0x4 && sz == 0x0) { // 32 bit variant
   4170          name = "s";
   4171          laneTy = Ity_I32;
   4172          index >>= 2;
   4173          valid = True;
   4174       } else if (opcode == 0x4 && sz == 0x1 && s == 0) { // 64 bit variant
   4175          name = "d";
   4176          laneTy = Ity_I64;
   4177          index >>= 3;
   4178          valid = True;
   4179       }
   4180 
   4181       if (valid) {
   4182          IRTemp tEA  = newTemp(Ity_I64);
   4183          assign(tEA, getIReg64orSP(rN));
   4184          if (rN == 31) { /* FIXME generate stack alignment check */ }
   4185          if (isLD) {
   4186             putQRegLane(vT, index, loadLE(laneTy, mkexpr(tEA)));
   4187          } else {
   4188             storeLE(mkexpr(tEA), getQRegLane(vT, index, laneTy));
   4189          }
   4190 
    4191          DIP("%s {v%u.%s}[%u], [%s]\n", isLD ? "ld1" : "st1",
   4192              vT, name, index, nameIReg64orSP(rN));
   4193          return True;
   4194       }
   4195 
   4196    }
   4197 
   4198 
   4199    /* ---------- LD1/ST1 (multiple structure, no offset, one register variant) ---------- */
   4200    /* 31        23
   4201       0100 1100 0100 0000 0111 11 N T   LD1 {vT.2d},  [Xn|SP]
   4202       0100 1100 0000 0000 0111 11 N T   ST1 {vT.2d},  [Xn|SP]
   4203       0100 1100 0100 0000 0111 10 N T   LD1 {vT.4s},  [Xn|SP]
   4204       0100 1100 0000 0000 0111 10 N T   ST1 {vT.4s},  [Xn|SP]
   4205       0100 1100 0100 0000 0111 01 N T   LD1 {vT.8h},  [Xn|SP]
   4206       0100 1100 0000 0000 0111 01 N T   ST1 {vT.8h},  [Xn|SP]
   4207       0100 1100 0100 0000 0111 00 N T   LD1 {vT.16b}, [Xn|SP]
   4208       0100 1100 0000 0000 0111 00 N T   ST1 {vT.16b}, [Xn|SP]
   4209       FIXME does this assume that the host is little endian?
   4210    */
   4211    if (   (insn & 0xFFFFF000) == 0x4C407000 // LD1 cases
   4212        || (insn & 0xFFFFF000) == 0x4C007000 // ST1 cases
   4213       ) {
   4214       Bool   isLD = INSN(22,22) == 1;
   4215       UInt   rN   = INSN(9,5);
   4216       UInt   vT   = INSN(4,0);
   4217       IRTemp tEA  = newTemp(Ity_I64);
   4218       const HChar* names[4] = { "2d", "4s", "8h", "16b" };
   4219       const HChar* name = names[INSN(11,10)];
   4220       assign(tEA, getIReg64orSP(rN));
   4221       if (rN == 31) { /* FIXME generate stack alignment check */ }
   4222       if (isLD) {
   4223          putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
   4224       } else {
   4225          storeLE(mkexpr(tEA), getQReg128(vT));
   4226       }
   4227       DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
   4228           vT, name, nameIReg64orSP(rN));
   4229       return True;
   4230    }
   4231 
   4232    /* 31        23
   4233       0000 1100 0100 0000 0111 11 N T   LD1 {vT.1d}, [Xn|SP]
   4234       0000 1100 0000 0000 0111 11 N T   ST1 {vT.1d}, [Xn|SP]
   4235       0000 1100 0100 0000 0111 10 N T   LD1 {vT.2s}, [Xn|SP]
   4236       0000 1100 0000 0000 0111 10 N T   ST1 {vT.2s}, [Xn|SP]
   4237       0000 1100 0100 0000 0111 01 N T   LD1 {vT.4h}, [Xn|SP]
   4238       0000 1100 0000 0000 0111 01 N T   ST1 {vT.4h}, [Xn|SP]
   4239       0000 1100 0100 0000 0111 00 N T   LD1 {vT.8b}, [Xn|SP]
   4240       0000 1100 0000 0000 0111 00 N T   ST1 {vT.8b}, [Xn|SP]
   4241       FIXME does this assume that the host is little endian?
   4242    */
   4243    if (   (insn & 0xFFFFF000) == 0x0C407000 // LD1 cases
   4244        || (insn & 0xFFFFF000) == 0x0C007000 // ST1 cases
   4245       ) {
   4246       Bool   isLD = INSN(22,22) == 1;
   4247       UInt   rN   = INSN(9,5);
   4248       UInt   vT   = INSN(4,0);
   4249       IRTemp tEA  = newTemp(Ity_I64);
   4250       const HChar* names[4] = { "1d", "2s", "4h", "8b" };
   4251       const HChar* name = names[INSN(11,10)];
   4252       assign(tEA, getIReg64orSP(rN));
   4253       if (rN == 31) { /* FIXME generate stack alignment check */ }
   4254       if (isLD) {
   4255          putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
   4256          putQRegLane(vT, 1, mkU64(0));
   4257       } else {
   4258          storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
   4259       }
   4260       DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
   4261           vT, name, nameIReg64orSP(rN));
   4262       return True;
   4263    }
   4264 
   4265    /* ---------- LD1/ST1 (multiple structure, post-index, one register variant) ---------- */
   4266    /* 31        23
   4267       0100 1100 1001 1111 0111 11 N T  ST1 {vT.2d},  [xN|SP], #16
   4268       0100 1100 1101 1111 0111 11 N T  LD1 {vT.2d},  [xN|SP], #16
   4269       0100 1100 1001 1111 0111 10 N T  ST1 {vT.4s},  [xN|SP], #16
   4270       0100 1100 1101 1111 0111 10 N T  LD1 {vT.4s},  [xN|SP], #16
   4271       0100 1100 1001 1111 0111 01 N T  ST1 {vT.8h},  [xN|SP], #16
   4272       0100 1100 1101 1111 0111 01 N T  LD1 {vT.8h},  [xN|SP], #16
   4273       0100 1100 1001 1111 0111 00 N T  ST1 {vT.16b}, [xN|SP], #16
   4274       0100 1100 1101 1111 0111 00 N T  LD1 {vT.16b}, [xN|SP], #16
   4275       Note that #16 is implied and cannot be any other value.
   4276       FIXME does this assume that the host is little endian?
   4277    */
   4278    if (   (insn & 0xFFFFF000) == 0x4CDF7000 // LD1 cases
   4279        || (insn & 0xFFFFF000) == 0x4C9F7000 // ST1 cases
   4280       ) {
   4281       Bool   isLD = INSN(22,22) == 1;
   4282       UInt   rN   = INSN(9,5);
   4283       UInt   vT   = INSN(4,0);
   4284       IRTemp tEA  = newTemp(Ity_I64);
   4285       const HChar* names[4] = { "2d", "4s", "8h", "16b" };
   4286       const HChar* name = names[INSN(11,10)];
   4287       assign(tEA, getIReg64orSP(rN));
   4288       if (rN == 31) { /* FIXME generate stack alignment check */ }
   4289       if (isLD) {
   4290          putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
   4291       } else {
   4292          storeLE(mkexpr(tEA), getQReg128(vT));
   4293       }
   4294       putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16)));
   4295       DIP("%s {v%u.%s}, [%s], #16\n", isLD ? "ld1" : "st1",
   4296           vT, name, nameIReg64orSP(rN));
   4297       return True;
   4298    }
   4299 
   4300    /* 31        23
   4301       0000 1100 1001 1111 0111 11 N T  ST1 {vT.1d}, [xN|SP], #8
   4302       0000 1100 1101 1111 0111 11 N T  LD1 {vT.1d}, [xN|SP], #8
   4303       0000 1100 1001 1111 0111 10 N T  ST1 {vT.2s}, [xN|SP], #8
   4304       0000 1100 1101 1111 0111 10 N T  LD1 {vT.2s}, [xN|SP], #8
   4305       0000 1100 1001 1111 0111 01 N T  ST1 {vT.4h}, [xN|SP], #8
   4306       0000 1100 1101 1111 0111 01 N T  LD1 {vT.4h}, [xN|SP], #8
   4307       0000 1100 1001 1111 0111 00 N T  ST1 {vT.8b}, [xN|SP], #8
   4308       0000 1100 1101 1111 0111 00 N T  LD1 {vT.8b}, [xN|SP], #8
   4309       Note that #8 is implied and cannot be any other value.
   4310       FIXME does this assume that the host is little endian?
   4311    */
   4312    if (   (insn & 0xFFFFF000) == 0x0CDF7000 // LD1 cases
   4313        || (insn & 0xFFFFF000) == 0x0C9F7000 // ST1 cases
   4314       ) {
   4315       Bool   isLD = INSN(22,22) == 1;
   4316       UInt   rN  = INSN(9,5);
   4317       UInt   vT  = INSN(4,0);
   4318       IRTemp tEA = newTemp(Ity_I64);
   4319       const HChar* names[4] = { "1d", "2s", "4h", "8b" };
   4320       const HChar* name = names[INSN(11,10)];
   4321       assign(tEA, getIReg64orSP(rN));
   4322       if (rN == 31) { /* FIXME generate stack alignment check */ }
   4323       if (isLD) {
   4324          putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
   4325          putQRegLane(vT, 1, mkU64(0));
   4326       } else {
   4327          storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
   4328       }
   4329       putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8)));
   4330       DIP("%s {v%u.%s}, [%s], #8\n",  isLD ? "ld1" : "st1",
   4331           vT, name, nameIReg64orSP(rN));
   4332       return True;
   4333    }
   4334 
   4335    /* ---------- LD2/ST2 (multiple structures, post index) ---------- */
   4336    /* Only a very few cases. */
   4337    /* 31        23             11 9 4
   4338       0100 1100 1101 1111 1000 11 n t  LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
   4339       0100 1100 1001 1111 1000 11 n t  ST2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
   4340       0100 1100 1101 1111 1000 10 n t  LD2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
   4341       0100 1100 1001 1111 1000 10 n t  ST2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
   4342    */
   4343    if (   (insn & 0xFFFFFC00) == 0x4CDF8C00 // LD2 .2d
   4344        || (insn & 0xFFFFFC00) == 0x4C9F8C00 // ST2 .2d
   4345        || (insn & 0xFFFFFC00) == 0x4CDF8800 // LD2 .4s
   4346        || (insn & 0xFFFFFC00) == 0x4C9F8800 // ST2 .4s
   4347       ) {
   4348       Bool   isLD = INSN(22,22) == 1;
   4349       UInt   rN   = INSN(9,5);
   4350       UInt   vT   = INSN(4,0);
   4351       IRTemp tEA  = newTemp(Ity_I64);
   4352       UInt   sz   = INSN(11,10);
   4353       const HChar* name = "??";
   4354       assign(tEA, getIReg64orSP(rN));
   4355       if (rN == 31) { /* FIXME generate stack alignment check */ }
   4356       IRExpr* tEA_0  = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
   4357       IRExpr* tEA_8  = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
   4358       IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
   4359       IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
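               /* LD2/ST2 de-interleave: structure elements alternate in memory,
                  so even-numbered elements belong to vT and odd-numbered ones
                  to v(T+1). */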
   4360       if (sz == BITS2(1,1)) {
   4361          name = "2d";
   4362          if (isLD) {
   4363             putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
   4364             putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_16));
   4365             putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
   4366             putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_24));
   4367          } else {
   4368             storeLE(tEA_0,  getQRegLane((vT+0) % 32, 0, Ity_I64));
   4369             storeLE(tEA_16, getQRegLane((vT+0) % 32, 1, Ity_I64));
   4370             storeLE(tEA_8,  getQRegLane((vT+1) % 32, 0, Ity_I64));
   4371             storeLE(tEA_24, getQRegLane((vT+1) % 32, 1, Ity_I64));
   4372          }
   4373       }
   4374       else if (sz == BITS2(1,0)) {
   4375          /* Uh, this is ugly.  TODO: better. */
   4376          name = "4s";
   4377          IRExpr* tEA_4  = binop(Iop_Add64, mkexpr(tEA), mkU64(4));
   4378          IRExpr* tEA_12 = binop(Iop_Add64, mkexpr(tEA), mkU64(12));
   4379          IRExpr* tEA_20 = binop(Iop_Add64, mkexpr(tEA), mkU64(20));
   4380          IRExpr* tEA_28 = binop(Iop_Add64, mkexpr(tEA), mkU64(28));
   4381          if (isLD) {
   4382             putQRegLane((vT+0) % 32, 0, loadLE(Ity_I32, tEA_0));
   4383             putQRegLane((vT+0) % 32, 1, loadLE(Ity_I32, tEA_8));
   4384             putQRegLane((vT+0) % 32, 2, loadLE(Ity_I32, tEA_16));
   4385             putQRegLane((vT+0) % 32, 3, loadLE(Ity_I32, tEA_24));
   4386             putQRegLane((vT+1) % 32, 0, loadLE(Ity_I32, tEA_4));
   4387             putQRegLane((vT+1) % 32, 1, loadLE(Ity_I32, tEA_12));
   4388             putQRegLane((vT+1) % 32, 2, loadLE(Ity_I32, tEA_20));
   4389             putQRegLane((vT+1) % 32, 3, loadLE(Ity_I32, tEA_28));
   4390          } else {
   4391             storeLE(tEA_0,  getQRegLane((vT+0) % 32, 0, Ity_I32));
   4392             storeLE(tEA_8,  getQRegLane((vT+0) % 32, 1, Ity_I32));
   4393             storeLE(tEA_16, getQRegLane((vT+0) % 32, 2, Ity_I32));
   4394             storeLE(tEA_24, getQRegLane((vT+0) % 32, 3, Ity_I32));
   4395             storeLE(tEA_4,  getQRegLane((vT+1) % 32, 0, Ity_I32));
   4396             storeLE(tEA_12, getQRegLane((vT+1) % 32, 1, Ity_I32));
   4397             storeLE(tEA_20, getQRegLane((vT+1) % 32, 2, Ity_I32));
   4398             storeLE(tEA_28, getQRegLane((vT+1) % 32, 3, Ity_I32));
   4399          }
   4400       }
   4401       else {
   4402          vassert(0); // Can't happen.
   4403       }
   4404       putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
   4405       DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld2" : "st2",
   4406           (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
   4407       return True;
   4408    }
   4409 
   4410    /* ---------- LD1/ST1 (multiple structures, no offset) ---------- */
   4411    /* Only a very few cases. */
   4412    /* 31        23
   4413       0100 1100 0100 0000 1010 00 n t  LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
   4414       0100 1100 0000 0000 1010 00 n t  ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
   4415    */
   4416    if (   (insn & 0xFFFFFC00) == 0x4C40A000 // LD1
   4417        || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1
   4418       ) {
   4419       Bool   isLD = INSN(22,22) == 1;
   4420       UInt   rN   = INSN(9,5);
   4421       UInt   vT   = INSN(4,0);
   4422       IRTemp tEA  = newTemp(Ity_I64);
   4423       const HChar* name = "16b";
   4424       assign(tEA, getIReg64orSP(rN));
   4425       if (rN == 31) { /* FIXME generate stack alignment check */ }
   4426       IRExpr* tEA_0  = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
   4427       IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
   4428       if (isLD) {
   4429          putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
   4430          putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
   4431       } else {
   4432          storeLE(tEA_0,  getQReg128((vT+0) % 32));
   4433          storeLE(tEA_16, getQReg128((vT+1) % 32));
   4434       }
    4435       DIP("%s {v%u.%s, v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
   4436           (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
   4437       return True;
   4438    }
   4439 
   4440    /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
   4441    /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
   4442    /* 31 29     23  20      14    9 4
   4443       sz 001000 010 11111 0 11111 n t   LDX{R,RH,RB}  Rt, [Xn|SP]
   4444       sz 001000 010 11111 1 11111 n t   LDAX{R,RH,RB} Rt, [Xn|SP]
   4445       sz 001000 000 s     0 11111 n t   STX{R,RH,RB}  Ws, Rt, [Xn|SP]
   4446       sz 001000 000 s     1 11111 n t   STLX{R,RH,RB} Ws, Rt, [Xn|SP]
   4447    */
   4448    if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
   4449        && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
   4450        && INSN(14,10) == BITS5(1,1,1,1,1)) {
   4451       UInt szBlg2     = INSN(31,30);
   4452       Bool isLD       = INSN(22,22) == 1;
   4453       Bool isAcqOrRel = INSN(15,15) == 1;
   4454       UInt ss         = INSN(20,16);
   4455       UInt nn         = INSN(9,5);
   4456       UInt tt         = INSN(4,0);
   4457 
   4458       vassert(szBlg2 < 4);
   4459       UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
   4460       IRType ty  = integerIRTypeOfSize(szB);
   4461       const HChar* suffix[4] = { "rb", "rh", "r", "r" };
   4462 
   4463       IRTemp ea = newTemp(Ity_I64);
   4464       assign(ea, getIReg64orSP(nn));
   4465       /* FIXME generate check that ea is szB-aligned */
   4466 
   4467       if (isLD && ss == BITS5(1,1,1,1,1)) {
   4468          IRTemp res = newTemp(ty);
   4469          stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
   4470          putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
   4471          if (isAcqOrRel) {
   4472             stmt(IRStmt_MBE(Imbe_Fence));
   4473          }
   4474          DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
   4475              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
   4476          return True;
   4477       }
   4478       if (!isLD) {
   4479          if (isAcqOrRel) {
   4480             stmt(IRStmt_MBE(Imbe_Fence));
   4481          }
   4482          IRTemp  res  = newTemp(Ity_I1);
   4483          IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
   4484          stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
   4485          /* IR semantics: res is 1 if store succeeds, 0 if it fails.
   4486             Need to set rS to 1 on failure, 0 on success. */
   4487          putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
   4488                                             mkU64(1)));
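                  /* That is, rS = res ^ 1: LLSC success (res == 1) gives 0,
                     failure gives 1, as the architecture requires. */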
   4489          DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
   4490              nameIRegOrZR(False, ss),
   4491              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
   4492          return True;
   4493       }
   4494       /* else fall through */
   4495    }
   4496 
   4497    /* ------------------ LDA{R,RH,RB} ------------------ */
   4498    /* ------------------ STL{R,RH,RB} ------------------ */
   4499    /* 31 29     23  20      14    9 4
   4500       sz 001000 110 11111 1 11111 n t   LDAR<sz> Rt, [Xn|SP]
   4501       sz 001000 100 11111 1 11111 n t   STLR<sz> Rt, [Xn|SP]
   4502    */
   4503    if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
   4504        && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
   4505       UInt szBlg2 = INSN(31,30);
   4506       Bool isLD   = INSN(22,22) == 1;
   4507       UInt nn     = INSN(9,5);
   4508       UInt tt     = INSN(4,0);
   4509 
   4510       vassert(szBlg2 < 4);
   4511       UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
   4512       IRType ty  = integerIRTypeOfSize(szB);
   4513       const HChar* suffix[4] = { "rb", "rh", "r", "r" };
   4514 
   4515       IRTemp ea = newTemp(Ity_I64);
   4516       assign(ea, getIReg64orSP(nn));
   4517       /* FIXME generate check that ea is szB-aligned */
   4518 
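               /* Acquire loads are modelled as load-then-fence and release
                  stores as fence-then-store; Imbe_Fence is stronger than
                  strictly required, which is safe. */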
   4519       if (isLD) {
   4520          IRTemp res = newTemp(ty);
   4521          assign(res, loadLE(ty, mkexpr(ea)));
   4522          putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
   4523          stmt(IRStmt_MBE(Imbe_Fence));
   4524          DIP("lda%s %s, [%s]\n", suffix[szBlg2],
   4525              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
   4526       } else {
   4527          stmt(IRStmt_MBE(Imbe_Fence));
   4528          IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
   4529          storeLE(mkexpr(ea), data);
   4530          DIP("stl%s %s, [%s]\n", suffix[szBlg2],
   4531              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
   4532       }
   4533       return True;
   4534    }
   4535 
   4536    /* ------------------ PRFM (immediate) ------------------ */
   4537    /* 31 29        21    9 4
   4538       11 11100110  imm12 n t PRFM <option>, [Xn|SP{, #pimm}]
   4539    */
   4540 
   4541    if (INSN(31, 22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
   4542       /* TODO: decode */
    4543       DIP("prfm ??? (imm)\n");
   4544       return True;
   4545    }
   4546 
   4547    vex_printf("ARM64 front end: load_store\n");
   4548    return False;
   4549 #  undef INSN
   4550 }
   4551 
   4552 
   4553 /*------------------------------------------------------------*/
   4554 /*--- Control flow and misc instructions                   ---*/
   4555 /*------------------------------------------------------------*/
   4556 
   4557 static
   4558 Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
   4559                           VexArchInfo* archinfo)
   4560 {
   4561 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   4562 
   4563    /* ---------------------- B cond ----------------------- */
   4564    /* 31        24    4 3
   4565       0101010 0 imm19 0 cond */
   4566    if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
   4567       UInt  cond   = INSN(3,0);
   4568       ULong uimm64 = INSN(23,5) << 2;
   4569       Long  simm64 = (Long)sx_to_64(uimm64, 21);
   4570       vassert(dres->whatNext    == Dis_Continue);
   4571       vassert(dres->len         == 4);
   4572       vassert(dres->continueAt  == 0);
   4573       vassert(dres->jk_StopHere == Ijk_INVALID);
   4574       stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
   4575                         Ijk_Boring,
   4576                         IRConst_U64(guest_PC_curr_instr + simm64),
   4577                         OFFB_PC) );
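               /* The side-exit above is taken when the condition holds;
                  otherwise we fall through to the next instruction, whose
                  address is written to the PC here. */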
   4578       putPC(mkU64(guest_PC_curr_instr + 4));
   4579       dres->whatNext    = Dis_StopHere;
   4580       dres->jk_StopHere = Ijk_Boring;
   4581       DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
   4582       return True;
   4583    }
   4584 
   4585    /* -------------------- B{L} uncond -------------------- */
   4586    if (INSN(30,26) == BITS5(0,0,1,0,1)) {
   4587       /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
    4588          100101 imm26  BL (PC + sxTo64(imm26 << 2))
   4589       */
   4590       UInt  bLink  = INSN(31,31);
   4591       ULong uimm64 = INSN(25,0) << 2;
   4592       Long  simm64 = (Long)sx_to_64(uimm64, 28);
   4593       if (bLink) {
   4594          putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
   4595       }
   4596       putPC(mkU64(guest_PC_curr_instr + simm64));
   4597       dres->whatNext = Dis_StopHere;
   4598       dres->jk_StopHere = Ijk_Call;
   4599       DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
   4600                           guest_PC_curr_instr + simm64);
   4601       return True;
   4602    }
   4603 
   4604    /* --------------------- B{L} reg --------------------- */
   4605    /* 31      24 22 20    15     9  4
   4606       1101011 00 10 11111 000000 nn 00000  RET  Rn
   4607       1101011 00 01 11111 000000 nn 00000  CALL Rn
   4608       1101011 00 00 11111 000000 nn 00000  JMP  Rn
   4609    */
   4610    if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
   4611        && INSN(20,16) == BITS5(1,1,1,1,1)
   4612        && INSN(15,10) == BITS6(0,0,0,0,0,0)
   4613        && INSN(4,0) == BITS5(0,0,0,0,0)) {
   4614       UInt branch_type = INSN(22,21);
   4615       UInt nn          = INSN(9,5);
   4616       if (branch_type == BITS2(1,0) /* RET */) {
   4617          putPC(getIReg64orZR(nn));
   4618          dres->whatNext = Dis_StopHere;
   4619          dres->jk_StopHere = Ijk_Ret;
   4620          DIP("ret %s\n", nameIReg64orZR(nn));
   4621          return True;
   4622       }
   4623       if (branch_type == BITS2(0,1) /* CALL */) {
   4624          IRTemp dst = newTemp(Ity_I64);
   4625          assign(dst, getIReg64orZR(nn));
   4626          putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
   4627          putPC(mkexpr(dst));
   4628          dres->whatNext = Dis_StopHere;
   4629          dres->jk_StopHere = Ijk_Call;
   4630          DIP("blr %s\n", nameIReg64orZR(nn));
   4631          return True;
   4632       }
   4633       if (branch_type == BITS2(0,0) /* JMP */) {
   4634          putPC(getIReg64orZR(nn));
   4635          dres->whatNext = Dis_StopHere;
   4636          dres->jk_StopHere = Ijk_Boring;
   4637          DIP("jmp %s\n", nameIReg64orZR(nn));
   4638          return True;
   4639       }
   4640    }
   4641 
   4642    /* -------------------- CB{N}Z -------------------- */
   4643    /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
   4644       sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
   4645    */
   4646    if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
   4647       Bool    is64   = INSN(31,31) == 1;
   4648       Bool    bIfZ   = INSN(24,24) == 0;
   4649       ULong   uimm64 = INSN(23,5) << 2;
   4650       UInt    rT     = INSN(4,0);
   4651       Long    simm64 = (Long)sx_to_64(uimm64, 21);
   4652       IRExpr* cond   = NULL;
   4653       if (is64) {
   4654          cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
   4655                       getIReg64orZR(rT), mkU64(0));
   4656       } else {
   4657          cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
   4658                       getIReg32orZR(rT), mkU32(0));
   4659       }
   4660       stmt( IRStmt_Exit(cond,
   4661                         Ijk_Boring,
   4662                         IRConst_U64(guest_PC_curr_instr + simm64),
   4663                         OFFB_PC) );
   4664       putPC(mkU64(guest_PC_curr_instr + 4));
   4665       dres->whatNext    = Dis_StopHere;
   4666       dres->jk_StopHere = Ijk_Boring;
   4667       DIP("cb%sz %s, 0x%llx\n",
   4668           bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
   4669           guest_PC_curr_instr + simm64);
   4670       return True;
   4671    }
   4672 
   4673    /* -------------------- TB{N}Z -------------------- */
   4674    /* 31 30      24 23  18  5 4
   4675       b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
   4676       b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
   4677    */
   4678    if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
   4679       UInt    b5     = INSN(31,31);
   4680       Bool    bIfZ   = INSN(24,24) == 0;
   4681       UInt    b40    = INSN(23,19);
   4682       UInt    imm14  = INSN(18,5);
   4683       UInt    tt     = INSN(4,0);
   4684       UInt    bitNo  = (b5 << 5) | b40;
   4685       ULong   uimm64 = imm14 << 2;
   4686       Long    simm64 = sx_to_64(uimm64, 16);
   4687       IRExpr* cond
   4688          = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
   4689                  binop(Iop_And64,
   4690                        binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
   4691                        mkU64(1)),
   4692                  mkU64(0));
   4693       stmt( IRStmt_Exit(cond,
   4694                         Ijk_Boring,
   4695                         IRConst_U64(guest_PC_curr_instr + simm64),
   4696                         OFFB_PC) );
   4697       putPC(mkU64(guest_PC_curr_instr + 4));
   4698       dres->whatNext    = Dis_StopHere;
   4699       dres->jk_StopHere = Ijk_Boring;
   4700       DIP("tb%sz %s, #%u, 0x%llx\n",
   4701           bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
   4702           guest_PC_curr_instr + simm64);
   4703       return True;
   4704    }
   4705 
   4706    /* -------------------- SVC -------------------- */
   4707    /* 11010100 000 imm16 000 01
   4708       Don't bother with anything except the imm16==0 case.
   4709    */
   4710    if (INSN(31,0) == 0xD4000001) {
   4711       putPC(mkU64(guest_PC_curr_instr + 4));
   4712       dres->whatNext    = Dis_StopHere;
   4713       dres->jk_StopHere = Ijk_Sys_syscall;
   4714       DIP("svc #0\n");
   4715       return True;
   4716    }
   4717 
   4718    /* ------------------ M{SR,RS} ------------------ */
   4719    /* Only handles the case where the system register is TPIDR_EL0.
   4720       0xD51BD0 010 Rt   MSR tpidr_el0, rT
   4721       0xD53BD0 010 Rt   MRS rT, tpidr_el0
   4722    */
   4723    if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
   4724        || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
   4725       Bool toSys = INSN(21,21) == 0;
   4726       UInt tt    = INSN(4,0);
   4727       if (toSys) {
   4728          stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
   4729          DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
   4730       } else {
   4731          putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
   4732          DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
   4733       }
   4734       return True;
   4735    }
   4736    /* Cases for FPCR
   4737       0xD51B44 000 Rt  MSR fpcr, rT
    4738       0xD53B44 000 Rt  MRS rT, fpcr
   4739    */
   4740    if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
   4741        || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
   4742       Bool toSys = INSN(21,21) == 0;
   4743       UInt tt    = INSN(4,0);
   4744       if (toSys) {
   4745          stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
   4746          DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
   4747       } else {
   4748          putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
   4749          DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
   4750       }
   4751       return True;
   4752    }
   4753    /* Cases for FPSR
   4754       0xD51B44 001 Rt  MSR fpsr, rT
    4755       0xD53B44 001 Rt  MRS rT, fpsr
   4756    */
   4757    if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
   4758        || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
   4759       Bool toSys = INSN(21,21) == 0;
   4760       UInt tt    = INSN(4,0);
   4761       if (toSys) {
   4762          stmt( IRStmt_Put( OFFB_FPSR, getIReg32orZR(tt)) );
   4763          DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
   4764       } else {
   4765          putIReg32orZR(tt, IRExpr_Get(OFFB_FPSR, Ity_I32));
   4766          DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
   4767       }
   4768       return True;
   4769    }
   4770    /* Cases for NZCV
   4771       D51B42 000 Rt  MSR nzcv, rT
   4772       D53B42 000 Rt  MRS rT, nzcv
   4773    */
   4774    if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
   4775        || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
   4776       Bool  toSys = INSN(21,21) == 0;
   4777       UInt  tt    = INSN(4,0);
   4778       if (toSys) {
   4779          IRTemp t = newTemp(Ity_I64);
   4780          assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
   4781          setFlags_COPY(t);
    4782          DIP("msr nzcv, %s\n", nameIReg32orZR(tt));
   4783       } else {
   4784          IRTemp res = newTemp(Ity_I64);
   4785          assign(res, mk_arm64g_calculate_flags_nzcv());
   4786          putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
   4787          DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
   4788       }
   4789       return True;
   4790    }
   4791    /* Cases for DCZID_EL0
   4792       Don't support arbitrary reads and writes to this register.  Just
   4793       return the value 16, which indicates that the DC ZVA instruction
   4794       is not permitted, so we don't have to emulate it.
   4795       D5 3B 00 111 Rt  MRS rT, dczid_el0
   4796    */
   4797    if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
   4798       UInt tt = INSN(4,0);
   4799       putIReg64orZR(tt, mkU64(1<<4));
   4800       DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
   4801       return True;
   4802    }
   4803    /* Cases for CTR_EL0
   4804       We just handle reads, and make up a value from the D and I line
   4805       sizes in the VexArchInfo we are given, and patch in the following
   4806       fields that the Foundation model gives ("natively"):
   4807       CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
    4808       D5 3B 00 001 Rt  MRS rT, ctr_el0
   4809    */
   4810    if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
   4811       UInt tt = INSN(4,0);
   4812       /* Need to generate a value from dMinLine_lg2_szB and
    4813          iMinLine_lg2_szB.  The value in the register is in 32-bit
   4814          units, so need to subtract 2 from the values in the
   4815          VexArchInfo.  We can assume that the values here are valid --
   4816          disInstr_ARM64 checks them -- so there's no need to deal with
   4817          out-of-range cases. */
   4818       vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
   4819               && archinfo->arm64_dMinLine_lg2_szB <= 17
   4820               && archinfo->arm64_iMinLine_lg2_szB >= 2
   4821               && archinfo->arm64_iMinLine_lg2_szB <= 17);
   4822       UInt val
   4823          = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
   4824                       | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
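               /* Eg, with 64-byte D and I cache lines (lg2_szB == 6), both
                  line-size fields are 6 - 2 == 4, giving val == 0x8444C004. */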
   4825       putIReg64orZR(tt, mkU64(val));
   4826       DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
   4827       return True;
   4828    }
   4829 
   4830    /* ------------------ IC_IVAU ------------------ */
   4831    /* D5 0B 75 001 Rt  ic ivau, rT
   4832    */
   4833    if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
   4834       /* We will always be provided with a valid iMinLine value. */
   4835       vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
   4836               && archinfo->arm64_iMinLine_lg2_szB <= 17);
   4837       /* Round the requested address, in rT, down to the start of the
   4838          containing block. */
   4839       UInt   tt      = INSN(4,0);
   4840       ULong  lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
   4841       IRTemp addr    = newTemp(Ity_I64);
   4842       assign( addr, binop( Iop_And64,
   4843                            getIReg64orZR(tt),
   4844                            mkU64(~(lineszB - 1))) );
   4845       /* Set the invalidation range, request exit-and-invalidate, with
   4846          continuation at the next instruction. */
   4847       stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
   4848       stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
   4849       /* be paranoid ... */
   4850       stmt( IRStmt_MBE(Imbe_Fence) );
   4851       putPC(mkU64( guest_PC_curr_instr + 4 ));
   4852       dres->whatNext    = Dis_StopHere;
   4853       dres->jk_StopHere = Ijk_InvalICache;
   4854       DIP("ic ivau, %s\n", nameIReg64orZR(tt));
   4855       return True;
   4856    }
   4857 
   4858    /* ------------------ DC_CVAU ------------------ */
   4859    /* D5 0B 7B 001 Rt  dc cvau, rT
   4860    */
   4861    if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
   4862       /* Exactly the same scheme as for IC IVAU, except we observe the
   4863          dMinLine size, and request an Ijk_FlushDCache instead of
   4864          Ijk_InvalICache. */
   4865       /* We will always be provided with a valid dMinLine value. */
   4866       vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
   4867               && archinfo->arm64_dMinLine_lg2_szB <= 17);
   4868       /* Round the requested address, in rT, down to the start of the
   4869          containing block. */
   4870       UInt   tt      = INSN(4,0);
   4871       ULong  lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
   4872       IRTemp addr    = newTemp(Ity_I64);
   4873       assign( addr, binop( Iop_And64,
   4874                            getIReg64orZR(tt),
   4875                            mkU64(~(lineszB - 1))) );
   4876       /* Set the flush range, request exit-and-flush, with
   4877          continuation at the next instruction. */
   4878       stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
   4879       stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
   4880       /* be paranoid ... */
   4881       stmt( IRStmt_MBE(Imbe_Fence) );
   4882       putPC(mkU64( guest_PC_curr_instr + 4 ));
   4883       dres->whatNext    = Dis_StopHere;
   4884       dres->jk_StopHere = Ijk_FlushDCache;
   4885       DIP("dc cvau, %s\n", nameIReg64orZR(tt));
   4886       return True;
   4887    }
   4888 
   4889    /* ------------------ ISB, DMB, DSB ------------------ */
   4890    if (INSN(31,0) == 0xD5033FDF) {
   4891       stmt(IRStmt_MBE(Imbe_Fence));
   4892       DIP("isb\n");
   4893       return True;
   4894    }
   4895    if (INSN(31,0) == 0xD5033BBF) {
   4896       stmt(IRStmt_MBE(Imbe_Fence));
   4897       DIP("dmb ish\n");
   4898       return True;
   4899    }
   4900    if (INSN(31,0) == 0xD5033ABF) {
   4901       stmt(IRStmt_MBE(Imbe_Fence));
   4902       DIP("dmb ishst\n");
   4903       return True;
   4904    }
   4905    if (INSN(31,0) == 0xD50339BF) {
   4906       stmt(IRStmt_MBE(Imbe_Fence));
   4907       DIP("dmb ishld\n");
   4908       return True;
   4909    }
   4910    if (INSN(31,0) == 0xD5033B9F) {
   4911       stmt(IRStmt_MBE(Imbe_Fence));
   4912       DIP("dsb ish\n");
   4913       return True;
   4914    }
   4915    if (INSN(31,0) == 0xD5033F9F) {
   4916       stmt(IRStmt_MBE(Imbe_Fence));
   4917       DIP("dsb sy\n");
   4918       return True;
   4919    }
   4920 
   4921    /* -------------------- NOP -------------------- */
   4922    if (INSN(31,0) == 0xD503201F) {
   4923       DIP("nop\n");
   4924       return True;
   4925    }
   4926 
   4927   //fail:
   4928    vex_printf("ARM64 front end: branch_etc\n");
   4929    return False;
   4930 #  undef INSN
   4931 }
   4932 
   4933 
   4934 /*------------------------------------------------------------*/
   4935 /*--- SIMD and FP instructions                             ---*/
   4936 /*------------------------------------------------------------*/
   4937 
   4938 /* begin FIXME -- rm temp scaffolding */
   4939 static IRExpr* mk_CatEvenLanes64x2 ( IRTemp, IRTemp );
   4940 static IRExpr* mk_CatOddLanes64x2  ( IRTemp, IRTemp );
   4941 
   4942 static IRExpr* mk_CatEvenLanes32x4 ( IRTemp, IRTemp );
   4943 static IRExpr* mk_CatOddLanes32x4  ( IRTemp, IRTemp );
   4944 static IRExpr* mk_InterleaveLO32x4 ( IRTemp, IRTemp );
   4945 static IRExpr* mk_InterleaveHI32x4 ( IRTemp, IRTemp );
   4946 
   4947 static IRExpr* mk_CatEvenLanes16x8 ( IRTemp, IRTemp );
   4948 static IRExpr* mk_CatOddLanes16x8  ( IRTemp, IRTemp );
   4949 static IRExpr* mk_InterleaveLO16x8 ( IRTemp, IRTemp );
   4950 static IRExpr* mk_InterleaveHI16x8 ( IRTemp, IRTemp );
   4951 
   4952 static IRExpr* mk_CatEvenLanes8x16 ( IRTemp, IRTemp );
   4953 static IRExpr* mk_CatOddLanes8x16  ( IRTemp, IRTemp );
   4954 static IRExpr* mk_InterleaveLO8x16 ( IRTemp, IRTemp );
   4955 static IRExpr* mk_InterleaveHI8x16 ( IRTemp, IRTemp );
   4956 /* end FIXME -- rm temp scaffolding */
   4957 
   4958 /* Generate N copies of |bit| in the bottom of a ULong. */
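         /* Eg Replicate(1,3) == 7 (0b111); Replicate(0,N) == 0 for any N. */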
   4959 static ULong Replicate ( ULong bit, Int N )
   4960 {
   4961    vassert(bit <= 1 && N >= 1 && N < 64);
   4962    if (bit == 0) {
   4963       return 0;
    4964    } else {
   4965       /* Careful.  This won't work for N == 64. */
   4966       return (1ULL << N) - 1;
   4967    }
   4968 }
   4969 
   4970 static ULong Replicate32x2 ( ULong bits32 )
   4971 {
   4972    vassert(0 == (bits32 & ~0xFFFFFFFFULL));
   4973    return (bits32 << 32) | bits32;
   4974 }
   4975 
   4976 static ULong Replicate16x4 ( ULong bits16 )
   4977 {
   4978    vassert(0 == (bits16 & ~0xFFFFULL));
   4979    return Replicate32x2((bits16 << 16) | bits16);
   4980 }
   4981 
   4982 static ULong Replicate8x8 ( ULong bits8 )
   4983 {
   4984    vassert(0 == (bits8 & ~0xFFULL));
   4985    return Replicate16x4((bits8 << 8) | bits8);
   4986 }
   4987 
   4988 /* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
   4989    |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
   4990    is 64.  In the former case, the upper 32 bits of the returned value
   4991    are guaranteed to be zero. */
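         /* Worked check of the formula below: imm8 == 0x70 with N == 64 gives
            sign == 0, exp == 0x0FF (9 bits) and frac == 0x30 << 48, which
            reassembles to 0x3FF0000000000000, the IEEE754 double 1.0 -- ie the
            imm8 that FMOV Dd, #1.0 uses. */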
   4992 static ULong VFPExpandImm ( ULong imm8, Int N )
   4993 {
   4994    vassert(imm8 <= 0xFF);
   4995    vassert(N == 32 || N == 64);
   4996    Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
   4997    Int F = N - E - 1;
   4998    ULong imm8_6 = (imm8 >> 6) & 1;
   4999    /* sign: 1 bit */
   5000    /* exp:  E bits */
   5001    /* frac: F bits */
   5002    ULong sign = (imm8 >> 7) & 1;
   5003    ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
   5004    ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
   5005    vassert(sign < (1ULL << 1));
   5006    vassert(exp  < (1ULL << E));
   5007    vassert(frac < (1ULL << F));
   5008    vassert(1 + E + F == N);
   5009    ULong res = (sign << (E+F)) | (exp << F) | frac;
   5010    return res;
   5011 }
   5012 
   5013 /* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   5014    This might fail, as indicated by the returned Bool.  Page 2530 of
   5015    the manual. */
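         /* For example: op == 0, cmode == 0b0000, imm8 == 0xAB succeeds and
            yields *res == 0x000000AB000000AB (case 0, Replicate32x2). */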
   5016 static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
   5017                                UInt op, UInt cmode, UInt imm8 )
   5018 {
   5019    vassert(op <= 1);
   5020    vassert(cmode <= 15);
   5021    vassert(imm8 <= 255);
   5022 
   5023    *res = 0; /* will overwrite iff returning True */
   5024 
   5025    ULong imm64    = 0;
   5026    Bool  testimm8 = False;
   5027 
   5028    switch (cmode >> 1) {
   5029       case 0:
   5030          testimm8 = False; imm64 = Replicate32x2(imm8); break;
   5031       case 1:
   5032          testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
   5033       case 2:
   5034          testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
   5035       case 3:
   5036          testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
    5037       case 4:
    5038          testimm8 = False; imm64 = Replicate16x4(imm8); break;
    5039       case 5:
    5040          testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
    5041       case 6:
    5042          testimm8 = True;
    5043          if ((cmode & 1) == 0)
    5044             imm64 = Replicate32x2((imm8 << 8) | 0xFF);
    5045          else
    5046             imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
    5047          break;
   5048       case 7:
   5049          testimm8 = False;
   5050          if ((cmode & 1) == 0 && op == 0)
   5051              imm64 = Replicate8x8(imm8);
   5052          if ((cmode & 1) == 0 && op == 1) {
   5053              imm64 = 0;   imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
   5054              imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
   5055              imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
   5056              imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
   5057              imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
   5058              imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
   5059              imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
   5060              imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
   5061          }
   5062          if ((cmode & 1) == 1 && op == 0) {
   5063             ULong imm8_7  = (imm8 >> 7) & 1;
   5064             ULong imm8_6  = (imm8 >> 6) & 1;
   5065             ULong imm8_50 = imm8 & 63;
   5066             ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
   5067                           | ((imm8_6 ^ 1)         << (5 + 6 + 19))
   5068                           | (Replicate(imm8_6, 5) << (6 + 19))
   5069                           | (imm8_50              << 19);
   5070             imm64 = Replicate32x2(imm32);
   5071          }
   5072          if ((cmode & 1) == 1 && op == 1) {
   5073             // imm64 = imm8<7>:NOT(imm8<6>)
   5074             //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
   5075             ULong imm8_7  = (imm8 >> 7) & 1;
   5076             ULong imm8_6  = (imm8 >> 6) & 1;
   5077             ULong imm8_50 = imm8 & 63;
   5078             imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
   5079                     | (Replicate(imm8_6, 8) << 54)
   5080                     | (imm8_50 << 48);
   5081          }
   5082          break;
   5083       default:
   5084         vassert(0);
   5085    }
   5086 
   5087    if (testimm8 && imm8 == 0)
   5088       return False;
   5089 
   5090    *res = imm64;
   5091    return True;
   5092 }
   5093 
   5094 
   5095 /* Help a bit for decoding laneage for vector operations that can be
   5096    of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
   5097    and SZ bits, typically for vector floating point. */
   5098 static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI,  /*OUT*/IRType* tyF,
   5099                                /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
   5100                                /*OUT*/const HChar** arrSpec,
   5101                                Bool bitQ, Bool bitSZ )
   5102 {
   5103    vassert(bitQ == True || bitQ == False);
   5104    vassert(bitSZ == True || bitSZ == False);
   5105    if (bitQ && bitSZ) { // 2x64
   5106       if (tyI)       *tyI       = Ity_I64;
   5107       if (tyF)       *tyF       = Ity_F64;
   5108       if (nLanes)    *nLanes    = 2;
   5109       if (zeroUpper) *zeroUpper = False;
   5110       if (arrSpec)   *arrSpec   = "2d";
   5111       return True;
   5112    }
   5113    if (bitQ && !bitSZ) { // 4x32
   5114       if (tyI)       *tyI       = Ity_I32;
   5115       if (tyF)       *tyF       = Ity_F32;
   5116       if (nLanes)    *nLanes    = 4;
   5117       if (zeroUpper) *zeroUpper = False;
   5118       if (arrSpec)   *arrSpec   = "4s";
   5119       return True;
   5120    }
   5121    if (!bitQ && !bitSZ) { // 2x32
   5122       if (tyI)       *tyI       = Ity_I32;
   5123       if (tyF)       *tyF       = Ity_F32;
   5124       if (nLanes)    *nLanes    = 2;
   5125       if (zeroUpper) *zeroUpper = True;
   5126       if (arrSpec)   *arrSpec   = "2s";
   5127       return True;
   5128    }
   5129    // Else impliedly 1x64, which isn't allowed.
   5130    return False;
   5131 }
   5132 
   5133 /* Helper for decoding laneage for simple vector operations,
   5134    eg integer add. */
   5135 static Bool getLaneInfo_SIMPLE ( /*OUT*/Bool* zeroUpper,
   5136                                  /*OUT*/const HChar** arrSpec,
   5137                                  Bool bitQ, UInt szBlg2 )
   5138 {
   5139    vassert(bitQ == True || bitQ == False);
   5140    vassert(szBlg2 < 4);
   5141    Bool zu = False;
   5142    const HChar* as = NULL;
   5143    switch ((szBlg2 << 1) | (bitQ ? 1 : 0)) {
   5144       case 0: zu = True;  as = "8b";  break;
   5145       case 1: zu = False; as = "16b"; break;
   5146       case 2: zu = True;  as = "4h";  break;
   5147       case 3: zu = False; as = "8h";  break;
   5148       case 4: zu = True;  as = "2s";  break;
   5149       case 5: zu = False; as = "4s";  break;
   5150       case 6: return False; // impliedly 1x64
   5151       case 7: zu = False; as = "2d";  break;
   5152       default: vassert(0);
   5153    }
   5154    vassert(as);
   5155    if (arrSpec)   *arrSpec = as;
   5156    if (zeroUpper) *zeroUpper = zu;
   5157    return True;
   5158 }
   5159 
   5160 
   5161 /* Helper for decoding laneage for shift-style vector operations
   5162    that involve an immediate shift amount. */
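         /* Eg immh:immb == 0001:010 (immhb == 10) denotes 8-bit lanes
            (szBlg2 == 0) with a shift amount of 16 - 10 == 6. */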
   5163 static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
   5164                                     UInt immh, UInt immb )
   5165 {
   5166    vassert(immh < (1<<4));
   5167    vassert(immb < (1<<3));
   5168    UInt immhb = (immh << 3) | immb;
   5169    if (immh & 8) {
   5170       if (shift)  *shift  = 128 - immhb;
   5171       if (szBlg2) *szBlg2 = 3;
   5172       return True;
   5173    }
   5174    if (immh & 4) {
   5175       if (shift)  *shift  = 64 - immhb;
   5176       if (szBlg2) *szBlg2 = 2;
   5177       return True;
   5178    }
   5179    if (immh & 2) {
   5180       if (shift)  *shift  = 32 - immhb;
   5181       if (szBlg2) *szBlg2 = 1;
   5182       return True;
   5183    }
   5184    if (immh & 1) {
   5185       if (shift)  *shift  = 16 - immhb;
   5186       if (szBlg2) *szBlg2 = 0;
   5187       return True;
   5188    }
   5189    return False;
   5190 }
   5191 
   5192 
   5193 /* Generate IR to fold all lanes of the V128 value in 'src' as
   5194    characterised by the operator 'op', and return the result in the
   5195    bottom bits of a V128, with all other bits set to zero. */
   5196 static IRTemp math_MINMAXV ( IRTemp src, IROp op )
   5197 {
   5198    /* The basic idea is to use repeated applications of Iop_CatEven*
   5199       and Iop_CatOdd* operators to 'src' so as to clone each lane into
   5200       a complete vector.  Then fold all those vectors with 'op' and
   5201       zero out all but the least significant lane. */
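            /* Eg, in the byte case below, 76543210 becomes 76547654 and
               32103210 after one round, then 76767676 .. 10101010 after two,
               and so on: each round doubles how far every lane has been
               broadcast, until each temp holds a single lane replicated
               across the whole vector. */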
   5202    switch (op) {
   5203       case Iop_Min8Sx16: case Iop_Min8Ux16:
   5204       case Iop_Max8Sx16: case Iop_Max8Ux16: {
   5205          /* NB: temp naming here is misleading -- the naming is for 8
   5206             lanes of 16 bit, whereas what is being operated on is 16
   5207             lanes of 8 bits. */
   5208          IRTemp x76543210 = src;
   5209          IRTemp x76547654 = newTemp(Ity_V128);
   5210          IRTemp x32103210 = newTemp(Ity_V128);
   5211          assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
   5212          assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
   5213          IRTemp x76767676 = newTemp(Ity_V128);
   5214          IRTemp x54545454 = newTemp(Ity_V128);
   5215          IRTemp x32323232 = newTemp(Ity_V128);
   5216          IRTemp x10101010 = newTemp(Ity_V128);
   5217          assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
   5218          assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
   5219          assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
   5220          assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
   5221          IRTemp x77777777 = newTemp(Ity_V128);
   5222          IRTemp x66666666 = newTemp(Ity_V128);
   5223          IRTemp x55555555 = newTemp(Ity_V128);
   5224          IRTemp x44444444 = newTemp(Ity_V128);
   5225          IRTemp x33333333 = newTemp(Ity_V128);
   5226          IRTemp x22222222 = newTemp(Ity_V128);
   5227          IRTemp x11111111 = newTemp(Ity_V128);
   5228          IRTemp x00000000 = newTemp(Ity_V128);
   5229          assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
   5230          assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
   5231          assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
   5232          assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
   5233          assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
   5234          assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
   5235          assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
   5236          assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
   5237          /* Naming not misleading after here. */
   5238          IRTemp xAllF = newTemp(Ity_V128);
   5239          IRTemp xAllE = newTemp(Ity_V128);
   5240          IRTemp xAllD = newTemp(Ity_V128);
   5241          IRTemp xAllC = newTemp(Ity_V128);
   5242          IRTemp xAllB = newTemp(Ity_V128);
   5243          IRTemp xAllA = newTemp(Ity_V128);
   5244          IRTemp xAll9 = newTemp(Ity_V128);
   5245          IRTemp xAll8 = newTemp(Ity_V128);
   5246          IRTemp xAll7 = newTemp(Ity_V128);
   5247          IRTemp xAll6 = newTemp(Ity_V128);
   5248          IRTemp xAll5 = newTemp(Ity_V128);
   5249          IRTemp xAll4 = newTemp(Ity_V128);
   5250          IRTemp xAll3 = newTemp(Ity_V128);
   5251          IRTemp xAll2 = newTemp(Ity_V128);
   5252          IRTemp xAll1 = newTemp(Ity_V128);
   5253          IRTemp xAll0 = newTemp(Ity_V128);
   5254          assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
   5255          assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
   5256          assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
   5257          assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
   5258          assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
   5259          assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
   5260          assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
   5261          assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
   5262          assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
   5263          assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
   5264          assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
   5265          assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
   5266          assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
   5267          assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
   5268          assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
   5269          assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
   5270          IRTemp maxFE = newTemp(Ity_V128);
   5271          IRTemp maxDC = newTemp(Ity_V128);
   5272          IRTemp maxBA = newTemp(Ity_V128);
   5273          IRTemp max98 = newTemp(Ity_V128);
   5274          IRTemp max76 = newTemp(Ity_V128);
   5275          IRTemp max54 = newTemp(Ity_V128);
   5276          IRTemp max32 = newTemp(Ity_V128);
   5277          IRTemp max10 = newTemp(Ity_V128);
   5278          assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
   5279          assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
   5280          assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
   5281          assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
   5282          assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
   5283          assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
   5284          assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
   5285          assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
   5286          IRTemp maxFEDC = newTemp(Ity_V128);
   5287          IRTemp maxBA98 = newTemp(Ity_V128);
   5288          IRTemp max7654 = newTemp(Ity_V128);
   5289          IRTemp max3210 = newTemp(Ity_V128);
   5290          assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
   5291          assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
   5292          assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
   5293          assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
   5294          IRTemp maxFEDCBA98 = newTemp(Ity_V128);
   5295          IRTemp max76543210 = newTemp(Ity_V128);
   5296          assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
   5297          assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
   5298          IRTemp maxAllLanes = newTemp(Ity_V128);
   5299          assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
   5300                                        mkexpr(max76543210)));
   5301          IRTemp res = newTemp(Ity_V128);
   5302          assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
   5303          return res;
   5304       }
   5305       case Iop_Min16Sx8: case Iop_Min16Ux8:
   5306       case Iop_Max16Sx8: case Iop_Max16Ux8: {
   5307          IRTemp x76543210 = src;
   5308          IRTemp x76547654 = newTemp(Ity_V128);
   5309          IRTemp x32103210 = newTemp(Ity_V128);
   5310          assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
   5311          assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
   5312          IRTemp x76767676 = newTemp(Ity_V128);
   5313          IRTemp x54545454 = newTemp(Ity_V128);
   5314          IRTemp x32323232 = newTemp(Ity_V128);
   5315          IRTemp x10101010 = newTemp(Ity_V128);
   5316          assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
   5317          assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
   5318          assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
   5319          assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
   5320          IRTemp x77777777 = newTemp(Ity_V128);
   5321          IRTemp x66666666 = newTemp(Ity_V128);
   5322          IRTemp x55555555 = newTemp(Ity_V128);
   5323          IRTemp x44444444 = newTemp(Ity_V128);
   5324          IRTemp x33333333 = newTemp(Ity_V128);
   5325          IRTemp x22222222 = newTemp(Ity_V128);
   5326          IRTemp x11111111 = newTemp(Ity_V128);
   5327          IRTemp x00000000 = newTemp(Ity_V128);
   5328          assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
   5329          assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
   5330          assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
   5331          assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
   5332          assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
   5333          assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
   5334          assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
   5335          assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
   5336          IRTemp max76 = newTemp(Ity_V128);
   5337          IRTemp max54 = newTemp(Ity_V128);
   5338          IRTemp max32 = newTemp(Ity_V128);
   5339          IRTemp max10 = newTemp(Ity_V128);
   5340          assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
   5341          assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
   5342          assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
   5343          assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
   5344          IRTemp max7654 = newTemp(Ity_V128);
   5345          IRTemp max3210 = newTemp(Ity_V128);
   5346          assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
   5347          assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
   5348          IRTemp max76543210 = newTemp(Ity_V128);
   5349          assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
   5350          IRTemp res = newTemp(Ity_V128);
   5351          assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
   5352          return res;
   5353       }
   5354       case Iop_Min32Sx4: case Iop_Min32Ux4:
   5355       case Iop_Max32Sx4: case Iop_Max32Ux4: {
   5356          IRTemp x3210 = src;
   5357          IRTemp x3232 = newTemp(Ity_V128);
   5358          IRTemp x1010 = newTemp(Ity_V128);
   5359          assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
   5360          assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
   5361          IRTemp x3333 = newTemp(Ity_V128);
   5362          IRTemp x2222 = newTemp(Ity_V128);
   5363          IRTemp x1111 = newTemp(Ity_V128);
   5364          IRTemp x0000 = newTemp(Ity_V128);
   5365          assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
   5366          assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
   5367          assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
   5368          assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
   5369          IRTemp max32 = newTemp(Ity_V128);
   5370          IRTemp max10 = newTemp(Ity_V128);
   5371          assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
   5372          assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
   5373          IRTemp max3210 = newTemp(Ity_V128);
   5374          assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
   5375          IRTemp res = newTemp(Ity_V128);
   5376          assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
   5377          return res;
   5378       }
   5379       default:
   5380          vassert(0);
   5381    }
   5382 }
   5383 
   5384 
   5385 /* Generate IR for TBL and TBX.  This deals with the 128 bit case
   5386    only. */
   5387 static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
   5388                              IRTemp oor_values )
   5389 {
   5390    vassert(len >= 0 && len <= 3);
   5391 
   5392    /* Generate some useful constants as concisely as possible. */
   5393    IRTemp half15 = newTemp(Ity_I64);
   5394    assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
   5395    IRTemp half16 = newTemp(Ity_I64);
   5396    assign(half16, mkU64(0x1010101010101010ULL));
   5397 
   5398    /* A zero vector */
   5399    IRTemp allZero = newTemp(Ity_V128);
   5400    assign(allZero, mkV128(0x0000));
   5401    /* A vector containing 15 in each 8-bit lane */
   5402    IRTemp all15 = newTemp(Ity_V128);
   5403    assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
   5404    /* A vector containing 16 in each 8-bit lane */
   5405    IRTemp all16 = newTemp(Ity_V128);
   5406    assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
   5407    /* A vector containing 32 in each 8-bit lane */
   5408    IRTemp all32 = newTemp(Ity_V128);
   5409    assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
   5410    /* A vector containing 48 in each 8-bit lane */
   5411    IRTemp all48 = newTemp(Ity_V128);
   5412    assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
   5413    /* A vector containing 64 in each 8-bit lane */
   5414    IRTemp all64 = newTemp(Ity_V128);
   5415    assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
   5416 
   5417    /* Group the 16/32/48/64 vectors so as to be indexable. */
   5418    IRTemp allXX[4] = { all16, all32, all48, all64 };
   5419 
   5420    /* Compute the result for each table vector, with zeroes in places
   5421       where the index values are out of range, and OR them into the
   5422       running vector. */
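            /* For instance, with len=2 (three table registers), a source index
               of 35 is picked up on the tabent=2 iteration: the bias is
               allXX[1] = 32, the biased index is 3, which is below 16 and so
               valid, and byte 3 of tab[2] is selected.  On the other
               iterations the biased index is out of range and the lane is
               masked to zero. */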
   5423    IRTemp running_result = newTemp(Ity_V128);
   5424    assign(running_result, mkV128(0));
   5425 
   5426    UInt tabent;
   5427    for (tabent = 0; tabent <= len; tabent++) {
   5428       vassert(tabent >= 0 && tabent < 4);
   5429       IRTemp bias = newTemp(Ity_V128);
   5430       assign(bias,
   5431              mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
   5432       IRTemp biased_indices = newTemp(Ity_V128);
   5433       assign(biased_indices,
   5434              binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
   5435       IRTemp valid_mask = newTemp(Ity_V128);
   5436       assign(valid_mask,
   5437              binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
   5438       IRTemp safe_biased_indices = newTemp(Ity_V128);
   5439       assign(safe_biased_indices,
   5440              binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
   5441       IRTemp results_or_junk = newTemp(Ity_V128);
   5442       assign(results_or_junk,
   5443              binop(Iop_Perm8x16, mkexpr(tab[tabent]),
   5444                                  mkexpr(safe_biased_indices)));
   5445       IRTemp results_or_zero = newTemp(Ity_V128);
   5446       assign(results_or_zero,
   5447              binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
   5448       /* And OR that into the running result. */
   5449       IRTemp tmp = newTemp(Ity_V128);
   5450       assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
   5451                         mkexpr(running_result)));
   5452       running_result = tmp;
   5453    }
   5454 
   5455    /* So now running_result holds the overall result where the indices
   5456       are in range, and zero in out-of-range lanes.  Now we need to
   5457       compute an overall validity mask and use this to copy in the
   5458       lanes in the oor_values for out of range indices.  This is
   5459       unnecessary for TBL but will get folded out by iropt, so we lean
   5460       on that and generate the same code for TBL and TBX here. */
   5461    IRTemp overall_valid_mask = newTemp(Ity_V128);
   5462    assign(overall_valid_mask,
   5463           binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
   5464    IRTemp result = newTemp(Ity_V128);
   5465    assign(result,
   5466           binop(Iop_OrV128,
   5467                 mkexpr(running_result),
   5468                 binop(Iop_AndV128,
   5469                       mkexpr(oor_values),
   5470                       unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
   5471    return result;
   5472 }
   5473 
   5474 
   5475 static
   5476 Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
   5477 {
   5478 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   5479 
   5480    /* ---------------- FMOV (general) ---------------- */
   5481    /* case   30       23   20 18  15     9 4
   5482        (1) 0 00 11110 00 1 00 111 000000 n d     FMOV Sd,      Wn
   5483        (2) 1 00 11110 01 1 00 111 000000 n d     FMOV Dd,      Xn
   5484        (3) 1 00 11110 10 1 01 111 000000 n d     FMOV Vd.D[1], Xn
   5485 
   5486        (4) 0 00 11110 00 1 00 110 000000 n d     FMOV Wd, Sn
   5487        (5) 1 00 11110 01 1 00 110 000000 n d     FMOV Xd, Dn
   5488        (6) 1 00 11110 10 1 01 110 000000 n d     FMOV Xd, Vn.D[1]
   5489    */
   5490    if (INSN(30,24) == BITS7(0,0,1,1,1,1,0)
   5491        && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
   5492       UInt sf = INSN(31,31);
   5493       UInt ty = INSN(23,22); // type
   5494       UInt rm = INSN(20,19); // rmode
   5495       UInt op = INSN(18,16); // opcode
   5496       UInt nn = INSN(9,5);
   5497       UInt dd = INSN(4,0);
   5498       UInt ix = 0; // case
   5499       if (sf == 0) {
   5500          if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
   5501             ix = 1;
   5502          else
   5503          if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
   5504             ix = 4;
   5505       } else {
   5506          vassert(sf == 1);
   5507          if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
   5508             ix = 2;
   5509          else
   5510          if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
   5511             ix = 5;
   5512          else
   5513          if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
   5514             ix = 3;
   5515          else
   5516          if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
   5517             ix = 6;
   5518       }
   5519       if (ix > 0) {
   5520          switch (ix) {
   5521             case 1:
   5522                putQReg128(dd, mkV128(0));
   5523                putQRegLO(dd, getIReg32orZR(nn));
   5524                DIP("fmov s%u, w%u\n", dd, nn);
   5525                break;
   5526             case 2:
   5527                putQReg128(dd, mkV128(0));
   5528                putQRegLO(dd, getIReg64orZR(nn));
   5529                DIP("fmov d%u, x%u\n", dd, nn);
   5530                break;
   5531             case 3:
   5532                putQRegHI64(dd, getIReg64orZR(nn));
   5533                DIP("fmov v%u.d[1], x%u\n", dd, nn);
   5534                break;
   5535             case 4:
   5536                putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
   5537                DIP("fmov w%u, s%u\n", dd, nn);
   5538                break;
   5539             case 5:
   5540                putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
   5541                DIP("fmov x%u, d%u\n", dd, nn);
   5542                break;
   5543             case 6:
   5544                putIReg64orZR(dd, getQRegHI64(nn));
   5545                DIP("fmov x%u, v%u.d[1]\n", dd, nn);
   5546                break;
   5547             default:
   5548                vassert(0);
   5549          }
   5550          return True;
   5551       }
   5552       /* undecodable; fall through */
   5553    }
   5554 
   5555    /* -------------- FMOV (scalar, immediate) -------------- */
   5556    /* 31  28    23   20   12  9     4
   5557       000 11110 00 1 imm8 100 00000 d  FMOV Sd, #imm
   5558       000 11110 01 1 imm8 100 00000 d  FMOV Dd, #imm
   5559    */
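            /* VFPExpandImm maps imm8 onto a small set of FP constants; for
               instance imm8 == 0x70 denotes 1.0 in both the single- and
               double-precision cases. */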
   5560    if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
   5561        && INSN(21,21) == 1 && INSN(12,5) == BITS8(1,0,0,0,0,0,0,0)) {
   5562       Bool  isD  = INSN(22,22) == 1;
   5563       UInt  imm8 = INSN(20,13);
   5564       UInt  dd   = INSN(4,0);
   5565       ULong imm  = VFPExpandImm(imm8, isD ? 64 : 32);
   5566       if (!isD) {
   5567          vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
   5568       }
   5569       putQReg128(dd, mkV128(0));
   5570       putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
   5571       DIP("fmov %s, #0x%llx\n",
   5572           nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
   5573       return True;
   5574    }
   5575 
   5576    /* -------------- {FMOV,MOVI} (vector, immediate) -------------- */
   5577    /* 31    28          18  15    11 9     4
   5578       0q op 01111 00000 abc cmode 01 defgh d  MOV Dd,   #imm (q=0)
   5579                                               MOV Vd.2d #imm (q=1)
   5580       Allowable op:cmode
   5581          FMOV = 1:1111
   5582          MOVI = 0:xx00, 1:0x00, 1:10x0, 1:110x, x:1110
   5583    */
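            /* AdvSIMDExpandImm turns op:cmode:imm8 into a 64-bit pattern; for
               instance op=0, cmode=1110 replicates imm8 into each byte,
               giving the MOVI Vd.8b/16b forms. */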
   5584    if (INSN(31,31) == 0
   5585        && INSN(28,19) == BITS10(0,1,1,1,1,0,0,0,0,0)
   5586        && INSN(11,10) == BITS2(0,1)) {
   5587       UInt  bitQ     = INSN(30,30);
   5588       UInt  bitOP    = INSN(29,29);
   5589       UInt  cmode    = INSN(15,12);
   5590       UInt  imm8     = (INSN(18,16) << 5) | INSN(9,5);
   5591       UInt  dd       = INSN(4,0);
   5592       ULong imm64lo  = 0;
   5593       UInt  op_cmode = (bitOP << 4) | cmode;
   5594       Bool  ok       = False;
   5595       switch (op_cmode) {
   5596          case BITS5(1,1,1,1,1): // 1:1111
   5597          case BITS5(0,0,0,0,0): case BITS5(0,0,1,0,0):
   5598          case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:xx00
   5599          case BITS5(1,0,0,0,0): case BITS5(1,0,1,0,0): // 1:0x00
   5600          case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
   5601          case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
   5602          case BITS5(1,1,1,1,0): case BITS5(0,1,1,1,0): // x:1110
   5603             ok = True; break;
   5604          default:
   5605            break;
   5606       }
   5607       if (ok) {
   5608          ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, imm8);
   5609       }
   5610       if (ok) {
                  /* A bitQ==0 form writes only Dd; the upper 64 bits are zeroed. */
    5611          ULong imm64hi = bitQ == 0  ? 0  : imm64lo;
   5612          putQReg128(dd, binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo)));
   5613          DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
   5614          return True;
   5615       }
   5616       /* else fall through */
   5617    }
   5618 
   5619    /* -------------- {S,U}CVTF (vector, integer, scalar) -------------- */
   5620    /* 31  28    23 21     15     9 4                ix (u:sz)
   5621       010 11110 00 100001 110110 n d  SCVTF Sd, Sn   0
   5622         0       01                    SCVTF Dd, Dn   1
   5623         1       00                    UCVTF Sd, Sn   2
   5624         1       01                    UCVTF Dd, Dn   3
   5625    */
   5626    if (INSN(31,30) == BITS2(0,1) && INSN(28,23) == BITS6(1,1,1,1,0,0)
   5627        && INSN(21, 10) == BITS12(1,0,0,0,0,1,1,1,0,1,1,0)) {
    5628       Bool is64 = INSN(22,22) == 1;
    5629       Bool isU  = INSN(29,29) == 1;
   5630       UInt nn   = INSN(9,5);
   5631       UInt dd   = INSN(4,0);
   5632 
   5633       UInt ix   = (INSN(29,29) << 1) | INSN(22,22);
   5634 
   5635       const IROp ops[4]
   5636         = { Iop_I32StoF32, Iop_I64StoF64,
   5637             Iop_I32UtoF32, Iop_I64UtoF64 };
   5638 
   5639       putQReg128(dd, mkV128(0));
    5640       putQRegLO(dd, binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()),
                                   getQRegLO(nn, is64 ? Ity_I64 : Ity_I32)));
   5641 
   5642       DIP("%ccvtf %s, %s\n",
   5643           isU ? 'u' : 's', nameQRegLO(dd, is64 ? Ity_F64 : Ity_F32),
   5644           nameQRegLO(nn, is64 ? Ity_I64 : Ity_I32));
   5645 
   5646       return True;
   5647    }
    5648    /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   5649    /* 31  28    23 21 20 18  15     9 4                  ix
   5650       000 11110 00 1  00 010 000000 n d  SCVTF Sd, Wn    0
   5651       000 11110 01 1  00 010 000000 n d  SCVTF Dd, Wn    1
   5652       100 11110 00 1  00 010 000000 n d  SCVTF Sd, Xn    2
   5653       100 11110 01 1  00 010 000000 n d  SCVTF Dd, Xn    3
   5654 
   5655       000 11110 00 1  00 011 000000 n d  UCVTF Sd, Wn    4
   5656       000 11110 01 1  00 011 000000 n d  UCVTF Dd, Wn    5
   5657       100 11110 00 1  00 011 000000 n d  UCVTF Sd, Xn    6
   5658       100 11110 01 1  00 011 000000 n d  UCVTF Dd, Xn    7
   5659 
   5660       These are signed/unsigned conversion from integer registers to
   5661       FP registers, all 4 32/64-bit combinations, rounded per FPCR.
   5662    */
   5663    if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,17) == BITS5(1,0,0,0,1)
   5664        && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
   5665       Bool isI64 = INSN(31,31) == 1;
   5666       Bool isF64 = INSN(22,22) == 1;
   5667       Bool isU   = INSN(16,16) == 1;
   5668       UInt nn    = INSN(9,5);
   5669       UInt dd    = INSN(4,0);
   5670       UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
   5671       const IROp ops[8]
   5672         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
   5673             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
   5674       IRExpr* src = getIRegOrZR(isI64, nn);
   5675       IRExpr* res = (isF64 && !isI64)
   5676                        ? unop(ops[ix], src)
   5677                        : binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src);
   5678       putQReg128(dd, mkV128(0));
   5679       putQRegLO(dd, res);
   5680       DIP("%ccvtf %s, %s\n",
   5681           isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
   5682           nameIRegOrZR(isI64, nn));
   5683       return True;
   5684    }
   5685 
   5686    /* ------------ F{ADD,SUB,MUL,DIV,NMUL} (scalar) ------------ */
   5687    /* 31        23  20 15   11 9 4
   5688       ---------------- 0000 ------   FMUL  --------
   5689       000 11110 001 m  0001 10 n d   FDIV  Sd,Sn,Sm
   5690       000 11110 011 m  0001 10 n d   FDIV  Dd,Dn,Dm
   5691       ---------------- 0010 ------   FADD  --------
   5692       ---------------- 0011 ------   FSUB  --------
   5693       ---------------- 1000 ------   FNMUL --------
   5694    */
   5695    if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
   5696        && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
   5697       Bool   isD = INSN(22,22) == 1;
   5698       UInt   mm  = INSN(20,16);
   5699       UInt   op  = INSN(15,12);
   5700       UInt   nn  = INSN(9,5);
   5701       UInt   dd  = INSN(4,0);
   5702       IROp   iop = Iop_INVALID;
   5703       IRType ty  = isD ? Ity_F64 : Ity_F32;
   5704       Bool   neg = False;
   5705       const HChar* nm = "???";
   5706       switch (op) {
   5707          case BITS4(0,0,0,0): nm = "fmul";  iop = mkMULF(ty); break;
   5708          case BITS4(0,0,0,1): nm = "fdiv";  iop = mkDIVF(ty); break;
   5709          case BITS4(0,0,1,0): nm = "fadd";  iop = mkADDF(ty); break;
   5710          case BITS4(0,0,1,1): nm = "fsub";  iop = mkSUBF(ty); break;
   5711          case BITS4(1,0,0,0): nm = "fnmul"; iop = mkMULF(ty);
   5712                               neg = True; break;
   5713          default:             return False;
   5714       }
   5715       vassert(iop != Iop_INVALID);
   5716       IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()),
   5717                            getQRegLO(nn, ty), getQRegLO(mm, ty));
   5718       IRTemp res = newTemp(ty);
   5719       assign(res, neg ? unop(mkNEGF(ty),resE) : resE);
   5720       putQReg128(dd, mkV128(0));
   5721       putQRegLO(dd, mkexpr(res));
   5722       DIP("%s %s, %s, %s\n",
   5723           nm, nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
   5724       return True;
   5725    }
   5726 
   5727    /* ------------ F{MOV,ABS,NEG,SQRT} D/D or S/S ------------ */
   5728    /* 31        23 21    16 14    9 4
   5729       000 11110 00 10000 00 10000 n d  FMOV Sd, Sn
   5730       000 11110 01 10000 00 10000 n d  FMOV Dd, Dn
   5731       ------------------ 01 ---------  FABS ------
   5732       ------------------ 10 ---------  FNEG ------
   5733       ------------------ 11 ---------  FSQRT -----
   5734    */
   5735    if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
   5736        && INSN(21,17) == BITS5(1,0,0,0,0)
   5737        && INSN(14,10) == BITS5(1,0,0,0,0)) {
   5738       Bool   isD = INSN(22,22) == 1;
   5739       UInt   opc = INSN(16,15);
   5740       UInt   nn  = INSN(9,5);
   5741       UInt   dd  = INSN(4,0);
   5742       IRType ty  = isD ? Ity_F64 : Ity_F32;
   5743       IRTemp res = newTemp(ty);
   5744       if (opc == BITS2(0,0)) {
   5745          assign(res, getQRegLO(nn, ty));
   5746          putQReg128(dd, mkV128(0x0000));
   5747          putQRegLO(dd, mkexpr(res));
   5748          DIP("fmov %s, %s\n",
   5749              nameQRegLO(dd, ty), nameQRegLO(nn, ty));
   5750          return True;
   5751       }
   5752       if (opc == BITS2(1,0) || opc == BITS2(0,1)) {
   5753          Bool isAbs = opc == BITS2(0,1);
   5754          IROp op    = isAbs ? mkABSF(ty) : mkNEGF(ty);
   5755          assign(res, unop(op, getQRegLO(nn, ty)));
   5756          putQReg128(dd, mkV128(0x0000));
   5757          putQRegLO(dd, mkexpr(res));
   5758          DIP("%s %s, %s\n", isAbs ? "fabs" : "fneg",
   5759              nameQRegLO(dd, ty), nameQRegLO(nn, ty));
   5760          return True;
   5761       }
   5762       if (opc == BITS2(1,1)) {
   5763          assign(res,
   5764                 binop(mkSQRTF(ty),
   5765                       mkexpr(mk_get_IR_rounding_mode()), getQRegLO(nn, ty)));
   5766          putQReg128(dd, mkV128(0x0000));
   5767          putQRegLO(dd, mkexpr(res));
   5768          DIP("fsqrt %s, %s\n", nameQRegLO(dd, ty), nameQRegLO(nn, ty));
   5769          return True;
   5770       }
   5771       /* else fall through; other cases are ATC */
   5772    }
   5773 
   5774    /* ---------------- F{ABS,NEG} (vector) ---------------- */
   5775    /* 31  28      22 21    16       9 4
   5776       0q0 01110 1 sz 10000 01111 10 n d  FABS Vd.T, Vn.T
   5777       0q1 01110 1 sz 10000 01111 10 n d  FNEG Vd.T, Vn.T
   5778    */
   5779    if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,1)
   5780        && INSN(21,17) == BITS5(1,0,0,0,0)
   5781        && INSN(16,10) == BITS7(0,1,1,1,1,1,0)) {
   5782       UInt bitQ   = INSN(30,30);
   5783       UInt bitSZ  = INSN(22,22);
   5784       Bool isFNEG = INSN(29,29) == 1;
   5785       UInt nn     = INSN(9,5);
   5786       UInt dd     = INSN(4,0);
   5787       const HChar* ar = "??";
   5788       IRType tyF    = Ity_INVALID;
   5789       Bool   zeroHI = False;
   5790       Bool   ok     = getLaneInfo_Q_SZ(NULL, &tyF, NULL, &zeroHI, &ar,
   5791                                        (Bool)bitQ, (Bool)bitSZ);
   5792       if (ok) {
   5793          vassert(tyF == Ity_F64 || tyF == Ity_F32);
   5794          IROp op = (tyF == Ity_F64) ? (isFNEG ? Iop_Neg64Fx2 : Iop_Abs64Fx2)
   5795                                     : (isFNEG ? Iop_Neg32Fx4 : Iop_Abs32Fx4);
   5796          IRTemp res = newTemp(Ity_V128);
   5797          assign(res, unop(op, getQReg128(nn)));
   5798          putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
   5799                                : mkexpr(res));
   5800          DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
   5801              nameQReg128(dd), ar, nameQReg128(nn), ar);
   5802          return True;
   5803       }
   5804       /* else fall through */
   5805    }
   5806 
   5807    /* -------------------- FCMP,FCMPE -------------------- */
   5808    /* 31        23   20    15      9 4
   5809       000 11110 01 1     m 00 1000 n 10 000  FCMPE Dn, Dm
   5810       000 11110 01 1 00000 00 1000 n 11 000  FCMPE Dn, #0.0
   5811       000 11110 01 1     m 00 1000 n 00 000  FCMP  Dn, Dm
   5812       000 11110 01 1 00000 00 1000 n 01 000  FCMP  Dn, #0.0
   5813 
   5814       000 11110 00 1     m 00 1000 n 10 000  FCMPE Sn, Sm
   5815       000 11110 00 1 00000 00 1000 n 11 000  FCMPE Sn, #0.0
   5816       000 11110 00 1     m 00 1000 n 00 000  FCMP  Sn, Sm
   5817       000 11110 00 1 00000 00 1000 n 01 000  FCMP  Sn, #0.0
   5818 
   5819       FCMPE generates Invalid Operation exn if either arg is any kind
   5820       of NaN.  FCMP generates Invalid Operation exn if either arg is a
   5821       signalling NaN.  We ignore this detail here and produce the same
   5822       IR for both.
   5823    */
   5824    if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) && INSN(21,21) == 1
   5825        && INSN(15,10) == BITS6(0,0,1,0,0,0) && INSN(2,0) == BITS3(0,0,0)) {
   5826       Bool   isD     = INSN(22,22) == 1;
   5827       UInt   mm      = INSN(20,16);
   5828       UInt   nn      = INSN(9,5);
   5829       Bool   isCMPE  = INSN(4,4) == 1;
   5830       Bool   cmpZero = INSN(3,3) == 1;
   5831       IRType ty      = isD ? Ity_F64 : Ity_F32;
   5832       Bool   valid   = True;
   5833       if (cmpZero && mm != 0) valid = False;
   5834       if (valid) {
   5835          IRTemp argL  = newTemp(ty);
   5836          IRTemp argR  = newTemp(ty);
   5837          IRTemp irRes = newTemp(Ity_I32);
   5838          assign(argL, getQRegLO(nn, ty));
   5839          assign(argR,
   5840                 cmpZero
   5841                    ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
   5842                    : getQRegLO(mm, ty));
   5843          assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
   5844                              mkexpr(argL), mkexpr(argR)));
   5845          IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
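                  /* Shift the NZCV nibble up to bits 31:28, the architectural
                     position of the N, Z, C and V flags, before copying it
                     into the flags state. */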
   5846          IRTemp nzcv_28x0 = newTemp(Ity_I64);
   5847          assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
   5848          setFlags_COPY(nzcv_28x0);
   5849          DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ty),
   5850              cmpZero ? "#0.0" : nameQRegLO(mm, ty));
   5851          return True;
   5852       }
   5853    }
   5854 
   5855    /* -------------------- F{N}M{ADD,SUB} -------------------- */
   5856    /* 31          22   20 15 14 9 4   ix
   5857       000 11111 0 sz 0 m  0  a  n d   0   FMADD  Fd,Fn,Fm,Fa
   5858       000 11111 0 sz 0 m  1  a  n d   1   FMSUB  Fd,Fn,Fm,Fa
   5859       000 11111 0 sz 1 m  0  a  n d   2   FNMADD Fd,Fn,Fm,Fa
   5860       000 11111 0 sz 1 m  1  a  n d   3   FNMSUB Fd,Fn,Fm,Fa
   5861       where Fx=Dx when sz=1, Fx=Sx when sz=0
   5862 
   5863                -----SPEC------    ----IMPL----
   5864       fmadd       a +    n * m    a + n * m
   5865       fmsub       a + (-n) * m    a - n * m
   5866       fnmadd   (-a) + (-n) * m    -(a + n * m)
   5867       fnmsub   (-a) +    n * m    -(a - n * m)
   5868    */
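            /* The IMPL column follows by distributing the negations:
               (-a) + (-n)*m = -(a + n*m)  and  (-a) + n*m = -(a - n*m). */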
   5869    if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,1,0)) {
   5870       Bool    isD   = INSN(22,22) == 1;
   5871       UInt    mm    = INSN(20,16);
   5872       UInt    aa    = INSN(14,10);
   5873       UInt    nn    = INSN(9,5);
   5874       UInt    dd    = INSN(4,0);
   5875       UInt    ix    = (INSN(21,21) << 1) | INSN(15,15);
   5876       IRType  ty    = isD ? Ity_F64 : Ity_F32;
   5877       IROp    opADD = mkADDF(ty);
   5878       IROp    opSUB = mkSUBF(ty);
   5879       IROp    opMUL = mkMULF(ty);
   5880       IROp    opNEG = mkNEGF(ty);
   5881       IRTemp  res   = newTemp(ty);
   5882       IRExpr* eA    = getQRegLO(aa, ty);
   5883       IRExpr* eN    = getQRegLO(nn, ty);
   5884       IRExpr* eM    = getQRegLO(mm, ty);
   5885       IRExpr* rm    = mkexpr(mk_get_IR_rounding_mode());
   5886       IRExpr* eNxM  = triop(opMUL, rm, eN, eM);
   5887       switch (ix) {
   5888          case 0:  assign(res, triop(opADD, rm, eA, eNxM)); break;
   5889          case 1:  assign(res, triop(opSUB, rm, eA, eNxM)); break;
   5890          case 2:  assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
   5891          case 3:  assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
   5892          default: vassert(0);
   5893       }
   5894       putQReg128(dd, mkV128(0x0000));
   5895       putQRegLO(dd, mkexpr(res));
   5896       const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
   5897       DIP("%s %s, %s, %s, %s\n",
   5898           names[ix], nameQRegLO(dd, ty), nameQRegLO(nn, ty),
   5899                      nameQRegLO(mm, ty), nameQRegLO(aa, ty));
   5900       return True;
   5901    }
   5902 
   5903    /* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */
   5904    /*    30       23   20 18  15     9 4
   5905       sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
   5906       sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
   5907       ---------------- 01 --------------  FCVTP-------- (round to +inf)
   5908       ---------------- 10 --------------  FCVTM-------- (round to -inf)
   5909       ---------------- 11 --------------  FCVTZ-------- (round to zero)
   5910 
   5911       Rd is Xd when sf==1, Wd when sf==0
   5912       Fn is Dn when x==1, Sn when x==0
   5913       20:19 carry the rounding mode, using the same encoding as FPCR
   5914    */
   5915    if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,21) == 1
   5916        && INSN(18,17) == BITS2(0,0) && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
   5917       Bool isI64 = INSN(31,31) == 1;
   5918       Bool isF64 = INSN(22,22) == 1;
   5919       UInt rm    = INSN(20,19);
   5920       Bool isU   = INSN(16,16) == 1;
   5921       UInt nn    = INSN(9,5);
   5922       UInt dd    = INSN(4,0);
   5923       /* Decide on the IR rounding mode to use. */
   5924       IRRoundingMode irrm = 8; /*impossible*/
   5925       HChar ch = '?';
   5926       switch (rm) {
   5927          case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
   5928          case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
   5929          case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
   5930          case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
   5931          default: vassert(0);
   5932       }
   5933       vassert(irrm != 8);
   5934       /* Decide on the conversion primop, based on the source size,
   5935          dest size and signedness (8 possibilities).  Case coding:
   5936             F32 ->s I32   0
   5937             F32 ->u I32   1
   5938             F32 ->s I64   2
   5939             F32 ->u I64   3
   5940             F64 ->s I32   4
   5941             F64 ->u I32   5
   5942             F64 ->s I64   6
   5943             F64 ->u I64   7
   5944       */
   5945       UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
   5946       vassert(ix < 8);
   5947       const IROp ops[8]
   5948          = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
   5949              Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
   5950       IROp op = ops[ix];
   5951       // A bit of ATCery: bounce all cases we haven't seen an example of.
   5952       if (/* F32toI32S */
   5953              (op == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
   5954           || (op == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
   5955           || (op == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
   5956           /* F32toI32U */
   5957           || (op == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
   5958           || (op == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
   5959           /* F32toI64S */
   5960           || (op == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
   5961           /* F32toI64U */
   5962           || (op == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
   5963           || (op == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
   5964           /* F64toI32S */
   5965           || (op == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
   5966           || (op == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
   5967           || (op == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
   5968           /* F64toI32U */
   5969           || (op == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
   5970           || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
   5971           || (op == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
   5972           /* F64toI64S */
   5973           || (op == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
   5974           || (op == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
   5975           || (op == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
   5976           /* F64toI64U */
   5977           || (op == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
   5978           || (op == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
   5979          ) {
   5980         /* validated */
   5981       } else {
   5982         return False;
   5983       }
   5984       IRType srcTy  = isF64 ? Ity_F64 : Ity_F32;
   5985       IRType dstTy  = isI64 ? Ity_I64 : Ity_I32;
   5986       IRTemp src    = newTemp(srcTy);
   5987       IRTemp dst    = newTemp(dstTy);
   5988       assign(src, getQRegLO(nn, srcTy));
   5989       assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
   5990       putIRegOrZR(isI64, dd, mkexpr(dst));
   5991       DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
   5992           nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
   5993       return True;
   5994    }
   5995 
   5996    /* -------- FCVTAS (KLUDGED) (scalar, integer) -------- */
   5997    /*   30       23   20 18  15     9 4
   5998       1 00 11110 0x 1 00 100 000000 n d  FCVTAS Xd, Fn
   5999       0 00 11110 0x 1 00 100 000000 n d  FCVTAS Wd, Fn
   6000       Fn is Dn when x==1, Sn when x==0
   6001    */
   6002    if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0)
   6003        && INSN(21,16) == BITS6(1,0,0,1,0,0)
   6004        && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
   6005       Bool isI64 = INSN(31,31) == 1;
   6006       Bool isF64 = INSN(22,22) == 1;
   6007       UInt nn    = INSN(9,5);
   6008       UInt dd    = INSN(4,0);
   6009       /* Decide on the IR rounding mode to use. */
   6010       /* KLUDGE: should be Irrm_NEAREST_TIE_AWAY_0 */
   6011       IRRoundingMode irrm = Irrm_NEAREST;
   6012       /* Decide on the conversion primop. */
   6013       IROp   op    = isI64 ? (isF64 ? Iop_F64toI64S :  Iop_F32toI64S)
   6014                            : (isF64 ? Iop_F64toI32S :  Iop_F32toI32S);
   6015       IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
   6016       IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
   6017       IRTemp src   = newTemp(srcTy);
   6018       IRTemp dst   = newTemp(dstTy);
   6019       assign(src, getQRegLO(nn, srcTy));
   6020       assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
   6021       putIRegOrZR(isI64, dd, mkexpr(dst));
   6022       DIP("fcvtas %s, %s (KLUDGED)\n",
   6023           nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
   6024       return True;
   6025    }
   6026 
   6027    /* ---------------- FRINT{I,M,P,Z} (scalar) ---------------- */
   6028    /* 31        23 21   17  14    9 4
   6029       000 11110 0x 1001 111 10000 n d  FRINTI Fd, Fm (round per FPCR)
   6030                         rm
   6031       x==0 => S-registers, x==1 => D-registers
   6032       rm (17:15) encodings:
   6033          111 per FPCR  (FRINTI)
   6034          001 +inf      (FRINTP)
   6035          010 -inf      (FRINTM)
   6036          011 zero      (FRINTZ)
   6037          000 tieeven
   6038          100 tieaway   (FRINTA) -- !! FIXME KLUDGED !!
   6039          110 per FPCR + "exact = TRUE"
   6040          101 unallocated
   6041    */
   6042    if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
   6043        && INSN(21,18) == BITS4(1,0,0,1) && INSN(14,10) == BITS5(1,0,0,0,0)) {
   6044       Bool    isD   = INSN(22,22) == 1;
   6045       UInt    rm    = INSN(17,15);
   6046       UInt    nn    = INSN(9,5);
   6047       UInt    dd    = INSN(4,0);
   6048       IRType  ty    = isD ? Ity_F64 : Ity_F32;
   6049       IRExpr* irrmE = NULL;
   6050       UChar   ch    = '?';
   6051       switch (rm) {
   6052          case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
   6053          case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
   6054          case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
   6055          // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
   6056          case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
   6057          default: break;
   6058       }
   6059       if (irrmE) {
   6060          IRTemp src = newTemp(ty);
   6061          IRTemp dst = newTemp(ty);
   6062          assign(src, getQRegLO(nn, ty));
   6063          assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
   6064                            irrmE, mkexpr(src)));
   6065          putQReg128(dd, mkV128(0x0000));
   6066          putQRegLO(dd, mkexpr(dst));
   6067          DIP("frint%c %s, %s\n",
   6068              ch, nameQRegLO(dd, ty), nameQRegLO(nn, ty));
   6069          return True;
   6070       }
   6071       /* else unhandled rounding mode case -- fall through */
   6072    }
   6073 
   6074    /* ------------------ FCVT (scalar) ------------------ */
   6075    /* 31        23 21    16 14    9 4
   6076       000 11110 11 10001 00 10000 n d   FCVT Sd, Hn (unimp)
   6077       --------- 11 ----- 01 ---------   FCVT Dd, Hn (unimp)
   6078       --------- 00 ----- 11 ---------   FCVT Hd, Sn (unimp)
   6079       --------- 00 ----- 01 ---------   FCVT Dd, Sn
   6080       --------- 01 ----- 11 ---------   FCVT Hd, Dn (unimp)
   6081       --------- 01 ----- 00 ---------   FCVT Sd, Dn
   6082       Rounding, when dst is smaller than src, is per the FPCR.
   6083    */
   6084    if (INSN(31,24) == BITS8(0,0,0,1,1,1,1,0)
   6085        && INSN(21,17) == BITS5(1,0,0,0,1)
   6086        && INSN(14,10) == BITS5(1,0,0,0,0)) {
   6087       UInt b2322 = INSN(23,22);
   6088       UInt b1615 = INSN(16,15);
   6089       UInt nn    = INSN(9,5);
   6090       UInt dd    = INSN(4,0);
   6091       if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
   6092          /* Convert S to D */
   6093          IRTemp res = newTemp(Ity_F64);
   6094          assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32)));
   6095          putQReg128(dd, mkV128(0x0000));
   6096          putQRegLO(dd, mkexpr(res));
   6097          DIP("fcvt %s, %s\n",
   6098              nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32));
   6099          return True;
   6100       }
   6101       if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
   6102          /* Convert D to S */
   6103          IRTemp res = newTemp(Ity_F32);
   6104          assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
   6105                                          getQRegLO(nn, Ity_F64)));
   6106          putQReg128(dd, mkV128(0x0000));
   6107          putQRegLO(dd, mkexpr(res));
   6108          DIP("fcvt %s, %s\n",
   6109              nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64));
   6110          return True;
   6111       }
   6112       /* else unhandled */
   6113    }
   6114 
   6115    /* ------------------ FABD (scalar) ------------------ */
   6116    /* 31        23  20 15     9 4
   6117       011 11110 111 m  110101 n d  FABD  Dd, Dn, Dm
   6118       011 11110 101 m  110101 n d  FABD  Sd, Sn, Sm
   6119    */
   6120    if (INSN(31,23) == BITS9(0,1,1,1,1,1,1,0,1) && INSN(21,21) == 1
   6121        && INSN(15,10) == BITS6(1,1,0,1,0,1)) {
   6122       Bool   isD = INSN(22,22) == 1;
   6123       UInt   mm  = INSN(20,16);
   6124       UInt   nn  = INSN(9,5);
   6125       UInt   dd  = INSN(4,0);
   6126       IRType ty  = isD ? Ity_F64 : Ity_F32;
   6127       IRTemp res = newTemp(ty);
   6128       assign(res, unop(mkABSF(ty),
   6129                        triop(mkSUBF(ty),
   6130                              mkexpr(mk_get_IR_rounding_mode()),
   6131                              getQRegLO(nn,ty), getQRegLO(mm,ty))));
   6132       putQReg128(dd, mkV128(0x0000));
   6133       putQRegLO(dd, mkexpr(res));
   6134       DIP("fabd %s, %s, %s\n",
   6135           nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
   6136       return True;
   6137    }
   6138 
   6139    /* -------------- {S,U}CVTF (vector, integer) -------------- */
   6140    /* 31  28      22 21       15     9 4
   6141       0q0 01110 0 sz 1  00001 110110 n d  SCVTF Vd, Vn
   6142       0q1 01110 0 sz 1  00001 110110 n d  UCVTF Vd, Vn
   6143       with laneage:
   6144       case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
   6145    */
   6146    if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,0)
   6147        && INSN(21,16) == BITS6(1,0,0,0,0,1)
   6148        && INSN(15,10) == BITS6(1,1,0,1,1,0)) {
   6149       Bool isQ   = INSN(30,30) == 1;
   6150       Bool isU   = INSN(29,29) == 1;
   6151       Bool isF64 = INSN(22,22) == 1;
   6152       UInt nn    = INSN(9,5);
   6153       UInt dd    = INSN(4,0);
   6154       if (isQ || !isF64) {
   6155          IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
   6156          UInt   nLanes = 0;
   6157          Bool   zeroHI = False;
   6158          const HChar* arrSpec = NULL;
   6159          Bool   ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
   6160                                       isQ, isF64 );
   6161          IROp   op = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
   6162                          : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
   6163          IRTemp rm = mk_get_IR_rounding_mode();
   6164          UInt   i;
   6165          vassert(ok); /* the 'if' above should ensure this */
   6166          for (i = 0; i < nLanes; i++) {
   6167             putQRegLane(dd, i,
   6168                         binop(op, mkexpr(rm), getQRegLane(nn, i, tyI)));
   6169          }
   6170          if (zeroHI) {
   6171             putQRegLane(dd, 1, mkU64(0));
   6172          }
   6173          DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
   6174              nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
   6175          return True;
   6176       }
   6177       /* else fall through */
   6178    }
   6179 
   6180    /* ---------- F{ADD,SUB,MUL,DIV,MLA,MLS} (vector) ---------- */
   6181    /* 31  28      22 21 20 15     9 4                  case
   6182       0q0 01110 0 sz 1  m  110101 n d  FADD Vd,Vn,Vm   1
   6183       0q0 01110 1 sz 1  m  110101 n d  FSUB Vd,Vn,Vm   2
   6184       0q1 01110 0 sz 1  m  110111 n d  FMUL Vd,Vn,Vm   3
   6185       0q1 01110 0 sz 1  m  111111 n d  FDIV Vd,Vn,Vm   4
   6186       0q0 01110 0 sz 1  m  110011 n d  FMLA Vd,Vn,Vm   5
   6187       0q0 01110 1 sz 1  m  110011 n d  FMLS Vd,Vn,Vm   6
   6188       0q1 01110 1 sz 1  m  110101 n d  FABD Vd,Vn,Vm   7
   6189    */
   6190    if (INSN(31,31) == 0
   6191        && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) {
   6192       Bool isQ   = INSN(30,30) == 1;
   6193       UInt b29   = INSN(29,29);
   6194       UInt b23   = INSN(23,23);
   6195       Bool isF64 = INSN(22,22) == 1;
   6196       UInt mm    = INSN(20,16);
   6197       UInt b1510 = INSN(15,10);
   6198       UInt nn    = INSN(9,5);
   6199       UInt dd    = INSN(4,0);
   6200       UInt ix    = 0;
   6201       /**/ if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,1,0,1)) ix = 1;
   6202       else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 2;
   6203       else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,0,1,1,1)) ix = 3;
   6204       else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,1,1,1,1)) ix = 4;
   6205       else if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,0,1,1)) ix = 5;
   6206       else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,0,1,1)) ix = 6;
   6207       else if (b29 == 1 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 7;
   6208       IRType laneTy = Ity_INVALID;
   6209       Bool   zeroHI = False;
   6210       const HChar* arr = "??";
   6211       Bool ok
   6212          = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
   6213       /* Skip MLA/MLS for the time being */
   6214       if (ok && ix >= 1 && ix <= 4) {
   6215          const IROp ops64[4]
   6216             = { Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2 };
   6217          const IROp ops32[4]
   6218             = { Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4 };
   6219          const HChar* names[4]
   6220             = { "fadd", "fsub", "fmul", "fdiv" };
   6221          IROp   op = laneTy==Ity_F64 ? ops64[ix-1] : ops32[ix-1];
   6222          IRTemp rm = mk_get_IR_rounding_mode();
   6223          IRTemp t1 = newTemp(Ity_V128);
   6224          IRTemp t2 = newTemp(Ity_V128);
   6225          assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
   6226          assign(t2, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t1))
   6227                            : mkexpr(t1));
   6228          putQReg128(dd, mkexpr(t2));
   6229          DIP("%s %s.%s, %s.%s, %s.%s\n", names[ix-1],
   6230              nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   6231          return True;
   6232       }
   6233       if (ok && ix >= 5 && ix <= 6) {
   6234          IROp opADD = laneTy==Ity_F64 ? Iop_Add64Fx2 : Iop_Add32Fx4;
   6235          IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
   6236          IROp opMUL = laneTy==Ity_F64 ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
   6237          IRTemp rm = mk_get_IR_rounding_mode();
   6238          IRTemp t1 = newTemp(Ity_V128);
   6239          IRTemp t2 = newTemp(Ity_V128);
   6240          // FIXME: double rounding; use FMA primops instead
   6241          assign(t1, triop(opMUL,
   6242                           mkexpr(rm), getQReg128(nn), getQReg128(mm)));
   6243          assign(t2, triop(ix == 5 ? opADD : opSUB,
   6244                           mkexpr(rm), getQReg128(dd), mkexpr(t1)));
   6245          putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
   6246                                : mkexpr(t2));
   6247          DIP("%s %s.%s, %s.%s, %s.%s\n", ix == 5 ? "fmla" : "fmls",
   6248              nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   6249          return True;
   6250       }
   6251       if (ok && ix == 7) {
   6252          IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
   6253          IROp opABS = laneTy==Ity_F64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
   6254          IRTemp rm = mk_get_IR_rounding_mode();
   6255          IRTemp t1 = newTemp(Ity_V128);
   6256          IRTemp t2 = newTemp(Ity_V128);
   6257          // FIXME: use Abd primop instead?
   6258          assign(t1, triop(opSUB,
   6259                           mkexpr(rm), getQReg128(nn), getQReg128(mm)));
   6260          assign(t2, unop(opABS, mkexpr(t1)));
   6261          putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
   6262                                : mkexpr(t2));
   6263          DIP("fabd %s.%s, %s.%s, %s.%s\n",
   6264              nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   6265          return True;
   6266       }
   6267    }
   6268 
   6269    /* ------------ FCM{EQ,GE,GT}, FAC{GE,GT} (vector) ------------ */
   6270    /* 31  28      22   20 15     9 4                  case
   6271       0q1 01110 0 sz 1 m  111011 n d  FACGE Vd, Vn, Vm
   6272       0q1 01110 1 sz 1 m  111011 n d  FACGT Vd, Vn, Vm
   6273       0q0 01110 0 sz 1 m  111001 n d  FCMEQ Vd, Vn, Vm
   6274       0q1 01110 0 sz 1 m  111001 n d  FCMGE Vd, Vn, Vm
   6275       0q1 01110 1 sz 1 m  111001 n d  FCMGT Vd, Vn, Vm
   6276    */
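            /* GE and GT are computed by swapping the operands of the LE/LT
               vector compares; the FAC forms additionally take the absolute
               value of both operands first. */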
   6277    if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1
   6278        && INSN(15,12) == BITS4(1,1,1,0) && INSN(10,10) == 1) {
   6279       Bool isQ   = INSN(30,30) == 1;
   6280       UInt U     = INSN(29,29);
   6281       UInt E     = INSN(23,23);
   6282       Bool isF64 = INSN(22,22) == 1;
   6283       UInt ac    = INSN(11,11);
   6284       UInt mm    = INSN(20,16);
   6285       UInt nn    = INSN(9,5);
   6286       UInt dd    = INSN(4,0);
   6287       /* */
   6288       UInt   EUac   = (E << 2) | (U << 1) | ac;
   6289       IROp   opABS  = Iop_INVALID;
   6290       IROp   opCMP  = Iop_INVALID;
   6291       IRType laneTy = Ity_INVALID;
   6292       Bool   zeroHI = False;
   6293       Bool   swap   = True;
   6294       const HChar* arr = "??";
   6295       const HChar* nm  = "??";
   6296       Bool ok
   6297          = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
   6298       if (ok) {
   6299          vassert((isF64 && laneTy == Ity_F64) || (!isF64 && laneTy == Ity_F32));
   6300          switch (EUac) {
   6301             case BITS3(0,0,0):
   6302                nm    = "fcmeq";
   6303                opCMP = isF64 ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
   6304                swap  = False;
   6305                break;
   6306             case BITS3(0,1,0):
   6307                nm    = "fcmge";
   6308                opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
   6309                break;
   6310             case BITS3(0,1,1):
   6311                nm    = "facge";
   6312                opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
   6313                opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
   6314                break;
   6315             case BITS3(1,1,0):
   6316                nm    = "fcmgt";
   6317                opCMP = isF64 ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
   6318                break;
   6319             case BITS3(1,1,1):
   6320                nm    = "facgt";
   6321                opCMP = isF64 ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
   6322                opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
   6323                break;
   6324             default:
   6325                break;
   6326          }
   6327       }
   6328       if (opCMP != Iop_INVALID) {
   6329          IRExpr* argN = getQReg128(nn);
   6330          IRExpr* argM = getQReg128(mm);
   6331          if (opABS != Iop_INVALID) {
   6332             argN = unop(opABS, argN);
   6333             argM = unop(opABS, argM);
   6334          }
   6335          IRExpr* res = swap ? binop(opCMP, argM, argN)
   6336                             : binop(opCMP, argN, argM);
   6337          if (zeroHI) {
   6338             res = unop(Iop_ZeroHI64ofV128, res);
   6339          }
   6340          putQReg128(dd, res);
   6341          DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
   6342              nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   6343          return True;
   6344       }
   6345       /* else fall through */
   6346    }
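           /* Note on the FCM/FAC decode above: the comparisons are built
              from the CmpLE/CmpLT ops, so for the GE/GT forms 'swap' is
              left set and the operands are exchanged: n >= m is evaluated
              as m <= n, and n > m as m < n.  Only FCMEQ compares in the
              stated order.  FACGE/FACGT additionally pass both operands
              through the absolute-value op first. */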
   6347 
   6348    /* -------------------- FCVTN -------------------- */
   6349    /* 31  28    23  20    15     9 4
   6350       0q0 01110 0s1 00001 011010 n d  FCVTN Vd, Vn
   6351       where case q:s of 00: 16Fx4(lo) <- 32Fx4
   6352                         01: 32Fx2(lo) <- 64Fx2
   6353                         10: 16Fx4(hi) <- 32Fx4
   6354                         11: 32Fx2(hi) <- 64Fx2
   6355       Only deals with the 32Fx2 <- 64Fx2 version (s==1)
   6356    */
   6357    if (INSN(31,31) == 0 && INSN(29,23) == BITS7(0,0,1,1,1,0,0)
   6358        && INSN(21,10) == BITS12(1,0,0,0,0,1,0,1,1,0,1,0)) {
   6359       UInt bQ = INSN(30,30);
   6360       UInt bS = INSN(22,22);
   6361       UInt nn = INSN(9,5);
   6362       UInt dd = INSN(4,0);
   6363       if (bS == 1) {
   6364          IRTemp  rm    = mk_get_IR_rounding_mode();
   6365          IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
   6366          IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
   6367          putQRegLane(dd, 2 * bQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
   6368          putQRegLane(dd, 2 * bQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
   6369          if (bQ == 0) {
   6370             putQRegLane(dd, 1, mkU64(0));
   6371          }
   6372          DIP("fcvtn%s %s.%s, %s.2d\n", bQ ? "2" : "",
   6373              nameQReg128(dd), bQ ? "4s" : "2s", nameQReg128(nn));
   6374          return True;
   6375       }
   6376       /* else fall through */
   6377    }
   6378 
   6379    /* ---------------- ADD/SUB (vector) ---------------- */
   6380    /* 31  28    23   21 20 15     9 4
   6381       0q0 01110 size 1  m  100001 n d  ADD Vd.T, Vn.T, Vm.T
   6382       0q1 01110 size 1  m  100001 n d  SUB Vd.T, Vn.T, Vm.T
   6383    */
   6384    if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
   6385        && INSN(21,21) == 1 && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
   6386       Bool isQ    = INSN(30,30) == 1;
   6387       UInt szBlg2 = INSN(23,22);
   6388       Bool isSUB  = INSN(29,29) == 1;
   6389       UInt mm     = INSN(20,16);
   6390       UInt nn     = INSN(9,5);
   6391       UInt dd     = INSN(4,0);
   6392       Bool zeroHI = False;
   6393       const HChar* arrSpec = "";
   6394       Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
   6395       if (ok) {
   6396          const IROp opsADD[4]
   6397             = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
   6398          const IROp opsSUB[4]
   6399             = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
   6400          vassert(szBlg2 < 4);
   6401          IROp   op = isSUB ? opsSUB[szBlg2] : opsADD[szBlg2];
   6402          IRTemp t  = newTemp(Ity_V128);
   6403          assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
   6404          putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
   6405                                : mkexpr(t));
   6406          const HChar* nm = isSUB ? "sub" : "add";
   6407          DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
   6408              nameQReg128(dd), arrSpec,
   6409              nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
   6410          return True;
   6411       }
   6412       /* else fall through */
   6413    }
   6414 
   6415    /* ---------------- ADD/SUB (scalar) ---------------- */
   6416    /* 31  28    23 21 20 15     9 4
   6417       010 11110 11 1  m  100001 n d  ADD Dd, Dn, Dm
   6418       011 11110 11 1  m  100001 n d  SUB Dd, Dn, Dm
   6419    */
   6420    if (INSN(31,30) == BITS2(0,1) && INSN(28,21) == BITS8(1,1,1,1,0,1,1,1)
   6421        && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
   6422       Bool isSUB = INSN(29,29) == 1;
   6423       UInt mm    = INSN(20,16);
   6424       UInt nn    = INSN(9,5);
   6425       UInt dd    = INSN(4,0);
   6426       IRTemp res = newTemp(Ity_I64);
   6427       assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
   6428                         getQRegLane(nn, 0, Ity_I64),
   6429                         getQRegLane(mm, 0, Ity_I64)));
   6430       putQRegLane(dd, 0, mkexpr(res));
   6431       putQRegLane(dd, 1, mkU64(0));
   6432       DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
   6433           nameQRegLO(dd, Ity_I64),
   6434           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
   6435       return True;
   6436    }
   6437 
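           /* Note on the scalar case above: as with any write to a SIMD
              scalar register, lane 0 receives the result and the rest of
              the 128-bit register is zeroed, hence the explicit
              putQRegLane(dd, 1, mkU64(0)). */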
   6438    /* ------------ MUL/PMUL/MLA/MLS (vector) ------------ */
   6439    /* 31  28    23   21 20 15     9 4
   6440       0q0 01110 size 1  m  100111 n d  MUL  Vd.T, Vn.T, Vm.T  B/H/S only
   6441       0q1 01110 size 1  m  100111 n d  PMUL Vd.T, Vn.T, Vm.T  B only
   6442       0q0 01110 size 1  m  100101 n d  MLA  Vd.T, Vn.T, Vm.T  B/H/S only
   6443       0q1 01110 size 1  m  100101 n d  MLS  Vd.T, Vn.T, Vm.T  B/H/S only
   6444    */
   6445    if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
   6446        && INSN(21,21) == 1
   6447        && (INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(1,0,0,1,0,1)) {
   6448       Bool isQ    = INSN(30,30) == 1;
   6449       UInt szBlg2 = INSN(23,22);
   6450       UInt bit29  = INSN(29,29);
   6451       UInt mm     = INSN(20,16);
   6452       UInt nn     = INSN(9,5);
   6453       UInt dd     = INSN(4,0);
   6454       Bool isMLAS = INSN(11,11) == 0;
   6455       const IROp opsADD[4]
   6456          = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_INVALID };
   6457       const IROp opsSUB[4]
   6458          = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_INVALID };
   6459       const IROp opsMUL[4]
   6460          = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
   6461       const IROp opsPMUL[4]
   6462          = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
   6463       /* Set opMUL and, if necessary, opACC.  A result value of
   6464          Iop_INVALID for opMUL indicates that the instruction is
   6465          invalid. */
   6466       Bool zeroHI = False;
   6467       const HChar* arrSpec = "";
   6468       Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
   6469       vassert(szBlg2 < 4);
   6470       IROp opACC = Iop_INVALID;
   6471       IROp opMUL = Iop_INVALID;
   6472       if (ok) {
   6473          opMUL = (bit29 == 1 && !isMLAS) ? opsPMUL[szBlg2]
   6474                                          : opsMUL[szBlg2];
   6475          opACC = isMLAS ? (bit29 == 1 ? opsSUB[szBlg2] : opsADD[szBlg2])
   6476                         : Iop_INVALID;
   6477       }
   6478       if (ok && opMUL != Iop_INVALID) {
   6479          IRTemp t1 = newTemp(Ity_V128);
   6480          assign(t1, binop(opMUL, getQReg128(nn), getQReg128(mm)));
   6481          IRTemp t2 = newTemp(Ity_V128);
   6482          assign(t2, opACC == Iop_INVALID
   6483                        ? mkexpr(t1)
   6484                        : binop(opACC, getQReg128(dd), mkexpr(t1)));
   6485          putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
   6486                                : mkexpr(t2));
   6487          const HChar* nm = isMLAS ? (bit29 == 1 ? "mls" : "mla")
   6488                                   : (bit29 == 1 ? "pmul" : "mul");
   6489          DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
   6490              nameQReg128(dd), arrSpec,
   6491              nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
   6492          return True;
   6493       }
   6494       /* else fall through */
   6495    }
   6496 
   6497    /* ---------------- {S,U}{MIN,MAX} (vector) ---------------- */
   6498    /* 31  28    23   21 20 15     9 4
   6499       0q0 01110 size 1  m  011011 n d  SMIN Vd.T, Vn.T, Vm.T
   6500       0q1 01110 size 1  m  011011 n d  UMIN Vd.T, Vn.T, Vm.T
   6501       0q0 01110 size 1  m  011001 n d  SMAX Vd.T, Vn.T, Vm.T
   6502       0q1 01110 size 1  m  011001 n d  UMAX Vd.T, Vn.T, Vm.T
   6503    */
   6504    if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
   6505        && INSN(21,21) == 1
   6506        && ((INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(0,1,1,0,0,1))) {
   6507       Bool isQ    = INSN(30,30) == 1;
   6508       Bool isU    = INSN(29,29) == 1;
   6509       UInt szBlg2 = INSN(23,22);
   6510       Bool isMAX  = INSN(11,11) == 0;
   6511       UInt mm     = INSN(20,16);
   6512       UInt nn     = INSN(9,5);
   6513       UInt dd     = INSN(4,0);
   6514       Bool zeroHI = False;
   6515       const HChar* arrSpec = "";
   6516       Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
   6517       if (ok) {
   6518          const IROp opMINS[4]
   6519             = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
   6520          const IROp opMINU[4]
   6521             = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
   6522          const IROp opMAXS[4]
   6523             = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
   6524          const IROp opMAXU[4]
   6525             = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
   6526          vassert(szBlg2 < 4);
   6527          IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
   6528                          : (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
   6529          IRTemp t = newTemp(Ity_V128);
   6530          assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
   6531          putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
   6532                                : mkexpr(t));
   6533          const HChar* nm = isMAX ? (isU ? "umax" : "smax")
   6534                                  : (isU ? "umin" : "smin");
   6535          DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
   6536              nameQReg128(dd), arrSpec,
   6537              nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
   6538          return True;
   6539       }
   6540       /* else fall through */
   6541    }
   6542 
   6543    /* -------------------- {S,U}{MIN,MAX}V -------------------- */
   6544    /* 31  28    23   21    16 15     9 4
   6545       0q0 01110 size 11000 1  101010 n d  SMINV Vd, Vn.T
   6546       0q1 01110 size 11000 1  101010 n d  UMINV Vd, Vn.T
   6547       0q0 01110 size 11000 0  101010 n d  SMAXV Vd, Vn.T
   6548       0q1 01110 size 11000 0  101010 n d  UMAXV Vd, Vn.T
   6549    */
   6550    if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
   6551        && INSN(21,17) == BITS5(1,1,0,0,0)
   6552        && INSN(15,10) == BITS6(1,0,1,0,1,0)) {
   6553       Bool isQ    = INSN(30,30) == 1;
   6554       Bool isU    = INSN(29,29) == 1;
   6555       UInt szBlg2 = INSN(23,22);
   6556       Bool isMAX  = INSN(16,16) == 0;
   6557       UInt nn     = INSN(9,5);
   6558       UInt dd     = INSN(4,0);
   6559       Bool zeroHI = False;
   6560       const HChar* arrSpec = "";
   6561       Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2);
   6562       if (ok) {
   6563          if (szBlg2 == 3)         ok = False;
   6564          if (szBlg2 == 2 && !isQ) ok = False;
   6565       }
   6566       if (ok) {
   6567          const IROp opMINS[3]
   6568             = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
   6569          const IROp opMINU[3]
   6570             = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
   6571          const IROp opMAXS[3]
   6572             = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
   6573          const IROp opMAXU[3]
   6574             = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
   6575          vassert(szBlg2 < 3);
   6576          IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
   6577                          : (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
   6578          IRTemp tN1 = newTemp(Ity_V128);
   6579          assign(tN1, getQReg128(nn));
   6580          /* If Q == 0, we're just folding lanes in the lower half of
   6581             the value.  In which case, copy the lower half of the
   6582             source into the upper half, so we can then treat it the
   6583             same as the full width case. */
   6584          IRTemp tN2 = newTemp(Ity_V128);
   6585          assign(tN2, zeroHI ? mk_CatEvenLanes64x2(tN1,tN1) : mkexpr(tN1));
   6586          IRTemp res = math_MINMAXV(tN2, op);
   6587          if (res == IRTemp_INVALID)
   6588             return False; /* means math_MINMAXV
   6589                              doesn't handle this case yet */
   6590          putQReg128(dd, mkexpr(res));
   6591          const HChar* nm = isMAX ? (isU ? "umaxv" : "smaxv")
   6592                                  : (isU ? "uminv" : "sminv");
   6593          const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
   6594          IRType laneTy = tys[szBlg2];
   6595          DIP("%s %s, %s.%s\n", nm,
   6596              nameQRegLO(dd, laneTy), nameQReg128(nn), arrSpec);
   6597          return True;
   6598       }
   6599       /* else fall through */
   6600    }
   6601    /* ------------ UMULL (vector) ------------ */
   6602    /* 31  28    23 21 20 15     9 4
   6603       001 01110 sz 1  m  110000 n d UMULL Vd.Ta, Vn.Tb, Vm.Tb
   6604 
   6605    */
   6606    if (INSN(31,24) == BITS8(0,0,1,0,1,1,1,0) && INSN(23,22) != BITS2(1,1)
   6607        && INSN(21,21) == 1 && INSN(15,10) == BITS6(1,1,0,0,0,0)) {
   6608       UInt mm = INSN(20,16);
   6609       UInt nn = INSN(9,5);
   6610       UInt dd = INSN(4,0);
   6611       UInt sz = INSN(23,22);
   6612 
   6613       const HChar* nameTa[3] = { "8h", "4s", "2d" };
   6614       const HChar* nameTb[3] = { "8b", "4h", "2s" };
   6615       const IROp ops[3] = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2 };
   6616 
   6617       putQReg128(dd, binop(ops[sz], getQRegLO(nn, Ity_I64), getQRegLO(mm, Ity_I64)));
   6618 
   6619       DIP("umull %s.%s, %s.%s, %s.%s\n", nameQReg128(dd), nameTa[sz],
   6620           nameQReg128(nn), nameTb[sz], nameQReg128(mm), nameTb[sz]);
   6621       return True;
   6622    }
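           /* Note on UMULL above: bit 30 (Q) is pinned to zero by the
              match, so only the lower-half (non-"2") form is handled.
              The Iop_Mull*Ux* ops consume the low 64 bits of each source
              and deliver the double-width products as a full 128-bit
              result, so no further widening or masking is needed. */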
   6623 
   6624 
   6625    /* ------------ {AND,BIC,ORR,ORN} (vector) ------------ */
   6626    /* 31  28    23  20 15     9 4
   6627       0q0 01110 001 m  000111 n d  AND Vd.T, Vn.T, Vm.T
   6628       0q0 01110 011 m  000111 n d  BIC Vd.T, Vn.T, Vm.T
   6629       0q0 01110 101 m  000111 n d  ORR Vd.T, Vn.T, Vm.T
   6630       0q0 01110 111 m  000111 n d  ORN Vd.T, Vn.T, Vm.T
   6631       T is 16b when q==1, 8b when q==0
   6632    */
   6633    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
   6634        && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
   6635       Bool   isQ    = INSN(30,30) == 1;
   6636       Bool   isORR  = INSN(23,23) == 1;
   6637       Bool   invert = INSN(22,22) == 1;
   6638       UInt   mm     = INSN(20,16);
   6639       UInt   nn     = INSN(9,5);
   6640       UInt   dd     = INSN(4,0);
   6641       IRTemp res    = newTemp(Ity_V128);
   6642       assign(res, binop(isORR ? Iop_OrV128 : Iop_AndV128,
   6643                         getQReg128(nn),
   6644                         invert ? unop(Iop_NotV128, getQReg128(mm))
   6645                                : getQReg128(mm)));
   6646       putQReg128(dd, isQ ? mkexpr(res)
   6647                          : unop(Iop_ZeroHI64ofV128, mkexpr(res)));
   6648       const HChar* names[4] = { "and", "bic", "orr", "orn" };
   6649       const HChar* ar = isQ ? "16b" : "8b";
   6650       DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
   6651           nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
   6652       return True;
   6653    }
   6654 
   6655    /* ---------- CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector) ---------- */
   6656    /* 31  28    23   21     15     9 4                          ix
   6657       0q1 01110 size 1  m   100011 n d  CMEQ  Vd.T, Vn.T, Vm.T  (1) ==
   6658       0q0 01110 size 1  m   100011 n d  CMTST Vd.T, Vn.T, Vm.T  (2) &, != 0
   6659 
   6660       0q1 01110 size 1  m   001101 n d  CMHI Vd.T, Vn.T, Vm.T   (3) >u
   6661       0q0 01110 size 1  m   001101 n d  CMGT Vd.T, Vn.T, Vm.T   (4) >s
   6662 
   6663       0q1 01110 size 1  m   001111 n d  CMHS Vd.T, Vn.T, Vm.T   (5) >=u
   6664       0q0 01110 size 1  m   001111 n d  CMGE Vd.T, Vn.T, Vm.T   (6) >=s
   6665 
   6666       0q1 01110 size 100000 100010 n d  CMGE Vd.T, Vn.T, #0     (7) >=s 0
   6667       0q0 01110 size 100000 100010 n d  CMGT Vd.T, Vn.T, #0     (8) >s 0
   6668 
   6669       0q1 01110 size 100000 100110 n d  CMLE Vd.T, Vn.T, #0     (9) <=s 0
   6670       0q0 01110 size 100000 100110 n d  CMEQ Vd.T, Vn.T, #0     (10) == 0
   6671 
   6672       0q0 01110 size 100000 101010 n d  CMLT Vd.T, Vn.T, #0     (11) <s 0
   6673    */
   6674    if (INSN(31,31) == 0
   6675        && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) {
   6676       Bool isQ    = INSN(30,30) == 1;
   6677       UInt bit29  = INSN(29,29);
   6678       UInt szBlg2 = INSN(23,22);
   6679       UInt mm     = INSN(20,16);
   6680       UInt b1510  = INSN(15,10);
   6681       UInt nn     = INSN(9,5);
   6682       UInt dd     = INSN(4,0);
   6683       const IROp opsEQ[4]
   6684          = { Iop_CmpEQ8x16,  Iop_CmpEQ16x8,  Iop_CmpEQ32x4,  Iop_CmpEQ64x2 };
   6685       const IROp opsGTS[4]
   6686          = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
   6687       const IROp opsGTU[4]
   6688          = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
   6689       Bool zeroHI = False;
   6690       const HChar* arrSpec = "??";
   6691       Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2);
   6692       UInt ix = 0;
   6693       if (ok) {
   6694          switch (b1510) {
   6695             case BITS6(1,0,0,0,1,1): ix = bit29 ? 1 : 2; break;
   6696             case BITS6(0,0,1,1,0,1): ix = bit29 ? 3 : 4; break;
   6697             case BITS6(0,0,1,1,1,1): ix = bit29 ? 5 : 6; break;
   6698             case BITS6(1,0,0,0,1,0):
   6699                if (mm == 0) { ix = bit29 ? 7 : 8; }; break;
   6700             case BITS6(1,0,0,1,1,0):
   6701                if (mm == 0) { ix = bit29 ? 9 : 10; }; break;
   6702             case BITS6(1,0,1,0,1,0):
   6703                if (mm == 0 && bit29 == 0) { ix = 11; }; break;
   6704             default: break;
   6705          }
   6706       }
   6707       if (ix != 0) {
   6708          vassert(ok && szBlg2 < 4);
   6709          IRExpr* argL = getQReg128(nn);
   6710          IRExpr* argR = (ix <= 6) ? getQReg128(mm) : mkV128(0x0000);
   6711          IRExpr* res  = NULL;
   6712          /* Some useful identities:
   6713                x >  y   can be expressed directly
   6714                x <  y   ==   y > x
   6715                x <= y   ==   not (x > y)
   6716                x >= y   ==   not (y > x)
   6717          */
   6718          switch (ix) {
   6719             case 1: res = binop(opsEQ[szBlg2], argL, argR); break;
   6720             case 2: res = unop(Iop_NotV128, binop(opsEQ[szBlg2],
   6721                                             binop(Iop_AndV128, argL, argR),
   6722                                                   mkV128(0x0000)));
   6723                     break;
   6724             case 3: res = binop(opsGTU[szBlg2], argL, argR); break;
   6725             case 4: res = binop(opsGTS[szBlg2], argL, argR); break;
   6726             case 5: res = unop(Iop_NotV128, binop(opsGTU[szBlg2], argR, argL));
   6727                     break;
   6728             case 6: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
   6729                     break;
   6730             case 7: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
   6731                     break;
   6732             case 8: res = binop(opsGTS[szBlg2], argL, argR); break;
   6733             case 9: res = unop(Iop_NotV128,
   6734                                binop(opsGTS[szBlg2], argL, argR));
   6735                     break;
   6736             case 10: res = binop(opsEQ[szBlg2],  argL, argR); break;
   6737             case 11: res = binop(opsGTS[szBlg2], argR, argL); break;
   6738             default: vassert(0);
   6739          }
   6740          vassert(res);
   6741          putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, res) : res);
   6742          const HChar* nms[11] = { "eq", "tst", "hi", "gt", "hs", "ge",
   6743                                   "ge", "gt", "le", "eq", "lt" };
   6744          if (ix <= 6) {
   6745             DIP("cm%s %s.%s, %s.%s, %s.%s\n", nms[ix-1],
   6746                 nameQReg128(dd), arrSpec,
   6747                 nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
   6748          } else {
   6749             DIP("cm%s %s.%s, %s.%s, #0\n", nms[ix-1],
   6750                 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
   6751          }
   6752          return True;
   6753       }
   6754       /* else fall through */
   6755    }
   6756 
   6757    /* -------------- {EOR,BSL,BIT,BIF} (vector) -------------- */
   6758    /* 31  28    23   20 15     9 4
   6759       0q1 01110 00 1 m  000111 n d  EOR Vd.T, Vm.T, Vn.T
   6760       0q1 01110 01 1 m  000111 n d  BSL Vd.T, Vm.T, Vn.T
   6761       0q1 01110 10 1 m  000111 n d  BIT Vd.T, Vm.T, Vn.T
   6762       0q1 01110 11 1 m  000111 n d  BIF Vd.T, Vm.T, Vn.T
   6763    */
   6764    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
   6765        && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
   6766       Bool   isQ  = INSN(30,30) == 1;
   6767       UInt   op   = INSN(23,22);
   6768       UInt   mm   = INSN(20,16);
   6769       UInt   nn   = INSN(9,5);
   6770       UInt   dd   = INSN(4,0);
   6771       IRTemp argD = newTemp(Ity_V128);
   6772       IRTemp argN = newTemp(Ity_V128);
   6773       IRTemp argM = newTemp(Ity_V128);
   6774       assign(argD, getQReg128(dd));
   6775       assign(argN, getQReg128(nn));
   6776       assign(argM, getQReg128(mm));
   6777       const IROp opXOR = Iop_XorV128;
   6778       const IROp opAND = Iop_AndV128;
   6779       const IROp opNOT = Iop_NotV128;
   6780       IRExpr* res = NULL;
   6781       switch (op) {
   6782          case BITS2(0,0): /* EOR */
   6783             res = binop(opXOR, mkexpr(argM), mkexpr(argN));
   6784             break;
   6785          case BITS2(0,1): /* BSL */
   6786             res = binop(opXOR, mkexpr(argM),
   6787                                binop(opAND,
   6788                                      binop(opXOR, mkexpr(argM), mkexpr(argN)),
   6789                                      mkexpr(argD)));
   6790             break;
   6791          case BITS2(1,0): /* BIT */
   6792             res = binop(opXOR, mkexpr(argD),
   6793                                binop(opAND,
   6794                                      binop(opXOR, mkexpr(argD), mkexpr(argN)),
   6795                                      mkexpr(argM)));
   6796             break;
   6797          case BITS2(1,1): /* BIF */
   6798             res = binop(opXOR, mkexpr(argD),
   6799                                binop(opAND,
   6800                                      binop(opXOR, mkexpr(argD), mkexpr(argN)),
   6801                                      unop(opNOT, mkexpr(argM))));
   6802             break;
   6803          default:
   6804             vassert(0);
   6805       }
   6806       vassert(res);
   6807       putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
   6808       const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
   6809       const HChar* arr = isQ ? "16b" : "8b";
   6810       vassert(op < 4);
   6811       DIP("%s %s.%s, %s.%s, %s.%s\n", nms[op],
   6812           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   6813       return True;
   6814    }
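           /* Note on the select cases above: BSL, BIT and BIF all use the
              xor/and/xor multiplexer identity
                 mux(sel, a, b) = b ^ ((b ^ a) & sel)
              which yields a's bit where sel is 1 and b's bit where sel is
              0.  Hence
                 BSL: Vd <- (Vd & Vn) | (~Vd & Vm)   (selector is the old Vd)
                 BIT: Vd <- (Vm & Vn) | (~Vm & Vd)   (insert Vn bits where Vm is 1)
                 BIF: Vd <- (~Vm & Vn) | (Vm & Vd)   (insert Vn bits where Vm is 0)
           */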
   6815 
   6816    /* ------------ USHR (scalar, immediate) ------------ */
   6817    /* 31  28     22   18   15     9 4
   6818       011 111110 immh immb 000001 n d  USHR Vd, Vn, #shift
   6819    */
   6820    if (INSN(31,23) == BITS9(0,1,1, 1,1,1,1,1,0)
   6821        && INSN(15,10) == BITS6(0,0,0,0,0,1)) {
   6822       UInt immh = INSN(22,19);
   6823       UInt immb = INSN(18,16);
   6824       UInt nn   = INSN(9,5);
   6825       UInt dd   = INSN(4,0);
   6826 
   6827       UInt szBlg2 = 0;
   6828       UInt shift  = 0;
   6829       Bool ok     = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb);
   6830 
   6831       if (ok && szBlg2 == 3) {
   6832          putQRegHI64(dd, mkU64(0x0));
   6833          putQRegLO(dd, binop(Iop_Shr64, getQRegLO(nn, Ity_I64), mkU8(shift)));
   6834          DIP("ushr %s, %s, #%u\n", nameQRegLO(dd, Ity_I64), nameQRegLO(nn, Ity_I64), shift);
   6835          return True;
   6836       }
   6837    }
   6838    /* ------------ {USHR,SSHR,SHL} (vector, immediate) ------------ */
   6839    /* 31  28     22   18   15     9 4
   6840       0q1 011110 immh immb 000001 n d  USHR Vd.T, Vn.T, #shift (1)
   6841       0q1 011110 immh immb 010001 n d  SRI  Vd.T, Vn.T, #shift (1)
   6842       0q0 011110 immh immb 000001 n d  SSHR Vd.T, Vn.T, #shift (2)
   6843       0q0 011110 immh immb 010101 n d  SHL  Vd.T, Vn.T, #shift (3)
   6844       0q1 011110 immh immb 010101 n d  SLI  Vd.T, Vn.T, #shift (3)
   6845       laneTy, shift = case immh:immb of
   6846                          0001:xxx -> B, SHR:8-xxx,    SHL:xxx
   6847                          001x:xxx -> H, SHR:16-xxxx   SHL:xxxx
   6848                          01xx:xxx -> S, SHR:32-xxxxx  SHL:xxxxx
   6849                          1xxx:xxx -> D, SHR:64-xxxxxx SHL:xxxxxx
   6850                          other    -> invalid
   6851       As usual the case laneTy==D && q==0 is not allowed.
   6852    */
   6853    if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
   6854        && INSN(10,10) == 1) {
   6855       UInt ix = 0;
   6856       /**/ if (INSN(29,29) == 1 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 1;
   6857       else if (INSN(29,29) == 1 && INSN(15,11) == BITS5(0,1,0,0,0)) ix = 1;
   6858       else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 2;
   6859       else if (                    INSN(15,11) == BITS5(0,1,0,1,0)) ix = 3;
   6860       if (ix > 0) {
   6861          Bool isQ  = INSN(30,30) == 1;
   6862          UInt immh = INSN(22,19);
   6863          UInt immb = INSN(18,16);
   6864          UInt nn   = INSN(9,5);
   6865          UInt dd   = INSN(4,0);
   6866          Bool isInsert = (ix == 3 && INSN(29,29) == 1)
   6867                          || (INSN(29,29) == 1 && INSN(15,11) == BITS5(0,1,0,0,0));
   6868 
   6869          const IROp opsSHRN[4]
   6870             = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
   6871          const IROp opsSARN[4]
   6872             = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
   6873          const IROp opsSHLN[4]
   6874             = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
   6875          UInt szBlg2 = 0;
   6876          UInt shift  = 0;
   6877          Bool ok     = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb);
   6878          if (ix == 3) {
   6879             /* The shift encoding has opposite sign for the leftwards
   6880                case.  Adjust shift to compensate. */
   6881             shift = (8 << szBlg2) - shift;
   6882          }
   6883          if (ok && szBlg2 < 4 && shift >= 0 && shift <= (8 << szBlg2)
   6884              && !(szBlg2 == 3/*64bit*/ && !isQ)) {
   6885             IROp op = Iop_INVALID;
   6886             const HChar* nm = NULL;
   6887             switch (ix) {
   6888                case 1: op = opsSHRN[szBlg2]; nm = isInsert ? "sri" : "ushr"; break;
   6889                case 2: op = opsSARN[szBlg2]; nm = "sshr"; break;
   6890                case 3: op = opsSHLN[szBlg2]; nm = isInsert ? "sli" : "shl";  break;
   6891                default: vassert(0);
   6892             }
   6893             IRTemp mask = newTemp(Ity_V128);
   6894             IRTemp res;
   6895             IRTemp candidate  = newTemp(Ity_V128);
   6896 
   6897             assign(candidate, binop(op, getQReg128(nn), mkU8(shift)));
   6898 
   6899             if (isInsert) {
   6900               assign(mask, binop(op,
   6901                                  binop(Iop_64HLtoV128,
   6902                                        mkU64(0xFFFFFFFFFFFFFFFFULL),
   6903                                        mkU64(0xFFFFFFFFFFFFFFFFULL)),
   6904                                  mkU8(shift)));
   6905               res = newTemp(Ity_V128);
   6906 
   6907               assign(res, binop(Iop_OrV128,
   6908                                 binop(Iop_AndV128,
   6909                                       unop(Iop_NotV128, mkexpr(mask)),
   6910                                       getQReg128(dd)),
   6911                                 mkexpr(candidate)));
   6912             } else {
   6913                res = candidate;
   6914             }
   6915 
   6916             putQReg128(dd, isQ ? mkexpr(res) : unop(Iop_ZeroHI64ofV128, mkexpr(res)));
   6917             HChar laneCh = "bhsd"[szBlg2];
   6918             UInt  nLanes = (isQ ? 128 : 64) / (8 << szBlg2);
   6919             DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
   6920                 nameQReg128(dd), nLanes, laneCh,
   6921                 nameQReg128(nn), nLanes, laneCh, shift);
   6922             return True;
   6923          }
   6924          /* else fall through */
   6925       }
   6926    }
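           /* Note on the SRI/SLI cases above: applying the same shift to
              an all-ones vector yields a mask of exactly the lane bits
              that the shifted data can occupy, so the final OR keeps the
              destination's bits outside that mask.  For example, with an
              8-bit lane and SLI #3 the mask is 0xF8, giving
                 d' = (d & 0x07) | (n << 3). */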
   6927 
   6928    /* -------------------- SHRN{,2} -------------------- */
   6929    /* 31  28     22   18   15     9 4
   6930       0q0 011110 immh immb 100001 n d  SHRN  Vd.Tb, Vn.Ta, #sh
   6931 
   6932       where Ta,Tb,sh
   6933         = case immh of 1xxx -> invalid
   6934                        01xx -> 2d, 2s(q0)/4s(q1),  64 - immh:immb (0..31)
   6935                        001x -> 4s, 4h(q0)/8h(q1),  32 - immh:immb (0..15)
   6936                        0001 -> 8h, 8b(q0)/16b(q1),  8 - immh:immb  (0..7)
   6937                        0000 -> AdvSIMD modified immediate (???)
   6938    */
   6939 
   6940    if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
   6941        && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
   6942       Bool isQ = INSN(30,30) == 1;
   6943       UInt immh  = INSN(22,19);
   6944       UInt immb  = INSN(18,16);
   6945       UInt nn    = INSN(9,5);
   6946       UInt dd    = INSN(4,0);
   6947       IRTemp  src  = newTemp(Ity_V128);
   6948       IRTemp  zero = newTemp(Ity_V128);
   6949       IRExpr* res  = NULL;
   6950       const HChar* ta = "??";
   6951       const HChar* tb = "??";
   6952 
   6953       UInt szBlg2 = 0;
   6954       UInt shift  = 0;
   6955       Bool ok     = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb);
   6956 
   6957       if (ok && shift >= 0 && szBlg2 < 3 && shift <= (8 << szBlg2)) {
   6958          const IROp opsSHR[3] = { Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
   6959          const HChar* tas[3] = { "8h", "4s", "2d" };
   6960          const HChar* tbs_q0[3] = { "8b", "4h", "2s" };
   6961          const HChar* tbs_q1[3] = { "16b", "8h", "4s" };
   6962          assign(src, binop(opsSHR[szBlg2], getQReg128(nn), mkU8(shift)));
   6963          assign(zero, mkV128(0x0000));
   6964          switch(szBlg2) {
   6965             case 0:
   6966                res = mk_CatEvenLanes8x16(zero, src);
   6967                break;
   6968             case 1:
   6969                res = mk_CatEvenLanes16x8(zero, src);
   6970                break;
   6971             case 2:
   6972                res = mk_CatEvenLanes32x4(zero, src);
   6973                break;
   6974             default:
   6975                break;
   6976          }
   6977 
   6978          if (res != NULL) {
   6979             if (isQ) {
   6980                putQRegHI64(dd, unop(Iop_V128to64, res));
   6981             } else {
   6982                putQReg128(dd, res);
   6983             }
   6984             DIP("shrn%s %s.%s, %s.%s, #%u\n",
   6985                 isQ ? "2" : "", nameQReg128(dd), isQ ? tbs_q1[szBlg2] : tbs_q0[szBlg2],
   6986                 nameQReg128(nn), tas[szBlg2], shift);
   6987             return True;
   6988          }
   6989       }
   6990    }
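           /* Note on SHRN{,2} above: after the wide lanes are shifted
              right by #sh, the truncated results are the even-numbered
              narrow lanes, and mk_CatEvenLanes*(zero, src) packs them
              into the lower 64 bits with a zero upper half.  For the "2"
              variant only the low 64 bits of that value are written, into
              the upper half of Vd, leaving Vd's lower half untouched. */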
   6991 
   6992    /* -------------------- {U,S}SHLL{,2} -------------------- */
   6993    /* 31  28     22   18   15     9 4
   6994       0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
   6995       0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
   6996       where Ta,Tb,sh
   6997         = case immh of 1xxx -> invalid
   6998                        01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
   6999                        001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
   7000                        0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
   7001                        0000 -> AdvSIMD modified immediate (???)
   7002    */
   7003    if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
   7004        && INSN(15,10) == BITS6(1,0,1,0,0,1)) {
   7005       Bool isQ   = INSN(30,30) == 1;
   7006       Bool isU   = INSN(29,29) == 1;
   7007       UInt immh  = INSN(22,19);
   7008       UInt immb  = INSN(18,16);
   7009       UInt nn    = INSN(9,5);
   7010       UInt dd    = INSN(4,0);
   7011       UInt immhb = (immh << 3) | immb;
   7012       IRTemp  src  = newTemp(Ity_V128);
   7013       IRTemp  zero = newTemp(Ity_V128);
   7014       IRExpr* res  = NULL;
   7015       UInt    sh   = 0;
   7016       const HChar* ta = "??";
   7017       const HChar* tb = "??";
   7018       assign(src, getQReg128(nn));
   7019       assign(zero, mkV128(0x0000));
   7020       if (immh & 8) {
   7021          /* invalid; don't assign to res */
   7022       }
   7023       else if (immh & 4) {
   7024          sh = immhb - 32;
   7025          vassert(sh < 32); /* so 32-sh is 1..32 */
   7026          ta = "2d";
   7027          tb = isQ ? "4s" : "2s";
   7028          IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
   7029                            : mk_InterleaveLO32x4(src, zero);
   7030          res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
   7031       }
   7032       else if (immh & 2) {
   7033          sh = immhb - 16;
   7034          vassert(sh < 16); /* so 16-sh is 1..16 */
   7035          ta = "4s";
   7036          tb = isQ ? "8h" : "4h";
   7037          IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
   7038                            : mk_InterleaveLO16x8(src, zero);
   7039          res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
   7040       }
   7041       else if (immh & 1) {
   7042          sh = immhb - 8;
   7043          vassert(sh < 8); /* so 8-sh is 1..8 */
   7044          ta = "8h";
   7045          tb = isQ ? "16b" : "8b";
   7046          IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
   7047                            : mk_InterleaveLO8x16(src, zero);
   7048          res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
   7049       } else {
   7050          vassert(immh == 0);
   7051          /* invalid; don't assign to res */
   7052       }
   7053       /* */
   7054       if (res) {
   7055          putQReg128(dd, res);
   7056          DIP("%cshll%s %s.%s, %s.%s, #%u\n",
   7057              isU ? 'u' : 's', isQ ? "2" : "",
   7058              nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
   7059          return True;
   7060       }
   7061       /* else fall through */
   7062    }
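           /* Note on {S,U}SHLL{,2} above: interleaving the source lanes
              with zero places each source lane in the top half of a
              double-width lane.  A right shift by (laneBits - sh)
              (arithmetic for SSHLL, logical for USHLL) then sign/zero
              extends the lane and shifts it left by sh in one step.  The
              HI/LO interleave choice selects the upper (the "2" variant)
              or lower half of the source. */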
   7063 
   7064    /* -------------------- XTN{,2} -------------------- */
   7065    /* 31  28    23   21     15     9 4  XTN{,2} Vd.Tb, Vn.Ta
   7066       0q0 01110 size 100001 001010 n d
   7067    */
   7068    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
   7069        && INSN(21,16) == BITS6(1,0,0,0,0,1)
   7070        && INSN(15,10) == BITS6(0,0,1,0,1,0)) {
   7071       Bool isQ  = INSN(30,30) == 1;
   7072       UInt size = INSN(23,22);
   7073       UInt nn   = INSN(9,5);
   7074       UInt dd   = INSN(4,0);
   7075       IROp op   = Iop_INVALID;
   7076       const HChar* tb = NULL;
   7077       const HChar* ta = NULL;
   7078       switch ((size << 1) | (isQ ? 1 : 0)) {
   7079          case 0: tb = "8b";  ta = "8h"; op = Iop_NarrowUn16to8x8;  break;
   7080          case 1: tb = "16b"; ta = "8h"; op = Iop_NarrowUn16to8x8;  break;
   7081          case 2: tb = "4h";  ta = "4s"; op = Iop_NarrowUn32to16x4; break;
   7082          case 3: tb = "8h";  ta = "4s"; op = Iop_NarrowUn32to16x4; break;
   7083          case 4: tb = "2s";  ta = "2d"; op = Iop_NarrowUn64to32x2; break;
   7084          case 5: tb = "4s";  ta = "2d"; op = Iop_NarrowUn64to32x2; break;
   7085          case 6: break;
   7086          case 7: break;
   7087          default: vassert(0);
   7088       }
   7089       if (op != Iop_INVALID) {
   7090          if (!isQ) {
   7091             putQRegLane(dd, 1, mkU64(0));
   7092          }
   7093          putQRegLane(dd, isQ ? 1 : 0, unop(op, getQReg128(nn)));
   7094          DIP("xtn%s %s.%s, %s.%s\n", isQ ? "2" : "",
   7095              nameQReg128(dd), tb, nameQReg128(nn), ta);
   7096          return True;
   7097       }
   7098       /* else fall through */
   7099    }
   7100 
   7101    /* ---------------- CNT (vector) ---------------- */
   7102    /* 31 29     23 21           9 4
   7103       0q 001110 00 100000010110 n d  CNT Vd.T, Vn.T
   7104    */
   7105 
   7106    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
   7107        && INSN(23,22) == BITS2(0,0)
   7108        && INSN(21,10) == BITS12(1,0,0,0,0,0,0,1,0,1,1,0) ) {
   7109       Bool isQ = INSN(30,30) == 1;
   7110       UInt nn  = INSN(9,5);
   7111       UInt dd  = INSN(4,0);
   7112       const HChar* name = isQ ? "16b" : "8b";
   7113 
   7114       IRExpr* res = unop(Iop_Cnt8x16, getQReg128(nn));
   7115       putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
   7116 
   7117       DIP("cnt %s.%s, %s.%s\n", nameQReg128(dd), name, nameQReg128(nn), name);
   7118       return True;
   7119    }
   7120 
   7121 
   7122    /* ---------------- DUP (element, vector) ---------------- */
   7123    /* 31  28       20   15     9 4
   7124       0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   7125    */
   7126    if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
   7127        && INSN(15,10) == BITS6(0,0,0,0,0,1)) {
   7128       Bool   isQ  = INSN(30,30) == 1;
   7129       UInt   imm5 = INSN(20,16);
   7130       UInt   nn   = INSN(9,5);
   7131       UInt   dd   = INSN(4,0);
   7132       IRTemp w0   = newTemp(Ity_I64);
   7133       const HChar* arT  = "??";
   7134       const HChar* arTs = "??";
   7135       IRType laneTy = Ity_INVALID;
   7136       UInt   laneNo = 16; /* invalid */
   7137       if (imm5 & 1) {
   7138          arT    = isQ ? "16b" : "8b";
   7139          arTs   = "b";
   7140          laneNo = (imm5 >> 1) & 15;
   7141          laneTy = Ity_I8;
   7142          assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
   7143       }
   7144       else if (imm5 & 2) {
   7145          arT    = isQ ? "8h" : "4h";
   7146          arTs   = "h";
   7147          laneNo = (imm5 >> 2) & 7;
   7148          laneTy = Ity_I16;
   7149          assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
   7150       }
   7151       else if (imm5 & 4) {
   7152          arT    = isQ ? "4s" : "2s";
   7153          arTs   = "s";
   7154          laneNo = (imm5 >> 3) & 3;
   7155          laneTy = Ity_I32;
   7156          assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
   7157       }
   7158       else if ((imm5 & 8) && isQ) {
   7159          arT  = "2d";
   7160          arTs = "d";
   7161          laneNo = (imm5 >> 4) & 1;
   7162          laneTy = Ity_I64;
   7163          assign(w0, getQRegLane(nn, laneNo, laneTy));
   7164       }
   7165       else {
   7166          /* invalid; leave laneTy unchanged. */
   7167       }
   7168       /* */
   7169       if (laneTy != Ity_INVALID) {
   7170          vassert(laneNo < 16);
   7171          IRTemp w1 = math_DUP_TO_64(w0, laneTy);
   7172          putQReg128(dd, binop(Iop_64HLtoV128,
   7173                               isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
   7174          DIP("dup %s.%s, %s.%s[%u]\n",
   7175              nameQReg128(dd), arT, nameQReg128(nn), arTs, laneNo);
   7176          return True;
   7177       }
   7178       /* else fall through */
   7179    }
   7180 
   7181    /* ---------------- DUP (general, vector) ---------------- */
   7182    /* 31  28    23  20   15     9 4
   7183       0q0 01110 000 imm5 000011 n d  DUP Vd.T, Rn
   7184       Q=0 writes 64, Q=1 writes 128
   7185       imm5: xxxx1  8B(q=0)      or 16b(q=1),     R=W
   7186             xxx10  4H(q=0)      or 8H(q=1),      R=W
   7187             xx100  2S(q=0)      or 4S(q=1),      R=W
   7188             x1000  Invalid(q=0) or 2D(q=1),      R=X
   7189             x0000  Invalid(q=0) or Invalid(q=1)
   7190    */
   7191    if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
   7192        && INSN(15,10) == BITS6(0,0,0,0,1,1)) {
   7193       Bool   isQ  = INSN(30,30) == 1;
   7194       UInt   imm5 = INSN(20,16);
   7195       UInt   nn   = INSN(9,5);
   7196       UInt   dd   = INSN(4,0);
   7197       IRTemp w0   = newTemp(Ity_I64);
   7198       const HChar* arT = "??";
   7199       IRType laneTy = Ity_INVALID;
   7200       if (imm5 & 1) {
   7201          arT    = isQ ? "16b" : "8b";
   7202          laneTy = Ity_I8;
   7203          assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
   7204       }
   7205       else if (imm5 & 2) {
   7206          arT    = isQ ? "8h" : "4h";
   7207          laneTy = Ity_I16;
   7208          assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
   7209       }
   7210       else if (imm5 & 4) {
   7211          arT    = isQ ? "4s" : "2s";
   7212          laneTy = Ity_I32;
   7213          assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
   7214       }
   7215       else if ((imm5 & 8) && isQ) {
   7216          arT    = "2d";
   7217          laneTy = Ity_I64;
   7218          assign(w0, getIReg64orZR(nn));
   7219       }
   7220       else {
   7221          /* invalid; leave laneTy unchanged. */
   7222       }
   7223       /* */
   7224       if (laneTy != Ity_INVALID) {
   7225          IRTemp w1 = math_DUP_TO_64(w0, laneTy);
   7226          putQReg128(dd, binop(Iop_64HLtoV128,
   7227                               isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
   7228          DIP("dup %s.%s, %s\n",
   7229              nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
   7230          return True;
   7231       }
   7232       /* else fall through */
   7233    }
   7234 
   7235    /* --------------------- {S,U}ADDLV --------------------- */
   7236    /* 31  28    23 21           9 4
   7237       0qu 01110 sz 110000001110 n d  {U,S}ADDLV Vd, Vn.T
   7238 
   7239       sz V T(q=1/0)
   7240       -- - ----
   7241       00 h 16/8b
   7242       01 s 8/4h
   7243       10 d 4s (q can't be 0)
   7244       11 invalid
   7245    */
   7246    if (INSN(31,31) == 0 && INSN(28, 24) == BITS5(0,1,1,1,0)
   7247        && INSN(21, 10) == BITS12(1,1,0,0,0,0,0,0,1,1,1,0)) {
   7248       UInt bitQ = INSN(30,30);
   7249       UInt bitU = INSN(29,29);
   7250       UInt sz   = INSN(23,22);
   7251       UInt nn   = INSN(9,5);
   7252       UInt dd   = INSN(4,0);
   7253 
   7254       Bool valid = !((sz == BITS2(1,1)) || (bitQ == 0 && sz == BITS2(1,0)));
   7255       if (valid) {
   7256         const IRType ddTypes[3] = { Ity_I16, Ity_I32, Ity_I64 };
   7257         const HChar* suffixesQ[3] = { "16b", "8h", "4s" };
   7258         const HChar* suffixesq[3] = { "8b", "4h", "invalid" };
   7259 
   7260         IRTemp src = newTemp(Ity_V128);
   7261         IRExpr* half = mkU64(0xFFFFFFFFFFFFFFFFULL);
   7262         IRExpr* zero = mkU64(0x0);
   7263 
   7264         IRExpr* mask = binop(Iop_64HLtoV128, zero, half);
   7265         assign(src, bitQ ? getQReg128(nn) : binop(Iop_AndV128, getQReg128(nn), mask));
   7266 
   7267         IROp op;
   7268         switch (sz) {
   7269         case BITS2(0,0): op = bitU ? Iop_AddLV8Ux16 : Iop_AddLV8Sx16; break;
   7270         case BITS2(0,1): op = bitU ? Iop_AddLV16Ux8 : Iop_AddLV16Sx8; break;
   7271         case BITS2(1,0): op = bitU ? Iop_AddLV32Ux4 : Iop_AddLV32Sx4; break;
   7272         default: vassert(0);
   7273         }
   7274 
   7275         putQReg128(dd, unop(op, mkexpr(src)));
   7276 
   7277         DIP("%saddlv %s, %s.%s\n", bitU ? "u" : "s", nameQRegLO(dd, ddTypes[sz]),
   7278             nameQReg128(nn), bitQ ? suffixesQ[sz] : suffixesq[sz]);
   7279 
   7280         return True;
   7281       }
   7282       /* else fall through */
   7283    }
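           /* Note on {S,U}ADDLV above: for the q == 0 forms the source is
              first ANDed with a mask that clears the upper 64 bits.  The
              zeroed lanes contribute nothing to the across-lanes sum, so
              the full-width AddLV op still produces the correct
              half-width result. */
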
   7284    /* ---------------------- {S,U}MOV ---------------------- */
   7285    /* 31  28        20   15     9 4
   7286       0q0 01110 000 imm5 001111 n d  UMOV Xd/Wd, Vn.Ts[index]
   7287       0q0 01110 000 imm5 001011 n d  SMOV Xd/Wd, Vn.Ts[index]
   7288       dest is Xd when q==1, Wd when q==0
   7289       UMOV:
   7290          Ts,index,ops = case q:imm5 of
   7291                           0:xxxx1 -> B, xxxx, 8Uto64
   7292                           1:xxxx1 -> invalid
   7293                           0:xxx10 -> H, xxx,  16Uto64
   7294                           1:xxx10 -> invalid
   7295                           0:xx100 -> S, xx,   32Uto64
   7296                           1:xx100 -> invalid
   7297                           1:x1000 -> D, x,    copy64
   7298                           other   -> invalid
   7299       SMOV:
   7300          Ts,index,ops = case q:imm5 of
   7301                           0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
   7302                           1:xxxx1 -> B, xxxx, 8Sto64
   7303                           0:xxx10 -> H, xxx,  (32Uto64 . 16Sto32)
   7304                           1:xxx10 -> H, xxx,  16Sto64
   7305                           0:xx100 -> invalid
   7306                           1:xx100 -> S, xx,   32Sto64
   7307                           1:x1000 -> invalid
   7308                           other   -> invalid
   7309    */
   7310    if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
   7311        && (INSN(15,10) & BITS6(1,1,1,0,1,1)) == BITS6(0,0,1,0,1,1)) {
   7312       UInt bitQ = INSN(30,30) == 1;
   7313       UInt imm5 = INSN(20,16);
   7314       UInt nn   = INSN(9,5);
   7315       UInt dd   = INSN(4,0);
   7316       Bool isU  = INSN(12,12) == 1;
   7317       const HChar* arTs = "??";
   7318       UInt    laneNo = 16; /* invalid */
   7319       // Setting 'res' to non-NULL determines valid/invalid
   7320       IRExpr* res    = NULL;
   7321       if (!bitQ && (imm5 & 1)) { // 0:xxxx1
   7322          laneNo = (imm5 >> 1) & 15;
   7323          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
   7324          res = isU ? unop(Iop_8Uto64, lane)
   7325                    : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
   7326          arTs = "b";
   7327       }
   7328       else if (bitQ && (imm5 & 1)) { // 1:xxxx1
   7329          laneNo = (imm5 >> 1) & 15;
   7330          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
   7331          res = isU ? NULL
   7332                    : unop(Iop_8Sto64, lane);
   7333          arTs = "b";
   7334       }
   7335       else if (!bitQ && (imm5 & 2)) { // 0:xxx10
   7336          laneNo = (imm5 >> 2) & 7;
   7337          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
   7338          res = isU ? unop(Iop_16Uto64, lane)
   7339                    : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
   7340          arTs = "h";
   7341       }
   7342       else if (bitQ && (imm5 & 2)) { // 1:xxx10
   7343          laneNo = (imm5 >> 2) & 7;
   7344          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
   7345          res = isU ? NULL
   7346                    : unop(Iop_16Sto64, lane);
   7347          arTs = "h";
   7348       }
   7349       else if (!bitQ && (imm5 & 4)) { // 0:xx100
   7350          laneNo = (imm5 >> 3) & 3;
   7351          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
   7352          res = isU ? unop(Iop_32Uto64, lane)
   7353                    : NULL;
   7354          arTs = "s";
   7355       }
   7356       else if (bitQ && (imm5 & 4)) { // 1:xx100
   7357          laneNo = (imm5 >> 3) & 3;
   7358          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
   7359          res = isU ? NULL
   7360                    : unop(Iop_32Sto64, lane);
   7361          arTs = "s";
   7362       }
   7363       else if (bitQ && (imm5 & 8)) { // 1:x1000
   7364          laneNo = (imm5 >> 4) & 1;
   7365          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
   7366          res = isU ? lane
   7367                    : NULL;
   7368          arTs = "d";
   7369       }
   7370       /* */
   7371       if (res) {
   7372          vassert(laneNo < 16);
   7373          putIReg64orZR(dd, res);
   7374          DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
   7375              nameIRegOrZR(bitQ == 1, dd),
   7376              nameQReg128(nn), arTs, laneNo);
   7377          return True;
   7378       }
   7379       /* else fall through */
   7380    }
   7381 
   7382    /* -------------------- INS (general) -------------------- */
   7383    /* 31  28       20   15     9 4
   7384       010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
   7385       where Ts,ix = case imm5 of xxxx1 -> B, xxxx
   7386                                  xxx10 -> H, xxx
   7387                                  xx100 -> S, xx
   7388                                  x1000 -> D, x
   7389    */
   7390    if (INSN(31,21) == BITS11(0,1,0,0,1,1,1,0,0,0,0)
   7391        && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
   7392       UInt    imm5   = INSN(20,16);
   7393       UInt    nn     = INSN(9,5);
   7394       UInt    dd     = INSN(4,0);
   7395       HChar   ts     = '?';
   7396       UInt    laneNo = 16;
   7397       IRExpr* src    = NULL;
   7398       if (imm5 & 1) {
   7399          src    = unop(Iop_64to8, getIReg64orZR(nn));
   7400          laneNo = (imm5 >> 1) & 15;
   7401          ts     = 'b';
   7402       }
   7403       else if (imm5 & 2) {
   7404          src    = unop(Iop_64to16, getIReg64orZR(nn));
   7405          laneNo = (imm5 >> 2) & 7;
   7406          ts     = 'h';
   7407       }
   7408       else if (imm5 & 4) {
   7409          src    = unop(Iop_64to32, getIReg64orZR(nn));
   7410          laneNo = (imm5 >> 3) & 3;
   7411          ts     = 's';
   7412       }
   7413       else if (imm5 & 8) {
   7414          src    = getIReg64orZR(nn);
   7415          laneNo = (imm5 >> 4) & 1;
   7416          ts     = 'd';
   7417       }
   7418       /* */
   7419       if (src) {
   7420          vassert(laneNo < 16);
   7421          putQRegLane(dd, laneNo, src);
   7422          DIP("ins %s.%c[%u], %s\n",
   7423              nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
   7424          return True;
   7425       }
   7426       /* else invalid; fall through */
   7427    }
   7428 
   7429    /* -------------------- INS (element) -------------------- */
   7430    /* 31  28       20   15 14   10 9 4
   7431       011 01110000 imm5 0  imm4 1  n d  INS Vd.Ts[ix1], Vn.Ts[ix2]
   7432 
   7433       where Ts, ix1, ix2 = case imm5 of xxxx1 -> B, imm5<4:1>, imm4<3:0>
   7434                                         xxx10 -> H, imm5<4:2>, imm4<3:1>
   7435                                         xx100 -> S, imm5<4:3>, imm4<3:2>
   7436                                         x1000 -> D, imm5<4:4>, imm4<3:3>
   7437    */
   7438    if (INSN(31,21) == BITS11(0,1,1,0,1,1,1,0,0,0,0)
   7439        && INSN(15,15) == 0 && INSN(10,10) == 1 ) {
   7440       UInt   imm5      = INSN(20,16);
   7441       UInt   imm4      = INSN(14,11);
   7442       UInt   nn        = INSN(9,5);
   7443       UInt   dd        = INSN(4,0);
   7444       HChar  ts        = '?';
   7445       IRType ty        = Ity_INVALID;
   7446       UInt   srcLaneNo = 16;
   7447       UInt   dstLaneNo = 16;
   7448 
   7449       if (imm5 & 1) {
   7450          srcLaneNo = imm4;
   7451          dstLaneNo = imm5 >> 1;
   7452          ty = Ity_I8;
   7453          ts = 'b';
   7454       } else if (imm5 & 2) {
   7455          srcLaneNo = imm4 >> 1;
   7456          dstLaneNo = imm5 >> 2;
   7457          ty = Ity_I16;
   7458          ts = 'h';
   7459       } else if (imm5 & 4) {
   7460          srcLaneNo = imm4 >> 2;
   7461          dstLaneNo = imm5 >> 3;
   7462          ty = Ity_I32;
   7463          ts = 's';
   7464       } else if (imm5 & 8) {
   7465          srcLaneNo = imm4 >> 3;
   7466          dstLaneNo = imm5 >> 4;
   7467          ty = Ity_I64;
   7468          ts = 'd';
   7469       }
   7470 
   7471       if (ty != Ity_INVALID) {
   7472          vassert(srcLaneNo < 16);
   7473          vassert(dstLaneNo < 16);
   7474          putQRegLane(dd, dstLaneNo, getQRegLane(nn, srcLaneNo, ty));
   7475          DIP("ins %s.%c[%u], %s.%c[%u]\n",
   7476              nameQReg128(dd), ts, dstLaneNo, nameQReg128(nn), ts, srcLaneNo);
   7477          return True;
   7478       }
   7479 
   7480    }
   7481 
   7482    /* -------------------- NEG (vector) -------------------- */
   7483    /* 31  28    23 21    16      9 4
   7484       0q1 01110 sz 10000 0101110 n d  NEG Vd, Vn
   7485       sz is laneSz, q:sz == 011 is disallowed, as usual
   7486    */
   7487    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
   7488        && INSN(21,10) == BITS12(1,0,0,0,0,0,1,0,1,1,1,0)) {
   7489       Bool isQ    = INSN(30,30) == 1;
   7490       UInt szBlg2 = INSN(23,22);
   7491       UInt nn     = INSN(9,5);
   7492       UInt dd     = INSN(4,0);
   7493       Bool zeroHI = False;
   7494       const HChar* arrSpec = "";
   7495       Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
   7496       if (ok) {
   7497          const IROp opSUB[4]
   7498             = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
   7499          IRTemp res = newTemp(Ity_V128);
   7500          vassert(szBlg2 < 4);
   7501          assign(res, binop(opSUB[szBlg2], mkV128(0x0000), getQReg128(nn)));
   7502          putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
   7503                                : mkexpr(res));
   7504          DIP("neg %s.%s, %s.%s\n",
   7505              nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
   7506          return True;
   7507       }
   7508       /* else fall through */
   7509    }
   7510 
   7511    /* -------------------- TBL, TBX -------------------- */
   7512    /* 31  28        20 15 14  12  9 4
   7513       0q0 01110 000 m  0  len 000 n d  TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
   7514       0q0 01110 000 m  0  len 100 n d  TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
   7515       where Ta = 16b(q=1) or 8b(q=0)
   7516    */
   7517    if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
   7518        && INSN(15,15) == 0 && INSN(11,10) == BITS2(0,0)) {
   7519       Bool isQ   = INSN(30,30) == 1;
   7520       Bool isTBX = INSN(12,12) == 1;
   7521       UInt mm    = INSN(20,16);
   7522       UInt len   = INSN(14,13);
   7523       UInt nn    = INSN(9,5);
   7524       UInt dd    = INSN(4,0);
   7525       /* The out-of-range values to use. */
   7526       IRTemp oor_values = newTemp(Ity_V128);
   7527       assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
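               /* A table index that is out of range (>= 16 * (len+1)) selects the
                  corresponding byte of |oor_values|: zero for TBL, the original
                  destination byte for TBX, which is why Vd is read here before
                  being overwritten. */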
   7528       /* src value */
   7529       IRTemp src = newTemp(Ity_V128);
   7530       assign(src, getQReg128(mm));
   7531       /* The table values */
   7532       IRTemp tab[4];
   7533       UInt   i;
   7534       for (i = 0; i <= len; i++) {
   7535          vassert(i < 4);
   7536          tab[i] = newTemp(Ity_V128);
   7537          assign(tab[i], getQReg128((nn + i) % 32));
   7538       }
   7539       IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
   7540       putQReg128(dd, isQ ? mkexpr(res)
   7541                          : unop(Iop_ZeroHI64ofV128, mkexpr(res)) );
   7542       const HChar* Ta = isQ ? "16b" : "8b";
   7543       const HChar* nm = isTBX ? "tbx" : "tbl";
    7544       DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
   7545           nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
   7546       return True;
   7547    }
   7548    /* FIXME Temporary hacks to get through ld.so FIXME */
   7549 
   7550    /* ------------------ movi vD.4s, #0x0 ------------------ */
   7551    /* 0x4F 0x00 0x04 000 vD */
   7552    if ((insn & 0xFFFFFFE0) == 0x4F000400) {
   7553       UInt vD = INSN(4,0);
   7554       putQReg128(vD, mkV128(0x0000));
   7555       DIP("movi v%u.4s, #0x0\n", vD);
   7556       return True;
   7557    }
   7558 
   7559    /* ---------------- MOV vD.16b, vN.16b ---------------- */
   7560    /* 31        23  20 15     9 4
   7561       010 01110 101 m  000111 n d   ORR vD.16b, vN.16b, vM.16b
   7562       This only handles the N == M case.
   7563    */
   7564    if (INSN(31,24) == BITS8(0,1,0,0,1,1,1,0)
   7565        && INSN(23,21) == BITS3(1,0,1) && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
   7566       UInt mm = INSN(20,16);
   7567       UInt nn = INSN(9,5);
   7568       UInt dd = INSN(4,0);
   7569       if (mm == nn) {
   7570          putQReg128(dd, getQReg128(nn));
   7571          DIP("mov v%u.16b, v%u.16b\n", dd, nn);
   7572          return True;
   7573       }
   7574       /* else it's really an ORR; fall through. */
   7575    }
   7576 
   7577    /* ---------------- CMEQ_d_d_#0 ---------------- */
   7578    /*
   7579       010 11110 11 10000 0100 110 n d   CMEQ Dd, Dn, #0
   7580    */
   7581    if ((INSN(31,0) & 0xFFFFFC00) == 0x5EE09800) {
   7582       UInt nn = INSN(9,5);
   7583       UInt dd = INSN(4,0);
   7584       putQReg128(dd, unop(Iop_ZeroHI64ofV128,
   7585                           binop(Iop_CmpEQ64x2, getQReg128(nn),
   7586                                 mkV128(0x0000))));
   7587       DIP("cmeq d%u, d%u, #0\n", dd, nn);
   7588       return True;
   7589    }
   7590 
   7591    /* ---------------- SHL_d_d_#imm ---------------- */
   7592    /* 31         22 21  18 15     9 4
   7593       010 111110 1  ih3 ib 010101 n d  SHL Dd, Dn, #(ih3:ib)
   7594    */
   7595    if (INSN(31,22) == BITS10(0,1,0,1,1,1,1,1,0,1)
   7596        && INSN(15,10) == BITS6(0,1,0,1,0,1)) {
   7597       UInt nn = INSN(9,5);
   7598       UInt dd = INSN(4,0);
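               /* For this encoding bit 22 (the top bit of immh) is 1, so the
                  architectural shift amount, (immh:immb) - 64 for the 64-bit
                  form, reduces to just bits 21..16 of the instruction. */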
   7599       UInt sh = INSN(21,16);
   7600       vassert(sh < 64);
   7601       putQReg128(dd, unop(Iop_ZeroHI64ofV128,
   7602                           binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
   7603       DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
   7604       return True;
   7605    }
   7606    vex_printf("ARM64 front end: simd_and_fp\n");
   7607    return False;
   7608 #  undef INSN
   7609 }
   7610 
   7611 
   7612 /*------------------------------------------------------------*/
   7613 /*--- Disassemble a single ARM64 instruction               ---*/
   7614 /*------------------------------------------------------------*/
   7615 
   7616 /* Disassemble a single ARM64 instruction into IR.  The instruction
    7617    is located at |guest_instr| and has guest IP of
   7618    |guest_PC_curr_instr|, which will have been set before the call
   7619    here.  Returns True iff the instruction was decoded, in which case
   7620    *dres will be set accordingly, or False, in which case *dres should
   7621    be ignored by the caller. */
   7622 
   7623 static
   7624 Bool disInstr_ARM64_WRK (
   7625         /*MB_OUT*/DisResult* dres,
   7626         Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
   7627         Bool         resteerCisOk,
   7628         void*        callback_opaque,
   7629         UChar*       guest_instr,
   7630         VexArchInfo* archinfo,
   7631         VexAbiInfo*  abiinfo
   7632      )
   7633 {
   7634    // A macro to fish bits out of 'insn'.
   7635 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   7636 
   7637 //ZZ    DisResult dres;
   7638 //ZZ    UInt      insn;
   7639 //ZZ    //Bool      allow_VFP = False;
   7640 //ZZ    //UInt      hwcaps = archinfo->hwcaps;
   7641 //ZZ    IRTemp    condT; /* :: Ity_I32 */
   7642 //ZZ    UInt      summary;
   7643 //ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
   7644 //ZZ
   7645 //ZZ    /* What insn variants are we supporting today? */
   7646 //ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
   7647 //ZZ    // etc etc
   7648 
   7649    /* Set result defaults. */
   7650    dres->whatNext    = Dis_Continue;
   7651    dres->len         = 4;
   7652    dres->continueAt  = 0;
   7653    dres->jk_StopHere = Ijk_INVALID;
   7654 
   7655    /* At least this is simple on ARM64: insns are all 4 bytes long, and
   7656       4-aligned.  So just fish the whole thing out of memory right now
   7657       and have done. */
   7658    UInt insn = getUIntLittleEndianly( guest_instr );
   7659 
   7660    if (0) vex_printf("insn: 0x%x\n", insn);
   7661 
   7662    DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);
   7663 
   7664    vassert(0 == (guest_PC_curr_instr & 3ULL));
   7665 
   7666    /* ----------------------------------------------------------- */
   7667 
   7668    /* Spot "Special" instructions (see comment at top of file). */
   7669    {
   7670       UChar* code = (UChar*)guest_instr;
   7671       /* Spot the 16-byte preamble:
   7672             93CC0D8C   ror x12, x12, #3
   7673             93CC358C   ror x12, x12, #13
   7674             93CCCD8C   ror x12, x12, #51
   7675             93CCF58C   ror x12, x12, #61
   7676       */
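               /* The four rotate amounts sum to 128, i.e. to 0 modulo 64, so the
                  preamble leaves x12 unchanged and is harmless if it is ever
                  executed for real. */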
   7677       UInt word1 = 0x93CC0D8C;
   7678       UInt word2 = 0x93CC358C;
   7679       UInt word3 = 0x93CCCD8C;
   7680       UInt word4 = 0x93CCF58C;
   7681       if (getUIntLittleEndianly(code+ 0) == word1 &&
   7682           getUIntLittleEndianly(code+ 4) == word2 &&
   7683           getUIntLittleEndianly(code+ 8) == word3 &&
   7684           getUIntLittleEndianly(code+12) == word4) {
   7685          /* Got a "Special" instruction preamble.  Which one is it? */
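                  /* Each special sequence is the 16-byte preamble plus one 4-byte
                     marker insn, 20 bytes in all; hence the +20 and len = 20
                     adjustments below. */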
   7686          if (getUIntLittleEndianly(code+16) == 0xAA0A014A
   7687                                                /* orr x10,x10,x10 */) {
   7688             /* X3 = client_request ( X4 ) */
   7689             DIP("x3 = client_request ( x4 )\n");
   7690             putPC(mkU64( guest_PC_curr_instr + 20 ));
   7691             dres->jk_StopHere = Ijk_ClientReq;
   7692             dres->whatNext    = Dis_StopHere;
   7693             return True;
   7694          }
   7695          else
   7696          if (getUIntLittleEndianly(code+16) == 0xAA0B016B
   7697                                                /* orr x11,x11,x11 */) {
   7698             /* X3 = guest_NRADDR */
   7699             DIP("x3 = guest_NRADDR\n");
   7700             dres->len = 20;
   7701             putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
   7702             return True;
   7703          }
   7704          else
   7705          if (getUIntLittleEndianly(code+16) == 0xAA0C018C
   7706                                                /* orr x12,x12,x12 */) {
   7707             /*  branch-and-link-to-noredir X8 */
   7708             DIP("branch-and-link-to-noredir x8\n");
   7709             putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
   7710             putPC(getIReg64orZR(8));
   7711             dres->jk_StopHere = Ijk_NoRedir;
   7712             dres->whatNext    = Dis_StopHere;
   7713             return True;
   7714          }
   7715          else
   7716          if (getUIntLittleEndianly(code+16) == 0xAA090129
   7717                                                /* orr x9,x9,x9 */) {
   7718             /* IR injection */
   7719             DIP("IR injection\n");
   7720             vex_inject_ir(irsb, Iend_LE);
   7721             // Invalidate the current insn. The reason is that the IRop we're
   7722             // injecting here can change. In which case the translation has to
   7723             // be redone. For ease of handling, we simply invalidate all the
   7724             // time.
   7725             stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
   7726             stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
   7727             putPC(mkU64( guest_PC_curr_instr + 20 ));
   7728             dres->whatNext    = Dis_StopHere;
   7729             dres->jk_StopHere = Ijk_InvalICache;
   7730             return True;
   7731          }
   7732          /* We don't know what it is. */
   7733          return False;
   7734          /*NOTREACHED*/
   7735       }
   7736    }
   7737 
   7738    /* ----------------------------------------------------------- */
   7739 
   7740    /* Main ARM64 instruction decoder starts here. */
   7741 
   7742    Bool ok = False;
   7743 
   7744    /* insn[28:25] determines the top-level grouping, so let's start
   7745       off with that.
   7746 
   7747       For all of these dis_ARM64_ functions, we pass *dres with the
   7748       normal default results "insn OK, 4 bytes long, keep decoding" so
   7749       they don't need to change it.  However, decodes of control-flow
   7750       insns may cause *dres to change.
   7751    */
   7752    switch (INSN(28,25)) {
   7753       case BITS4(1,0,0,0): case BITS4(1,0,0,1):
   7754          // Data processing - immediate
   7755          ok = dis_ARM64_data_processing_immediate(dres, insn);
   7756          break;
   7757       case BITS4(1,0,1,0): case BITS4(1,0,1,1):
   7758          // Branch, exception generation and system instructions
   7759          ok = dis_ARM64_branch_etc(dres, insn, archinfo);
   7760          break;
   7761       case BITS4(0,1,0,0): case BITS4(0,1,1,0):
   7762       case BITS4(1,1,0,0): case BITS4(1,1,1,0):
   7763          // Loads and stores
   7764          ok = dis_ARM64_load_store(dres, insn);
   7765          break;
   7766       case BITS4(0,1,0,1): case BITS4(1,1,0,1):
   7767          // Data processing - register
   7768          ok = dis_ARM64_data_processing_register(dres, insn);
   7769          break;
   7770       case BITS4(0,1,1,1): case BITS4(1,1,1,1):
   7771          // Data processing - SIMD and floating point
   7772          ok = dis_ARM64_simd_and_fp(dres, insn);
   7773          break;
   7774       case BITS4(0,0,0,0): case BITS4(0,0,0,1):
   7775       case BITS4(0,0,1,0): case BITS4(0,0,1,1):
   7776          // UNALLOCATED
   7777          break;
   7778       default:
   7779          vassert(0); /* Can't happen */
   7780    }
   7781 
   7782    /* If the next-level down decoders failed, make sure |dres| didn't
   7783       get changed. */
   7784    if (!ok) {
   7785       vassert(dres->whatNext    == Dis_Continue);
   7786       vassert(dres->len         == 4);
   7787       vassert(dres->continueAt  == 0);
   7788       vassert(dres->jk_StopHere == Ijk_INVALID);
   7789    }
   7790 
   7791    return ok;
   7792 
   7793 #  undef INSN
   7794 }
   7795 
   7796 
   7797 /*------------------------------------------------------------*/
   7798 /*--- Top-level fn                                         ---*/
   7799 /*------------------------------------------------------------*/
   7800 
   7801 /* Disassemble a single instruction into IR.  The instruction
   7802    is located in host memory at &guest_code[delta]. */
   7803 
   7804 DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
   7805                            Bool         (*resteerOkFn) ( void*, Addr64 ),
   7806                            Bool         resteerCisOk,
   7807                            void*        callback_opaque,
   7808                            UChar*       guest_code_IN,
   7809                            Long         delta_IN,
   7810                            Addr64       guest_IP,
   7811                            VexArch      guest_arch,
   7812                            VexArchInfo* archinfo,
   7813                            VexAbiInfo*  abiinfo,
   7814                            Bool         host_bigendian_IN,
   7815                            Bool         sigill_diag_IN )
   7816 {
   7817    DisResult dres;
   7818    vex_bzero(&dres, sizeof(dres));
   7819 
   7820    /* Set globals (see top of this file) */
   7821    vassert(guest_arch == VexArchARM64);
   7822 
   7823    irsb                = irsb_IN;
   7824    host_is_bigendian   = host_bigendian_IN;
   7825    guest_PC_curr_instr = (Addr64)guest_IP;
   7826 
   7827    /* Sanity checks */
   7828    /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   7829    vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   7830    vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
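            /* That is, the minimum D- and I-cache line sizes must lie between
               2^2 = 4 bytes and 2^17 = 128KB. */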
   7831 
   7832    /* Try to decode */
   7833    Bool ok = disInstr_ARM64_WRK( &dres,
   7834                                  resteerOkFn, resteerCisOk, callback_opaque,
   7835                                  (UChar*)&guest_code_IN[delta_IN],
   7836                                  archinfo, abiinfo );
   7837    if (ok) {
   7838       /* All decode successes end up here. */
   7839       vassert(dres.len == 4 || dres.len == 20);
   7840       switch (dres.whatNext) {
   7841          case Dis_Continue:
   7842             putPC( mkU64(dres.len + guest_PC_curr_instr) );
   7843             break;
   7844          case Dis_ResteerU:
   7845          case Dis_ResteerC:
   7846             putPC(mkU64(dres.continueAt));
   7847             break;
   7848          case Dis_StopHere:
   7849             break;
   7850          default:
   7851             vassert(0);
   7852       }
   7853       DIP("\n");
   7854    } else {
   7855       /* All decode failures end up here. */
   7856       if (sigill_diag_IN) {
   7857          Int   i, j;
   7858          UChar buf[64];
   7859          UInt  insn
   7860                   = getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] );
   7861          vex_bzero(buf, sizeof(buf));
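                  /* Render the insn as 32 bits, apostrophe-separated in groups of
                     4 and space-separated in groups of 8, e.g.
                     "0101'1110 1110'0000 1001'1000 0000'0000". */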
   7862          for (i = j = 0; i < 32; i++) {
   7863             if (i > 0) {
   7864               if ((i & 7) == 0) buf[j++] = ' ';
   7865               else if ((i & 3) == 0) buf[j++] = '\'';
   7866             }
   7867             buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
   7868          }
   7869          vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
   7870          vex_printf("disInstr(arm64): %s\n", buf);
   7871       }
   7872 
   7873       /* Tell the dispatcher that this insn cannot be decoded, and so
   7874          has not been executed, and (is currently) the next to be
   7875          executed.  PC should be up-to-date since it is made so at the
   7876          start of each insn, but nevertheless be paranoid and update
   7877          it again right now. */
   7878       putPC( mkU64(guest_PC_curr_instr) );
   7879       dres.whatNext    = Dis_StopHere;
   7880       dres.len         = 0;
   7881       dres.continueAt  = 0;
   7882       dres.jk_StopHere = Ijk_NoDecode;
   7883    }
   7884    return dres;
   7885 }
   7886 
   7887 ////////////////////////////////////////////////////////////////////////
   7888 ////////////////////////////////////////////////////////////////////////
   7889 
   7890 /* Spare code for doing reference implementations of various 128-bit
   7891    SIMD interleaves/deinterleaves/concatenation ops.  For 64-bit
   7892    equivalents see the end of guest_arm_toIR.c. */
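
         /* Throughout these helpers, lane 0 is the least significant lane of the
            vector, and the lane lists in the comments below are written most
            significant lane first. */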
   7893 
   7894 ////////////////////////////////////////////////////////////////
   7895 // 64x2 operations
   7896 //
   7897 static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 )
   7898 {
   7899   // returns a0 b0
   7900   return binop(Iop_64HLtoV128, unop(Iop_V128to64, mkexpr(a10)),
   7901                                unop(Iop_V128to64, mkexpr(b10)));
   7902 }
   7903 
   7904 static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 )
   7905 {
   7906   // returns a1 b1
   7907   return binop(Iop_64HLtoV128, unop(Iop_V128HIto64, mkexpr(a10)),
   7908                                unop(Iop_V128HIto64, mkexpr(b10)));
   7909 }
   7910 
   7911 
   7912 ////////////////////////////////////////////////////////////////
   7913 // 32x4 operations
   7914 //
   7915 
   7916 // Split a 128 bit value into 4 32 bit ones, in 64-bit IRTemps with
   7917 // the top halves guaranteed to be zero.
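         // Callers may pass NULL for any lane they do not need; the same
         // convention applies to breakV128to16s and breakV128to8s below.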
   7918 static void breakV128to32s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
   7919                              IRTemp* out0, IRTemp v128 )
   7920 {
   7921   if (out3) *out3 = newTemp(Ity_I64);
   7922   if (out2) *out2 = newTemp(Ity_I64);
   7923   if (out1) *out1 = newTemp(Ity_I64);
   7924   if (out0) *out0 = newTemp(Ity_I64);
   7925   IRTemp hi64 = newTemp(Ity_I64);
   7926   IRTemp lo64 = newTemp(Ity_I64);
   7927   assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
   7928   assign(lo64, unop(Iop_V128to64,   mkexpr(v128)) );
   7929   if (out3) assign(*out3, binop(Iop_Shr64, mkexpr(hi64), mkU8(32)));
   7930   if (out2) assign(*out2, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFFFFFF)));
   7931   if (out1) assign(*out1, binop(Iop_Shr64, mkexpr(lo64), mkU8(32)));
   7932   if (out0) assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFFFFFF)));
   7933 }
   7934 
   7935 // Make a V128 bit value from 4 32 bit ones, each of which is in a 64 bit
   7936 // IRTemp.
   7937 static IRTemp mkV128from32s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
   7938 {
   7939   IRTemp hi64 = newTemp(Ity_I64);
   7940   IRTemp lo64 = newTemp(Ity_I64);
   7941   assign(hi64,
   7942          binop(Iop_Or64,
   7943                binop(Iop_Shl64, mkexpr(in3), mkU8(32)),
   7944                binop(Iop_And64, mkexpr(in2), mkU64(0xFFFFFFFF))));
   7945   assign(lo64,
   7946          binop(Iop_Or64,
   7947                binop(Iop_Shl64, mkexpr(in1), mkU8(32)),
   7948                binop(Iop_And64, mkexpr(in0), mkU64(0xFFFFFFFF))));
   7949   IRTemp res = newTemp(Ity_V128);
   7950   assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
   7951   return res;
   7952 }
   7953 
   7954 static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 )
   7955 {
   7956   // returns a2 a0 b2 b0
   7957   IRTemp a2, a0, b2, b0;
   7958   breakV128to32s(NULL, &a2, NULL, &a0, a3210);
   7959   breakV128to32s(NULL, &b2, NULL, &b0, b3210);
   7960   return mkexpr(mkV128from32s(a2, a0, b2, b0));
   7961 }
   7962 
   7963 static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 )
   7964 {
   7965   // returns a3 a1 b3 b1
   7966   IRTemp a3, a1, b3, b1;
   7967   breakV128to32s(&a3, NULL, &a1, NULL, a3210);
   7968   breakV128to32s(&b3, NULL, &b1, NULL, b3210);
   7969   return mkexpr(mkV128from32s(a3, a1, b3, b1));
   7970 }
   7971 
   7972 static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 )
   7973 {
   7974   // returns a1 b1 a0 b0
   7975   IRTemp a1, a0, b1, b0;
   7976   breakV128to32s(NULL, NULL, &a1, &a0, a3210);
   7977   breakV128to32s(NULL, NULL, &b1, &b0, b3210);
   7978   return mkexpr(mkV128from32s(a1, b1, a0, b0));
   7979 }
   7980 
   7981 static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 )
   7982 {
   7983   // returns a3 b3 a2 b2
   7984   IRTemp a3, a2, b3, b2;
   7985   breakV128to32s(&a3, &a2, NULL, NULL, a3210);
   7986   breakV128to32s(&b3, &b2, NULL, NULL, b3210);
   7987   return mkexpr(mkV128from32s(a3, b3, a2, b2));
   7988 }
   7989 
   7990 ////////////////////////////////////////////////////////////////
   7991 // 16x8 operations
   7992 //
   7993 
   7994 static void breakV128to16s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
   7995                              IRTemp* out4, IRTemp* out3, IRTemp* out2,
    7996                              IRTemp* out1, IRTemp* out0, IRTemp v128 )
   7997 {
   7998   if (out7) *out7 = newTemp(Ity_I64);
   7999   if (out6) *out6 = newTemp(Ity_I64);
   8000   if (out5) *out5 = newTemp(Ity_I64);
   8001   if (out4) *out4 = newTemp(Ity_I64);
   8002   if (out3) *out3 = newTemp(Ity_I64);
   8003   if (out2) *out2 = newTemp(Ity_I64);
   8004   if (out1) *out1 = newTemp(Ity_I64);
   8005   if (out0) *out0 = newTemp(Ity_I64);
   8006   IRTemp hi64 = newTemp(Ity_I64);
   8007   IRTemp lo64 = newTemp(Ity_I64);
   8008   assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
   8009   assign(lo64, unop(Iop_V128to64,   mkexpr(v128)) );
   8010   if (out7)
   8011     assign(*out7, binop(Iop_And64,
   8012                         binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
   8013                         mkU64(0xFFFF)));
   8014   if (out6)
   8015     assign(*out6, binop(Iop_And64,
   8016                         binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
   8017                         mkU64(0xFFFF)));
   8018   if (out5)
   8019     assign(*out5, binop(Iop_And64,
   8020                         binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
   8021                         mkU64(0xFFFF)));
   8022   if (out4)
   8023     assign(*out4, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFF)));
   8024   if (out3)
   8025     assign(*out3, binop(Iop_And64,
   8026                         binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
   8027                         mkU64(0xFFFF)));
   8028   if (out2)
   8029     assign(*out2, binop(Iop_And64,
   8030                         binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
   8031                         mkU64(0xFFFF)));
   8032   if (out1)
   8033     assign(*out1, binop(Iop_And64,
   8034                         binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
   8035                         mkU64(0xFFFF)));
   8036   if (out0)
   8037     assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFF)));
   8038 }
   8039 
   8040 static IRTemp mkV128from16s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
   8041                               IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
   8042 {
   8043   IRTemp hi64 = newTemp(Ity_I64);
   8044   IRTemp lo64 = newTemp(Ity_I64);
   8045   assign(hi64,
   8046          binop(Iop_Or64,
   8047                binop(Iop_Or64,
   8048                      binop(Iop_Shl64,
   8049                            binop(Iop_And64, mkexpr(in7), mkU64(0xFFFF)),
   8050                            mkU8(48)),
   8051                      binop(Iop_Shl64,
   8052                            binop(Iop_And64, mkexpr(in6), mkU64(0xFFFF)),
   8053                            mkU8(32))),
   8054                binop(Iop_Or64,
   8055                      binop(Iop_Shl64,
   8056                            binop(Iop_And64, mkexpr(in5), mkU64(0xFFFF)),
   8057                            mkU8(16)),
   8058                      binop(Iop_And64,
   8059                            mkexpr(in4), mkU64(0xFFFF)))));
   8060   assign(lo64,
   8061          binop(Iop_Or64,
   8062                binop(Iop_Or64,
   8063                      binop(Iop_Shl64,
   8064                            binop(Iop_And64, mkexpr(in3), mkU64(0xFFFF)),
   8065                            mkU8(48)),
   8066                      binop(Iop_Shl64,
   8067                            binop(Iop_And64, mkexpr(in2), mkU64(0xFFFF)),
   8068                            mkU8(32))),
   8069                binop(Iop_Or64,
   8070                      binop(Iop_Shl64,
   8071                            binop(Iop_And64, mkexpr(in1), mkU64(0xFFFF)),
   8072                            mkU8(16)),
   8073                      binop(Iop_And64,
   8074                            mkexpr(in0), mkU64(0xFFFF)))));
   8075   IRTemp res = newTemp(Ity_V128);
   8076   assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
   8077   return res;
   8078 }
   8079 
   8080 static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
   8081 {
   8082   // returns a6 a4 a2 a0 b6 b4 b2 b0
   8083   IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
   8084   breakV128to16s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
   8085   breakV128to16s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
   8086   return mkexpr(mkV128from16s(a6, a4, a2, a0, b6, b4, b2, b0));
   8087 }
   8088 
   8089 static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
   8090 {
   8091   // returns a7 a5 a3 a1 b7 b5 b3 b1
   8092   IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
   8093   breakV128to16s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
   8094   breakV128to16s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
   8095   return mkexpr(mkV128from16s(a7, a5, a3, a1, b7, b5, b3, b1));
   8096 }
   8097 
   8098 static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 )
   8099 {
   8100   // returns a3 b3 a2 b2 a1 b1 a0 b0
   8101   IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
   8102   breakV128to16s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
   8103   breakV128to16s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
   8104   return mkexpr(mkV128from16s(a3, b3, a2, b2, a1, b1, a0, b0));
   8105 }
   8106 
   8107 static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 )
   8108 {
   8109   // returns a7 b7 a6 b6 a5 b5 a4 b4
   8110   IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
   8111   breakV128to16s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
   8112   breakV128to16s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
   8113   return mkexpr(mkV128from16s(a7, b7, a6, b6, a5, b5, a4, b4));
   8114 }
   8115 
   8116 ////////////////////////////////////////////////////////////////
   8117 // 8x16 operations
   8118 //
   8119 
   8120 static void breakV128to8s ( IRTemp* outF, IRTemp* outE, IRTemp* outD,
   8121                             IRTemp* outC, IRTemp* outB, IRTemp* outA,
   8122                             IRTemp* out9, IRTemp* out8,
   8123                             IRTemp* out7, IRTemp* out6, IRTemp* out5,
   8124                             IRTemp* out4, IRTemp* out3, IRTemp* out2,
    8125                             IRTemp* out1, IRTemp* out0, IRTemp v128 )
   8126 {
   8127   if (outF) *outF = newTemp(Ity_I64);
   8128   if (outE) *outE = newTemp(Ity_I64);
   8129   if (outD) *outD = newTemp(Ity_I64);
   8130   if (outC) *outC = newTemp(Ity_I64);
   8131   if (outB) *outB = newTemp(Ity_I64);
   8132   if (outA) *outA = newTemp(Ity_I64);
   8133   if (out9) *out9 = newTemp(Ity_I64);
   8134   if (out8) *out8 = newTemp(Ity_I64);
   8135   if (out7) *out7 = newTemp(Ity_I64);
   8136   if (out6) *out6 = newTemp(Ity_I64);
   8137   if (out5) *out5 = newTemp(Ity_I64);
   8138   if (out4) *out4 = newTemp(Ity_I64);
   8139   if (out3) *out3 = newTemp(Ity_I64);
   8140   if (out2) *out2 = newTemp(Ity_I64);
   8141   if (out1) *out1 = newTemp(Ity_I64);
   8142   if (out0) *out0 = newTemp(Ity_I64);
   8143   IRTemp hi64 = newTemp(Ity_I64);
   8144   IRTemp lo64 = newTemp(Ity_I64);
   8145   assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
   8146   assign(lo64, unop(Iop_V128to64,   mkexpr(v128)) );
   8147   if (outF)
   8148     assign(*outF, binop(Iop_And64,
   8149                         binop(Iop_Shr64, mkexpr(hi64), mkU8(56)),
   8150                         mkU64(0xFF)));
   8151   if (outE)
   8152     assign(*outE, binop(Iop_And64,
   8153                         binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
   8154                         mkU64(0xFF)));
   8155   if (outD)
   8156     assign(*outD, binop(Iop_And64,
   8157                         binop(Iop_Shr64, mkexpr(hi64), mkU8(40)),
   8158                         mkU64(0xFF)));
   8159   if (outC)
   8160     assign(*outC, binop(Iop_And64,
   8161                         binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
   8162                         mkU64(0xFF)));
   8163   if (outB)
   8164     assign(*outB, binop(Iop_And64,
   8165                         binop(Iop_Shr64, mkexpr(hi64), mkU8(24)),
   8166                         mkU64(0xFF)));
   8167   if (outA)
   8168     assign(*outA, binop(Iop_And64,
   8169                         binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
   8170                         mkU64(0xFF)));
   8171   if (out9)
   8172     assign(*out9, binop(Iop_And64,
   8173                         binop(Iop_Shr64, mkexpr(hi64), mkU8(8)),
   8174                         mkU64(0xFF)));
   8175   if (out8)
   8176     assign(*out8, binop(Iop_And64,
   8177                         binop(Iop_Shr64, mkexpr(hi64), mkU8(0)),
   8178                         mkU64(0xFF)));
   8179   if (out7)
   8180     assign(*out7, binop(Iop_And64,
   8181                         binop(Iop_Shr64, mkexpr(lo64), mkU8(56)),
   8182                         mkU64(0xFF)));
   8183   if (out6)
   8184     assign(*out6, binop(Iop_And64,
   8185                         binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
   8186                         mkU64(0xFF)));
   8187   if (out5)
   8188     assign(*out5, binop(Iop_And64,
   8189                         binop(Iop_Shr64, mkexpr(lo64), mkU8(40)),
   8190                         mkU64(0xFF)));
   8191   if (out4)
   8192     assign(*out4, binop(Iop_And64,
   8193                         binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
   8194                         mkU64(0xFF)));
   8195   if (out3)
   8196     assign(*out3, binop(Iop_And64,
   8197                         binop(Iop_Shr64, mkexpr(lo64), mkU8(24)),
   8198                         mkU64(0xFF)));
   8199   if (out2)
   8200     assign(*out2, binop(Iop_And64,
   8201                         binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
   8202                         mkU64(0xFF)));
   8203   if (out1)
   8204     assign(*out1, binop(Iop_And64,
   8205                         binop(Iop_Shr64, mkexpr(lo64), mkU8(8)),
   8206                         mkU64(0xFF)));
   8207   if (out0)
   8208     assign(*out0, binop(Iop_And64,
   8209                         binop(Iop_Shr64, mkexpr(lo64), mkU8(0)),
   8210                         mkU64(0xFF)));
   8211 }
   8212 
   8213 static IRTemp mkV128from8s ( IRTemp inF, IRTemp inE, IRTemp inD, IRTemp inC,
   8214                              IRTemp inB, IRTemp inA, IRTemp in9, IRTemp in8,
   8215                              IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
   8216                              IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
   8217 {
   8218   IRTemp vFE = newTemp(Ity_I64);
   8219   IRTemp vDC = newTemp(Ity_I64);
   8220   IRTemp vBA = newTemp(Ity_I64);
   8221   IRTemp v98 = newTemp(Ity_I64);
   8222   IRTemp v76 = newTemp(Ity_I64);
   8223   IRTemp v54 = newTemp(Ity_I64);
   8224   IRTemp v32 = newTemp(Ity_I64);
   8225   IRTemp v10 = newTemp(Ity_I64);
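           // Pack the 16 bytes into eight 16-bit pieces (byte X in bits 15:8,
           // byte Y in bits 7:0 of vXY) and let mkV128from16s assemble them.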
   8226   assign(vFE, binop(Iop_Or64,
   8227                     binop(Iop_Shl64,
   8228                           binop(Iop_And64, mkexpr(inF), mkU64(0xFF)), mkU8(8)),
   8229                     binop(Iop_And64, mkexpr(inE), mkU64(0xFF))));
   8230   assign(vDC, binop(Iop_Or64,
   8231                     binop(Iop_Shl64,
   8232                           binop(Iop_And64, mkexpr(inD), mkU64(0xFF)), mkU8(8)),
   8233                     binop(Iop_And64, mkexpr(inC), mkU64(0xFF))));
   8234   assign(vBA, binop(Iop_Or64,
   8235                     binop(Iop_Shl64,
   8236                           binop(Iop_And64, mkexpr(inB), mkU64(0xFF)), mkU8(8)),
   8237                     binop(Iop_And64, mkexpr(inA), mkU64(0xFF))));
   8238   assign(v98, binop(Iop_Or64,
   8239                     binop(Iop_Shl64,
   8240                           binop(Iop_And64, mkexpr(in9), mkU64(0xFF)), mkU8(8)),
   8241                     binop(Iop_And64, mkexpr(in8), mkU64(0xFF))));
   8242   assign(v76, binop(Iop_Or64,
   8243                     binop(Iop_Shl64,
   8244                           binop(Iop_And64, mkexpr(in7), mkU64(0xFF)), mkU8(8)),
   8245                     binop(Iop_And64, mkexpr(in6), mkU64(0xFF))));
   8246   assign(v54, binop(Iop_Or64,
   8247                     binop(Iop_Shl64,
   8248                           binop(Iop_And64, mkexpr(in5), mkU64(0xFF)), mkU8(8)),
   8249                     binop(Iop_And64, mkexpr(in4), mkU64(0xFF))));
   8250   assign(v32, binop(Iop_Or64,
   8251                     binop(Iop_Shl64,
   8252                           binop(Iop_And64, mkexpr(in3), mkU64(0xFF)), mkU8(8)),
   8253                     binop(Iop_And64, mkexpr(in2), mkU64(0xFF))));
   8254   assign(v10, binop(Iop_Or64,
   8255                     binop(Iop_Shl64,
   8256                           binop(Iop_And64, mkexpr(in1), mkU64(0xFF)), mkU8(8)),
   8257                     binop(Iop_And64, mkexpr(in0), mkU64(0xFF))));
   8258   return mkV128from16s(vFE, vDC, vBA, v98, v76, v54, v32, v10);
   8259 }
   8260 
   8261 static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
   8262                                      IRTemp bFEDCBA9876543210 )
   8263 {
   8264   // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
   8265   IRTemp aE, aC, aA, a8, a6, a4, a2, a0, bE, bC, bA, b8, b6, b4, b2, b0;
   8266   breakV128to8s(NULL, &aE, NULL, &aC, NULL, &aA, NULL, &a8,
   8267                 NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0,
   8268                 aFEDCBA9876543210);
   8269   breakV128to8s(NULL, &bE, NULL, &bC, NULL, &bA, NULL, &b8,
   8270                 NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0,
   8271                 bFEDCBA9876543210);
   8272   return mkexpr(mkV128from8s(aE, aC, aA, a8, a6, a4, a2, a0,
   8273                              bE, bC, bA, b8, b6, b4, b2, b0));
   8274 }
   8275 
   8276 static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
   8277                                     IRTemp bFEDCBA9876543210 )
   8278 {
   8279   // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
   8280   IRTemp aF, aD, aB, a9, a7, a5, a3, a1, bF, bD, bB, b9, b7, b5, b3, b1;
   8281   breakV128to8s(&aF, NULL, &aD, NULL, &aB, NULL, &a9, NULL,
   8282                 &a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL,
   8283                 aFEDCBA9876543210);
   8284 
   8285   breakV128to8s(&bF, NULL, &bD, NULL, &bB, NULL, &b9, NULL,
   8286                 &b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL,
    8287                 bFEDCBA9876543210);
   8288 
   8289   return mkexpr(mkV128from8s(aF, aD, aB, a9, a7, a5, a3, a1,
   8290                              bF, bD, bB, b9, b7, b5, b3, b1));
   8291 }
   8292 
   8293 static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
   8294                                      IRTemp bFEDCBA9876543210 )
   8295 {
   8296   // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
   8297   IRTemp a7, b7, a6, b6, a5, b5, a4, b4, a3, b3, a2, b2, a1, b1, a0, b0;
   8298   breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   8299                 &a7,  &a6,  &a5,  &a4,  &a3,  &a2,  &a1,  &a0,
   8300                 aFEDCBA9876543210);
   8301   breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   8302                 &b7,  &b6,  &b5,  &b4,  &b3,  &b2,  &b1,  &b0,
   8303                 bFEDCBA9876543210);
   8304   return mkexpr(mkV128from8s(a7, b7, a6, b6, a5, b5, a4, b4,
   8305                              a3, b3, a2, b2, a1, b1, a0, b0));
   8306 }
   8307 
   8308 static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
   8309                                      IRTemp bFEDCBA9876543210 )
   8310 {
   8311   // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
   8312   IRTemp aF, bF, aE, bE, aD, bD, aC, bC, aB, bB, aA, bA, a9, b9, a8, b8;
   8313   breakV128to8s(&aF,  &aE,  &aD,  &aC,  &aB,  &aA,  &a9,  &a8,
   8314                 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   8315                 aFEDCBA9876543210);
   8316   breakV128to8s(&bF,  &bE,  &bD,  &bC,  &bB,  &bA,  &b9,  &b8,
   8317                 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
   8318                 bFEDCBA9876543210);
   8319   return mkexpr(mkV128from8s(aF, bF, aE, bE, aD, bD, aC, bC,
   8320                              aB, bB, aA, bA, a9, b9, a8, b8));
   8321 }
   8322 
   8323 /*--------------------------------------------------------------------*/
   8324 /*--- end                                       guest_arm64_toIR.c ---*/
   8325 /*--------------------------------------------------------------------*/
   8326