/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                     guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2013 OpenWorks
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* KNOWN LIMITATIONS 2014-Nov-16

   * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.

     Also, the "unordered" FP comparisons are implemented as normal FP
     comparisons.

     Both should be fixed.  They behave incorrectly in the presence of
     NaNs.

     FMULX is treated the same as FMUL.  That's also not correct.

   * Floating multiply-add (etc) insns are split into a multiply and
     an add, and so suffer double rounding; hence the least
     significant mantissa bit is sometimes incorrect.  Fix: use the IR
     multiply-add IROps instead.

   * FRINTA, FRINTN are kludged: they just round to nearest, with no
     special handling of the "ties" case.  FRINTX might be dubious too.

   * Ditto FCVTXN, whose required "round to odd" mode is not
     implemented; this implementation just rounds to nearest.
*/

/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
      (ror x12, x12, #3;   ror x12, x12, #13
       ror x12, x12, #51;  ror x12, x12, #61)

   Following that, exactly one of the following 4 is allowed
   (standard interpretation in parentheses):

      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
      AA090129 (orr x9,x9,x9)      IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/
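
/* Illustrative sketch only (not the decoder itself): recognising the
   preamble amounts to comparing four little-endian words against the
   fixed values above, using getUIntLittleEndianly (defined below):

      Bool isSpecial
         =    getUIntLittleEndianly(code +  0) == 0x93CC0D8C
           && getUIntLittleEndianly(code +  4) == 0x93CC358C
           && getUIntLittleEndianly(code +  8) == 0x93CCCD8C
           && getUIntLittleEndianly(code + 12) == 0x93CCF58C;

   after which the marker word at code + 16 is compared against the
   four "orr" encodings listed above. */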

/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means the
   value does not change during translation of the instruction.
*/

/* CONST: what is the host's endianness?  We need to know this in
   order to do sub-register accesses to the SIMD/FP registers
   correctly. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated.  */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- arm insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( const UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}
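
/* Worked example: the bytes { 0x8C, 0x0D, 0xCC, 0x93 } in memory
   decode to 0x93CC0D8C (the first preamble word above), whether the
   host itself is little- or big-endian. */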

/* Sign extend an N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   Long r = (Long)x;
   r = (r << (64-n)) >> (64-n);
   return (ULong)r;
}
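
/* Worked example: sx_to_64(0x2A, 6) sees bit 5 set in the 6-bit
   value 0b101010 and so returns 0xFFFFFFFFFFFFFFEA (-22), whereas
   sx_to_64(0x0A, 6), with bit 5 clear, returns 0x0A unchanged. */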

//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host. */
//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
//ZZ {
//ZZ    UShort w = 0;
//ZZ    w = (w << 8) | p[1];
//ZZ    w = (w << 8) | p[0];
//ZZ    return w;
//ZZ }
//ZZ
//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
//ZZ    vassert(sh >= 0 && sh < 32);
//ZZ    if (sh == 0)
//ZZ       return x;
//ZZ    else
//ZZ       return (x << (32-sh)) | (x >> sh);
//ZZ }
//ZZ
//ZZ static Int popcount32 ( UInt x )
//ZZ {
//ZZ    Int res = 0, i;
//ZZ    for (i = 0; i < 32; i++) {
//ZZ       res += (x & 1);
//ZZ       x >>= 1;
//ZZ    }
//ZZ    return res;
//ZZ }
//ZZ
//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
//ZZ {
//ZZ    UInt mask = 1 << ix;
//ZZ    x &= ~mask;
//ZZ    x |= ((b << ix) & mask);
//ZZ    return x;
//ZZ }

#define BITS2(_b1,_b0)  \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0)  \
   (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0)  \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)  \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b10) << 10)  \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11,_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b11) << 11)  \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define X00 BITS2(0,0)
#define X01 BITS2(0,1)
#define X10 BITS2(1,0)
#define X11 BITS2(1,1)

// produces _uint[_bMax:_bMin]
#define SLICE_UInt(_uint,_bMax,_bMin)  \
   (( ((UInt)(_uint)) >> (_bMin))  \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
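
/* Illustrative usage: BITS4(1,0,1,1) builds the constant 0xB, and
   SLICE_UInt(0xDEADBEEF, 15, 8) extracts bits [15:8], giving 0xBE.
   The decoder matches and deconstructs instruction fields this way,
   e.g. SLICE_UInt(insn, 4, 0) for a typical 5-bit register field. */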


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.    ---*/
/*------------------------------------------------------------*/

static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* This is used in many places, so the brevity is an advantage. */
static IRTemp newTempV128(void)
{
   return newTemp(Ity_V128);
}

/* Initialise V128 temporaries en masse. */
static
void newTempsV128_2(IRTemp* t1, IRTemp* t2)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
}

static
void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
}

static
void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
}

static
void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
                    IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   vassert(t5 && *t5 == IRTemp_INVALID);
   vassert(t6 && *t6 == IRTemp_INVALID);
   vassert(t7 && *t7 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
   *t5 = newTempV128();
   *t6 = newTempV128();
   *t7 = newTempV128();
}

//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
//ZZ    IRRoundingMode. */
//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
//ZZ {
//ZZ    return mkU32(Irrm_NEAREST);
//ZZ }
//ZZ
//ZZ /* Generate an expression for SRC rotated right by ROT. */
//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
//ZZ {
//ZZ    vassert(rot >= 0 && rot < 32);
//ZZ    if (rot == 0)
//ZZ       return mkexpr(src);
//ZZ    return
//ZZ       binop(Iop_Or32,
//ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
//ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
//ZZ }
//ZZ
//ZZ static IRExpr* mkU128 ( ULong i )
//ZZ {
//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
//ZZ }
//ZZ
//ZZ /* Generate a 4-aligned version of the given expression if
//ZZ    the given condition is true.  Else return it unchanged. */
//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
//ZZ {
//ZZ    if (b)
//ZZ       return binop(Iop_And32, e, mkU32(~3));
//ZZ    else
//ZZ       return e;
//ZZ }

/* Other IR construction helpers. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}

static IROp mkDIVF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_DivF32;
      case Ity_F64: return Iop_DivF64;
      default: vpanic("mkDIVF");
   }
}

static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}

static IROp mkABSF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AbsF32;
      case Ity_F64: return Iop_AbsF64;
      default: vpanic("mkABSF");
   }
}

static IROp mkSQRTF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SqrtF32;
      case Ity_F64: return Iop_SqrtF64;
      default: vpanic("mkSQRTF");
   }
}

static IROp mkVecADD ( UInt size ) {
   const IROp ops[4]
      = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
   vassert(size < 4);
   return ops[size];
}
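
/* In the mkVec* selectors here and below, 'size' follows the usual
   AArch64 SIMD encoding: 0, 1, 2, 3 select 8-, 16-, 32- and 64-bit
   lanes respectively, so the lane width is (8 << size) bits.  For
   example, mkVecADD(2) is Iop_Add32x4, a 4-lane 32-bit add on V128. */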

static IROp mkVecQADDU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
          Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
          Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSUB ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSARN ( UInt size ) {
   const IROp ops[4]
      = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHRN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHLN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATEVENLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
          Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATODDLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
          Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVELO ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
          Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVEHI ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
          Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMUL ( UInt size ) {
   const IROp ops[4]
      = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
   vassert(size < 3);
   return ops[size];
}

static IROp mkVecMULLU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecCMPEQ ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTU ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTS ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecABS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
   const IROp ops[4]
      = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
          Iop_ZeroHI96ofV128,  Iop_ZeroHI64ofV128 };
   vassert(size < 4);
   return ops[size];
}
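
/* For example, mkVecZEROHIxxOFV128(2) is Iop_ZeroHI96ofV128, which
   keeps the low 32 bits of a V128 and zeroes bits 127:32, as used
   when a scalar result occupies the low lane of a vector register. */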

static IRExpr* mkU ( IRType ty, ULong imm ) {
   switch (ty) {
      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
      case Ity_I64: return mkU64(imm);
      default: vpanic("mkU");
   }
}

static IROp mkVecQDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQRDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
          Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
          Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
          Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
          Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
          Iop_NarrowUn64to32x2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Ux8,  Iop_QNarrowUn32Sto16Ux4,
          Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Sx8,  Iop_QNarrowUn32Sto16Sx4,
          Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Uto8Ux8,  Iop_QNarrowUn32Uto16Ux4,
          Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQShrNnarrow16Uto8Ux8,  Iop_QandQShrNnarrow32Uto16Ux4,
          Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Sx8,  Iop_QandQSarNnarrow32Sto16Sx4,
          Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Ux8,  Iop_QandQSarNnarrow32Sto16Ux4,
          Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRShrNnarrow16Uto8Ux8,  Iop_QandQRShrNnarrow32Uto16Ux4,
          Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Sx8,  Iop_QandQRSarNnarrow32Sto16Sx4,
          Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Ux8,  Iop_QandQRSarNnarrow32Sto16Ux4,
          Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQSHLNSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
          Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
          Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
          Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecADDF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
   vassert(size < 4);
   return ops[size];
}

/* Generate IR to create 'arg rotated right by imm', for sane values
   of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   if (imm == 0) {
      return arg;
   }
   IRTemp res = newTemp(ty);
   assign(res, binop(mkOR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
   return res;
}
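
/* Worked example: mathROR(Ity_I32, t, 8) generates
      Or32( Shl32(t, 24), Shr32(t, 8) )
   so a value 0x11223344 in t comes out as 0x44112233. */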

/* Generate IR to set the returned temp to either all-zeroes or
   all ones, as a copy of arg<imm>. */
static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   IRTemp res = newTemp(ty);
   assign(res, binop(mkSAR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
                     mkU8(w - 1)));
   return res;
}
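
/* Worked example: mathREPLICATE(Ity_I32, t, 4) shifts bit 4 up to
   bit 31 and arithmetic-shifts it back down 31 places, so t = 0x10
   yields 0xFFFFFFFF and t = 0x08 yields 0x00000000. */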

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}

/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8:  return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)

#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG   offsetof(VexGuestARM64State,guest_QCFLAG)

#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)


/* ---------------- Integer registers ---------------- */

static Int offsetIReg64 ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed. */
   switch (iregNo) {
      case 0:  return OFFB_X0;
      case 1:  return OFFB_X1;
      case 2:  return OFFB_X2;
      case 3:  return OFFB_X3;
      case 4:  return OFFB_X4;
      case 5:  return OFFB_X5;
      case 6:  return OFFB_X6;
      case 7:  return OFFB_X7;
      case 8:  return OFFB_X8;
      case 9:  return OFFB_X9;
      case 10: return OFFB_X10;
      case 11: return OFFB_X11;
      case 12: return OFFB_X12;
      case 13: return OFFB_X13;
      case 14: return OFFB_X14;
      case 15: return OFFB_X15;
      case 16: return OFFB_X16;
      case 17: return OFFB_X17;
      case 18: return OFFB_X18;
      case 19: return OFFB_X19;
      case 20: return OFFB_X20;
      case 21: return OFFB_X21;
      case 22: return OFFB_X22;
      case 23: return OFFB_X23;
      case 24: return OFFB_X24;
      case 25: return OFFB_X25;
      case 26: return OFFB_X26;
      case 27: return OFFB_X27;
      case 28: return OFFB_X28;
      case 29: return OFFB_X29;
      case 30: return OFFB_X30;
      /* but not 31 */
      default: vassert(0);
   }
}

static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
}

static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}

static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}
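
/* Illustrative sketch of how these helpers compose (a hypothetical
   fragment, not part of the decoder): the IR for "add x1, x2, x3"
   would be built roughly as

      putIReg64orZR(1, binop(Iop_Add64,
                             getIReg64orZR(2),
                             getIReg64orZR(3)));

   The ZR variants make writes to register number 31 vanish and reads
   of it yield zero, exactly as the architecture requires; the SP
   variants instead map number 31 to the stack pointer. */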

static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}


/* ---------------- Vector (Q) registers ---------------- */

static Int offsetQReg128 ( UInt qregNo )
{
   /* We don't care about endianness at this point.  It only becomes
      relevant when dealing with sections of these registers. */
   switch (qregNo) {
      case 0:  return OFFB_Q0;
      case 1:  return OFFB_Q1;
      case 2:  return OFFB_Q2;
      case 3:  return OFFB_Q3;
      case 4:  return OFFB_Q4;
      case 5:  return OFFB_Q5;
      case 6:  return OFFB_Q6;
      case 7:  return OFFB_Q7;
      case 8:  return OFFB_Q8;
      case 9:  return OFFB_Q9;
      case 10: return OFFB_Q10;
      case 11: return OFFB_Q11;
      case 12: return OFFB_Q12;
      case 13: return OFFB_Q13;
      case 14: return OFFB_Q14;
      case 15: return OFFB_Q15;
      case 16: return OFFB_Q16;
      case 17: return OFFB_Q17;
      case 18: return OFFB_Q18;
      case 19: return OFFB_Q19;
      case 20: return OFFB_Q20;
      case 21: return OFFB_Q21;
      case 22: return OFFB_Q22;
      case 23: return OFFB_Q23;
      case 24: return OFFB_Q24;
      case 25: return OFFB_Q25;
      case 26: return OFFB_Q26;
      case 27: return OFFB_Q27;
      case 28: return OFFB_Q28;
      case 29: return OFFB_Q29;
      case 30: return OFFB_Q30;
      case 31: return OFFB_Q31;
      default: vassert(0);
   }
}

/* Write to a complete Qreg. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}

/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on. */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1:  return Ity_I8;
      case 2:  return Ity_I16;
      case 4:  return Ity_I32; //Ity_F32;
      case 8:  return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}

/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(host_endness == VexEndnessLE);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8:                 laneSzB = 1;  break;
      case Ity_F16: case Ity_I16:  laneSzB = 2;  break;
      case Ity_F32: case Ity_I32:  laneSzB = 4;  break;
      case Ity_F64: case Ity_I64:  laneSzB = 8;  break;
      case Ity_V128:               laneSzB = 16; break;
      default: break;
   }
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   vassert(maxOff < 16);
   return base + minOff;
}
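
/* Worked example: offsetQRegLane(7, Ity_I16, 3) is the offset of
   lane 3 of q7 viewed as 16-bit lanes, i.e. offsetQReg128(7) + 6,
   since lanes of size 2 sit at byte offsets 0, 2, 4, 6, ... within
   the little-endian Qreg. */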

/* Put to the least significant lane of a Qreg. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg. */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_F16: case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}

static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}

static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}

/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}

static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}

static void putQRegHI64 ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegHI64(qregNo);
   switch (ty) {
      case Ity_I64: case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are plain wrong
   }
   stmt(IRStmt_Put(off, e));
}

/* Put to a specified lane of a Qreg. */
static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
{
   IRType laneTy  = typeOfIRExpr(irsb->tyenv, e);
   Int    off     = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_F64: case Ity_I64:
      case Ity_I32: case Ity_F32:
      case Ity_I16: case Ity_F16:
      case Ity_I8:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from a specified lane of a Qreg. */
static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
{
   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
      case Ity_F64: case Ity_F32: case Ity_F16:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, laneTy);
}


//ZZ /* ---------------- Misc registers ---------------- */
//ZZ
//ZZ static void putMiscReg32 ( UInt    gsoffset,
//ZZ                            IRExpr* e, /* :: Ity_I32 */
//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
//ZZ {
//ZZ    switch (gsoffset) {
//ZZ       case OFFB_FPSCR:   break;
//ZZ       case OFFB_QFLAG32: break;
//ZZ       case OFFB_GEFLAG0: break;
//ZZ       case OFFB_GEFLAG1: break;
//ZZ       case OFFB_GEFLAG2: break;
//ZZ       case OFFB_GEFLAG3: break;
//ZZ       default: vassert(0); /* awaiting more cases */
//ZZ    }
//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
//ZZ
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional write */
//ZZ       stmt(IRStmt_Put(gsoffset, e));
//ZZ    } else {
//ZZ       stmt(IRStmt_Put(
//ZZ          gsoffset,
//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
//ZZ       ));
//ZZ    }
//ZZ }
//ZZ
//ZZ static IRTemp get_ITSTATE ( void )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_ITSTATE ( IRTemp t )
   1605 //ZZ {
   1606 //ZZ    ASSERT_IS_THUMB;
   1607 //ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
   1608 //ZZ }
   1609 //ZZ
   1610 //ZZ static IRTemp get_QFLAG32 ( void )
   1611 //ZZ {
   1612 //ZZ    IRTemp t = newTemp(Ity_I32);
   1613 //ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
   1614 //ZZ    return t;
   1615 //ZZ }
   1616 //ZZ
   1617 //ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
   1618 //ZZ {
   1619 //ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
   1620 //ZZ }
   1621 //ZZ
   1622 //ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
   1623 //ZZ    Status Register) to indicate that overflow or saturation occurred.
   1624 //ZZ    Nb: t must be zero to denote no saturation, and any nonzero
   1625 //ZZ    value to indicate saturation. */
   1626 //ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
   1627 //ZZ {
   1628 //ZZ    IRTemp old = get_QFLAG32();
   1629 //ZZ    IRTemp nyu = newTemp(Ity_I32);
   1630 //ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
   1631 //ZZ    put_QFLAG32(nyu, condT);
   1632 //ZZ }
   1633 
   1634 
   1635 /* ---------------- FPCR stuff ---------------- */
   1636 
   1637 /* Generate IR to get hold of the rounding mode bits in FPCR, and
   1638    convert them to IR format.  Bind the final result to the
   1639    returned temp. */
   1640 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
   1641 {
   1642    /* The ARMvfp encoding for rounding mode bits is:
   1643          00  to nearest
   1644          01  to +infinity
   1645          10  to -infinity
   1646          11  to zero
   1647       We need to convert that to the IR encoding:
   1648          00  to nearest (the default)
   1649          10  to +infinity
   1650          01  to -infinity
   1651          11  to zero
   1652       Which can be done by swapping bits 0 and 1.
   1653       The rmode bits are at 23:22 in FPCR.
   1654    */
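           /* As a worked example (illustrative only): if FPCR[23:22]
              holds 01 ("to +infinity"), the swap below computes
              ((01 << 1) & 2) | ((01 >> 1) & 1) = 10, which is the IR
              encoding of "to +infinity", as required. */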
   1655    IRTemp armEncd = newTemp(Ity_I32);
   1656    IRTemp swapped = newTemp(Ity_I32);
   1657    /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
   1658       we don't zero out bits 24 and above, since the assignment to
   1659       'swapped' will mask them out anyway. */
   1660    assign(armEncd,
   1661           binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
   1662    /* Now swap them. */
   1663    assign(swapped,
   1664           binop(Iop_Or32,
   1665                 binop(Iop_And32,
   1666                       binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
   1667                       mkU32(2)),
   1668                 binop(Iop_And32,
   1669                       binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
   1670                       mkU32(1))
   1671          ));
   1672    return swapped;
   1673 }
   1674 
   1675 
   1676 /*------------------------------------------------------------*/
   1677 /*--- Helpers for flag handling and conditional insns      ---*/
   1678 /*------------------------------------------------------------*/
   1679 
   1680 static const HChar* nameARM64Condcode ( ARM64Condcode cond )
   1681 {
   1682    switch (cond) {
   1683       case ARM64CondEQ:  return "eq";
   1684       case ARM64CondNE:  return "ne";
   1685       case ARM64CondCS:  return "cs";  // or 'hs'
   1686       case ARM64CondCC:  return "cc";  // or 'lo'
   1687       case ARM64CondMI:  return "mi";
   1688       case ARM64CondPL:  return "pl";
   1689       case ARM64CondVS:  return "vs";
   1690       case ARM64CondVC:  return "vc";
   1691       case ARM64CondHI:  return "hi";
   1692       case ARM64CondLS:  return "ls";
   1693       case ARM64CondGE:  return "ge";
   1694       case ARM64CondLT:  return "lt";
   1695       case ARM64CondGT:  return "gt";
   1696       case ARM64CondLE:  return "le";
   1697       case ARM64CondAL:  return "al";
   1698       case ARM64CondNV:  return "nv";
   1699       default: vpanic("nameARM64Condcode");
   1700    }
   1701 }
   1702 
   1703 /* and a handy shorthand for it */
   1704 static const HChar* nameCC ( ARM64Condcode cond ) {
   1705    return nameARM64Condcode(cond);
   1706 }
   1707 
   1708 
   1709 /* Build IR to calculate some particular condition from stored
   1710    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   1711    Ity_I64, suitable for narrowing.  Although the return type is
   1712    Ity_I64, the returned value is either 0 or 1.  'cond' must be
   1713    :: Ity_I64 and must denote the condition to compute in
   1714    bits 7:4, and be zero everywhere else.
   1715 */
   1716 static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
   1717 {
   1718    vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   1719    /* And 'cond' had better produce a value in which only bits 7:4 are
   1720       nonzero.  However, obviously we can't assert for that. */
   1721 
   1722    /* So what we're constructing for the first argument is
   1723       "(cond << 4) | stored-operation".
   1724       However, as per comments above, 'cond' must be supplied
   1725       pre-shifted to this function.
   1726 
   1727       This pairing scheme requires that the ARM64_CC_OP_ values all fit
   1728       in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
   1729       8 bits of the first argument. */
   1730    IRExpr** args
   1731       = mkIRExprVec_4(
   1732            binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
   1733            IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
   1734            IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
   1735            IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
   1736         );
   1737    IRExpr* call
   1738       = mkIRExprCCall(
   1739            Ity_I64,
   1740            0/*regparm*/,
   1741            "arm64g_calculate_condition", &arm64g_calculate_condition,
   1742            args
   1743         );
   1744 
   1745    /* Exclude the requested condition, OP and NDEP from definedness
   1746       checking.  We're only interested in DEP1 and DEP2. */
   1747    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1748    return call;
   1749 }
   1750 
   1751 
   1752 /* Build IR to calculate some particular condition from stored
   1753    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   1754    Ity_I64, suitable for narrowing.  Although the return type is
   1755    Ity_I64, the returned value is either 0 or 1.
   1756 */
   1757 static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
   1758 {
   1759   /* The first arg to the dyn helper becomes "(cond << 4) | cc_op",
   1760      which requires that the ARM64_CC_OP_ values all fit in 4 bits.
   1761      Hence we pass a (COND, OP) pair in the low 8 bits of that arg. */
   1762    vassert(cond >= 0 && cond <= 15);
   1763    return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
   1764 }
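        /* For example, mk_arm64g_calculate_condition(ARM64CondEQ) yields an
           Ity_I64 expression which evaluates to 1 exactly when the stored
           thunk implies Z == 1, and to 0 otherwise. */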
   1765 
   1766 
   1767 /* Build IR to calculate just the carry flag from stored
   1768    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1769    Ity_I64. */
   1770 static IRExpr* mk_arm64g_calculate_flag_c ( void )
   1771 {
   1772    IRExpr** args
   1773       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
   1774                        IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
   1775                        IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
   1776                        IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   1777    IRExpr* call
   1778       = mkIRExprCCall(
   1779            Ity_I64,
   1780            0/*regparm*/,
   1781            "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
   1782            args
   1783         );
   1784    /* Exclude OP and NDEP from definedness checking.  We're only
   1785       interested in DEP1 and DEP2. */
   1786    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1787    return call;
   1788 }
   1789 
   1790 
   1791 //ZZ /* Build IR to calculate just the overflow flag from stored
   1792 //ZZ    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   1793 //ZZ    Ity_I32. */
   1794 //ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
   1795 //ZZ {
   1796 //ZZ    IRExpr** args
   1797 //ZZ       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
   1798 //ZZ                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
   1799 //ZZ                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
   1800 //ZZ                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
   1801 //ZZ    IRExpr* call
   1802 //ZZ       = mkIRExprCCall(
   1803 //ZZ            Ity_I32,
   1804 //ZZ            0/*regparm*/,
   1805 //ZZ            "armg_calculate_flag_v", &armg_calculate_flag_v,
   1806 //ZZ            args
   1807 //ZZ         );
   1808 //ZZ    /* Exclude OP and NDEP from definedness checking.  We're only
   1809 //ZZ       interested in DEP1 and DEP2. */
   1810 //ZZ    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1811 //ZZ    return call;
   1812 //ZZ }
   1813 
   1814 
   1815 /* Build IR to calculate N Z C V in bits 31:28 of the
   1816    returned word. */
   1817 static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
   1818 {
   1819    IRExpr** args
   1820       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
   1821                        IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
   1822                        IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
   1823                        IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   1824    IRExpr* call
   1825       = mkIRExprCCall(
   1826            Ity_I64,
   1827            0/*regparm*/,
   1828            "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
   1829            args
   1830         );
   1831    /* Exclude OP and NDEP from definedness checking.  We're only
   1832       interested in DEP1 and DEP2. */
   1833    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   1834    return call;
   1835 }
   1836 
   1837 
   1838 /* Build IR to set the flags thunk, in the most general case. */
   1839 static
   1840 void setFlags_D1_D2_ND ( UInt cc_op,
   1841                          IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
   1842 {
   1843    vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
   1844    vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
   1845    vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
   1846    vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
   1847    stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(cc_op) ));
   1848    stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   1849    stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   1850    stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
   1851 }
   1852 
   1853 /* Build IR to set the flags thunk after ADD or SUB. */
   1854 static
   1855 void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
   1856 {
   1857    IRTemp argL64 = IRTemp_INVALID;
   1858    IRTemp argR64 = IRTemp_INVALID;
   1859    IRTemp z64    = newTemp(Ity_I64);
   1860    if (is64) {
   1861       argL64 = argL;
   1862       argR64 = argR;
   1863    } else {
   1864       argL64 = newTemp(Ity_I64);
   1865       argR64 = newTemp(Ity_I64);
   1866       assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
   1867       assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
   1868    }
   1869    assign(z64, mkU64(0));
   1870    UInt cc_op = ARM64G_CC_OP_NUMBER;
   1871    /**/ if ( isSUB &&  is64) { cc_op = ARM64G_CC_OP_SUB64; }
   1872    else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
   1873    else if (!isSUB &&  is64) { cc_op = ARM64G_CC_OP_ADD64; }
   1874    else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
   1875    else                      { vassert(0); }
   1876    setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
   1877 }
   1878 
   1879 /* Build IR to set the flags thunk after ADC or SBC. */
   1880 static
   1881 void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
   1882                         IRTemp argL, IRTemp argR, IRTemp oldC )
   1883 {
   1884    IRTemp argL64 = IRTemp_INVALID;
   1885    IRTemp argR64 = IRTemp_INVALID;
   1886    IRTemp oldC64 = IRTemp_INVALID;
   1887    if (is64) {
   1888       argL64 = argL;
   1889       argR64 = argR;
   1890       oldC64 = oldC;
   1891    } else {
   1892       argL64 = newTemp(Ity_I64);
   1893       argR64 = newTemp(Ity_I64);
   1894       oldC64 = newTemp(Ity_I64);
   1895       assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
   1896       assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
   1897       assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
   1898    }
   1899    UInt cc_op = ARM64G_CC_OP_NUMBER;
   1900    /**/ if ( isSBC &&  is64) { cc_op = ARM64G_CC_OP_SBC64; }
   1901    else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
   1902    else if (!isSBC &&  is64) { cc_op = ARM64G_CC_OP_ADC64; }
   1903    else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
   1904    else                      { vassert(0); }
   1905    setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
   1906 }
   1907 
   1908 /* Build IR to set the flags thunk after ADD or SUB, if the given
   1909    condition evaluates to True at run time.  If not, the flags are set
   1910    to the specified NZCV value. */
   1911 static
   1912 void setFlags_ADD_SUB_conditionally (
   1913         Bool is64, Bool isSUB,
   1914         IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
   1915      )
   1916 {
   1917    /* Generate IR as follows:
   1918         CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
   1919         CC_DEP1 = ITE(cond, argL64, nzcv << 28)
   1920         CC_DEP2 = ITE(cond, argR64, 0)
   1921         CC_NDEP = 0
   1922    */
   1923 
   1924    IRTemp z64 = newTemp(Ity_I64);
   1925    assign(z64, mkU64(0));
   1926 
   1927    /* Establish the operation and operands for the True case. */
   1928    IRTemp t_dep1 = IRTemp_INVALID;
   1929    IRTemp t_dep2 = IRTemp_INVALID;
   1930    UInt   t_op   = ARM64G_CC_OP_NUMBER;
   1931    /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
   1932    else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
   1933    else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
   1934    else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
   1935    else                      { vassert(0); }
   1936    /* */
   1937    if (is64) {
   1938       t_dep1 = argL;
   1939       t_dep2 = argR;
   1940    } else {
   1941       t_dep1 = newTemp(Ity_I64);
   1942       t_dep2 = newTemp(Ity_I64);
   1943       assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
   1944       assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
   1945    }
   1946 
   1947    /* Establish the operation and operands for the False case. */
   1948    IRTemp f_dep1 = newTemp(Ity_I64);
   1949    IRTemp f_dep2 = z64;
   1950    UInt   f_op   = ARM64G_CC_OP_COPY;
   1951    assign(f_dep1, mkU64(nzcv << 28));
   1952 
   1953    /* Final thunk values */
   1954    IRTemp dep1 = newTemp(Ity_I64);
   1955    IRTemp dep2 = newTemp(Ity_I64);
   1956    IRTemp op   = newTemp(Ity_I64);
   1957 
   1958    assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
   1959    assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
   1960    assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
   1961 
   1962    /* finally .. */
   1963    stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
   1964    stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
   1965    stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
   1966    stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
   1967 }
   1968 
   1969 /* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
   1970 static
   1971 void setFlags_LOGIC ( Bool is64, IRTemp res )
   1972 {
   1973    IRTemp res64 = IRTemp_INVALID;
   1974    IRTemp z64   = newTemp(Ity_I64);
   1975    UInt   cc_op = ARM64G_CC_OP_NUMBER;
   1976    if (is64) {
   1977       res64 = res;
   1978       cc_op = ARM64G_CC_OP_LOGIC64;
   1979    } else {
   1980       res64 = newTemp(Ity_I64);
   1981       assign(res64, unop(Iop_32Uto64, mkexpr(res)));
   1982       cc_op = ARM64G_CC_OP_LOGIC32;
   1983    }
   1984    assign(z64, mkU64(0));
   1985    setFlags_D1_D2_ND(cc_op, res64, z64, z64);
   1986 }
   1987 
   1988 /* Build IR to set the flags thunk to a given NZCV value.  NZCV is
   1989    located in bits 31:28 of the supplied value. */
   1990 static
   1991 void setFlags_COPY ( IRTemp nzcv_28x0 )
   1992 {
   1993    IRTemp z64 = newTemp(Ity_I64);
   1994    assign(z64, mkU64(0));
   1995    setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
   1996 }
   1997 
   1998 
   1999 //ZZ /* Minor variant of the above that sets NDEP to zero (if it
   2000 //ZZ    sets it at all) */
   2001 //ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
   2002 //ZZ                              IRTemp t_dep2,
   2003 //ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   2004 //ZZ {
   2005 //ZZ    IRTemp z32 = newTemp(Ity_I32);
   2006 //ZZ    assign( z32, mkU32(0) );
   2007 //ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
   2008 //ZZ }
   2009 //ZZ
   2010 //ZZ
   2011 //ZZ /* Minor variant of the above that sets DEP2 to zero (if it
   2012 //ZZ    sets it at all) */
   2013 //ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
   2014 //ZZ                              IRTemp t_ndep,
   2015 //ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   2016 //ZZ {
   2017 //ZZ    IRTemp z32 = newTemp(Ity_I32);
   2018 //ZZ    assign( z32, mkU32(0) );
   2019 //ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
   2020 //ZZ }
   2021 //ZZ
   2022 //ZZ
   2023 //ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
   2024 //ZZ    sets them at all) */
   2025 //ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
   2026 //ZZ                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
   2027 //ZZ {
   2028 //ZZ    IRTemp z32 = newTemp(Ity_I32);
   2029 //ZZ    assign( z32, mkU32(0) );
   2030 //ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
   2031 //ZZ }
   2032 
   2033 
   2034 /*------------------------------------------------------------*/
   2035 /*--- Misc math helpers                                    ---*/
   2036 /*------------------------------------------------------------*/
   2037 
   2038 /* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
   2039 static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
   2040 {
   2041    IRTemp maskT = newTemp(Ity_I64);
   2042    IRTemp res   = newTemp(Ity_I64);
   2043    vassert(sh >= 1 && sh <= 63);
   2044    assign(maskT, mkU64(mask));
   2045    assign( res,
   2046            binop(Iop_Or64,
   2047                  binop(Iop_Shr64,
   2048                        binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
   2049                        mkU8(sh)),
   2050                  binop(Iop_And64,
   2051                        binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
   2052                        mkexpr(maskT))
   2053                  )
   2054            );
   2055    return res;
   2056 }
   2057 
   2058 /* Generates byte swaps within 32-bit lanes. */
   2059 static IRTemp math_UINTSWAP64 ( IRTemp src )
   2060 {
   2061    IRTemp res;
   2062    res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   2063    res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   2064    return res;
   2065 }
   2066 
   2067 /* Generates byte swaps within 16-bit lanes. */
   2068 static IRTemp math_USHORTSWAP64 ( IRTemp src )
   2069 {
   2070    IRTemp res;
   2071    res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   2072    return res;
   2073 }
   2074 
   2075 /* Generates a 64-bit byte swap. */
   2076 static IRTemp math_BYTESWAP64 ( IRTemp src )
   2077 {
   2078    IRTemp res;
   2079    res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   2080    res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   2081    res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
   2082    return res;
   2083 }
   2084 
   2085 /* Generates a 64-bit bit swap. */
   2086 static IRTemp math_BITSWAP64 ( IRTemp src )
   2087 {
   2088    IRTemp res;
   2089    res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
   2090    res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
   2091    res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
   2092    return math_BYTESWAP64(res);
   2093 }
   2094 
   2095 /* Duplicates the bits at the bottom of the given word to fill the
   2096    whole word.  src :: Ity_I64 is assumed to have zeroes everywhere
   2097    except for the bottom bits. */
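        /* For instance (illustrative values): with srcTy == Ity_I8 and src
           holding 0x00000000000000AB, the generated IR computes
           0xABABABABABABABAB via three shift-and-OR steps. */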
   2098 static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
   2099 {
   2100    if (srcTy == Ity_I8) {
   2101       IRTemp t16 = newTemp(Ity_I64);
   2102       assign(t16, binop(Iop_Or64, mkexpr(src),
   2103                                   binop(Iop_Shl64, mkexpr(src), mkU8(8))));
   2104       IRTemp t32 = newTemp(Ity_I64);
   2105       assign(t32, binop(Iop_Or64, mkexpr(t16),
   2106                                   binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
   2107       IRTemp t64 = newTemp(Ity_I64);
   2108       assign(t64, binop(Iop_Or64, mkexpr(t32),
   2109                                   binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
   2110       return t64;
   2111    }
   2112    if (srcTy == Ity_I16) {
   2113       IRTemp t32 = newTemp(Ity_I64);
   2114       assign(t32, binop(Iop_Or64, mkexpr(src),
   2115                                   binop(Iop_Shl64, mkexpr(src), mkU8(16))));
   2116       IRTemp t64 = newTemp(Ity_I64);
   2117       assign(t64, binop(Iop_Or64, mkexpr(t32),
   2118                                   binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
   2119       return t64;
   2120    }
   2121    if (srcTy == Ity_I32) {
   2122       IRTemp t64 = newTemp(Ity_I64);
   2123       assign(t64, binop(Iop_Or64, mkexpr(src),
   2124                                   binop(Iop_Shl64, mkexpr(src), mkU8(32))));
   2125       return t64;
   2126    }
   2127    if (srcTy == Ity_I64) {
   2128       return src;
   2129    }
   2130    vassert(0);
   2131 }
   2132 
   2133 
   2134 /* Duplicates the src element exactly so as to fill a V128 value. */
   2135 static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
   2136 {
   2137    IRTemp res = newTempV128();
   2138    if (srcTy == Ity_F64) {
   2139       IRTemp i64 = newTemp(Ity_I64);
   2140       assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
   2141       assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
   2142       return res;
   2143    }
   2144    if (srcTy == Ity_F32) {
   2145       IRTemp i64a = newTemp(Ity_I64);
   2146       assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
   2147       IRTemp i64b = newTemp(Ity_I64);
   2148       assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
   2149                                    mkexpr(i64a)));
   2150       assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
   2151       return res;
   2152    }
   2153    if (srcTy == Ity_I64) {
   2154       assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
   2155       return res;
   2156    }
   2157    if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
   2158       IRTemp t1 = newTemp(Ity_I64);
   2159       assign(t1, widenUto64(srcTy, mkexpr(src)));
   2160       IRTemp t2 = math_DUP_TO_64(t1, srcTy);
   2161       assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
   2162       return res;
   2163    }
   2164    vassert(0);
   2165 }
   2166 
   2167 
   2168 /* |fullWidth| is a full V128 width result.  Depending on bitQ,
   2169    zero out the upper half. */
   2170 static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
   2171 {
   2172    if (bitQ == 1) return mkexpr(fullWidth);
   2173    if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
   2174    vassert(0);
   2175 }
   2176 
   2177 /* The same, but from an expression instead. */
   2178 static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
   2179 {
   2180    IRTemp fullWidthT = newTempV128();
   2181    assign(fullWidthT, fullWidth);
   2182    return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
   2183 }
   2184 
   2185 
   2186 /*------------------------------------------------------------*/
   2187 /*--- FP comparison helpers                                ---*/
   2188 /*------------------------------------------------------------*/
   2189 
   2190 /* irRes :: Ity_I32 holds a floating point comparison result encoded
   2191    as an IRCmpF64Result.  Generate code to convert it to an
   2192    ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   2193    Assign a new temp to hold that value, and return the temp. */
   2194 static
   2195 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
   2196 {
   2197    IRTemp ix       = newTemp(Ity_I64);
   2198    IRTemp termL    = newTemp(Ity_I64);
   2199    IRTemp termR    = newTemp(Ity_I64);
   2200    IRTemp nzcv     = newTemp(Ity_I64);
   2201    IRTemp irRes    = newTemp(Ity_I64);
   2202 
   2203    /* This is where the fun starts.  We have to convert 'irRes' from
   2204       an IR-convention return result (IRCmpF64Result) to an
   2205       ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
   2206       4 bits of 'nzcv'. */
   2207    /* Map compare result from IR to ARM(nzcv) */
   2208    /*
   2209       FP cmp result | IR   | ARM(nzcv)
   2210       --------------------------------
   2211       UN              0x45   0011
   2212       LT              0x01   1000
   2213       GT              0x00   0010
   2214       EQ              0x40   0110
   2215    */
   2216    /* Now since you're probably wondering WTF ..
   2217 
   2218       ix fishes the useful bits out of the IR value, bits 6 and 0, and
   2219       places them side by side, giving a number which is 0, 1, 2 or 3.
   2220 
   2221       termL is a sequence cooked up by GNU superopt.  It converts ix
   2222          into an almost correct value NZCV value (incredibly), except
   2223          for the case of UN, where it produces 0100 instead of the
   2224          required 0011.
   2225 
   2226       termR is therefore a correction term, also computed from ix.  It
   2227          is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
   2228          the final correct value, we subtract termR from termL.
   2229 
   2230       Don't take my word for it.  There's a test program at the bottom
   2231       of guest_arm_toIR.c, to try this out with.
   2232    */
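           /* A hand-checked instance of the above: for EQ, irRes is 0x40,
              so ix = ((0x40 >> 5) & 3) | (0x40 & 1) = 2, termL = 6 and
              termR = 0, giving nzcv = 0110 as per the table.  For UN,
              irRes is 0x45, so ix = 3, termL = 4, termR = 1, and
              nzcv = 0011. */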
   2233    assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
   2234 
   2235    assign(
   2236       ix,
   2237       binop(Iop_Or64,
   2238             binop(Iop_And64,
   2239                   binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
   2240                   mkU64(3)),
   2241             binop(Iop_And64, mkexpr(irRes), mkU64(1))));
   2242 
   2243    assign(
   2244       termL,
   2245       binop(Iop_Add64,
   2246             binop(Iop_Shr64,
   2247                   binop(Iop_Sub64,
   2248                         binop(Iop_Shl64,
   2249                               binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
   2250                               mkU8(62)),
   2251                         mkU64(1)),
   2252                   mkU8(61)),
   2253             mkU64(1)));
   2254 
   2255    assign(
   2256       termR,
   2257       binop(Iop_And64,
   2258             binop(Iop_And64,
   2259                   mkexpr(ix),
   2260                   binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
   2261             mkU64(1)));
   2262 
   2263    assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   2264    return nzcv;
   2265 }
   2266 
   2267 
   2268 /*------------------------------------------------------------*/
   2269 /*--- Data processing (immediate)                          ---*/
   2270 /*------------------------------------------------------------*/
   2271 
   2272 /* Helper functions for supporting "DecodeBitMasks" */
   2273 
   2274 static ULong dbm_ROR ( Int width, ULong x, Int rot )
   2275 {
   2276    vassert(width > 0 && width <= 64);
   2277    vassert(rot >= 0 && rot < width);
   2278    if (rot == 0) return x;
   2279    ULong res = x >> rot;
   2280    res |= (x << (width - rot));
   2281    if (width < 64)
   2282      res &= ((1ULL << width) - 1);
   2283    return res;
   2284 }
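        /* E.g. dbm_ROR(8, 0x81, 1) == 0xC0: the rotate stays within the
           low 8 bits, and bits above the stated width are masked off. */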
   2285 
   2286 static ULong dbm_RepTo64( Int esize, ULong x )
   2287 {
   2288    switch (esize) {
   2289       case 64:
   2290          return x;
   2291       case 32:
   2292          x &= 0xFFFFFFFF; x |= (x << 32);
   2293          return x;
   2294       case 16:
   2295          x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
   2296          return x;
   2297       case 8:
   2298          x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
   2299          return x;
   2300       case 4:
   2301          x &= 0xF; x |= (x << 4); x |= (x << 8);
   2302          x |= (x << 16); x |= (x << 32);
   2303          return x;
   2304       case 2:
   2305          x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
   2306          x |= (x << 16); x |= (x << 32);
   2307          return x;
   2308       default:
   2309          break;
   2310    }
   2311    vpanic("dbm_RepTo64");
   2312    /*NOTREACHED*/
   2313    return 0;
   2314 }
   2315 
   2316 static Int dbm_highestSetBit ( ULong x )
   2317 {
   2318    Int i;
   2319    for (i = 63; i >= 0; i--) {
   2320       if (x & (1ULL << i))
   2321          return i;
   2322    }
   2323    vassert(x == 0);
   2324    return -1;
   2325 }
   2326 
   2327 static
   2328 Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
   2329                           ULong immN, ULong imms, ULong immr, Bool immediate,
   2330                           UInt M /*32 or 64*/)
   2331 {
   2332    vassert(immN < (1ULL << 1));
   2333    vassert(imms < (1ULL << 6));
   2334    vassert(immr < (1ULL << 6));
   2335    vassert(immediate == False || immediate == True);
   2336    vassert(M == 32 || M == 64);
   2337 
   2338    Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   2339    if (len < 1) { /* printf("fail1\n"); */ return False; }
   2340    vassert(len <= 6);
   2341    vassert(M >= (1 << len));
   2342 
   2343    vassert(len >= 1 && len <= 6);
   2344    ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
   2345                   (1 << len) - 1;
   2346    vassert(levels >= 1 && levels <= 63);
   2347 
   2348    if (immediate && ((imms & levels) == levels)) {
   2349       /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
   2350       return False;
   2351    }
   2352 
   2353    ULong S = imms & levels;
   2354    ULong R = immr & levels;
   2355    Int   diff = S - R;
   2356    diff &= 63;
   2357    Int esize = 1 << len;
   2358    vassert(2 <= esize && esize <= 64);
   2359 
   2360    /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
   2361       same below with d.  S can be 63 in which case we have an out of
   2362       range and hence undefined shift. */
   2363    vassert(S >= 0 && S <= 63);
   2364    vassert(esize >= (S+1));
   2365    ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
   2366                   //(1ULL << (S+1)) - 1;
   2367                   ((1ULL << S) - 1) + (1ULL << S);
   2368 
   2369    Int d = // diff<len-1:0>
   2370            diff & ((1 << len)-1);
   2371    vassert(esize >= (d+1));
   2372    vassert(d >= 0 && d <= 63);
   2373 
   2374    ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
   2375                   //(1ULL << (d+1)) - 1;
   2376                   ((1ULL << d) - 1) + (1ULL << d);
   2377 
   2378    if (esize != 64) vassert(elem_s < (1ULL << esize));
   2379    if (esize != 64) vassert(elem_d < (1ULL << esize));
   2380 
   2381    if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   2382    if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
   2383 
   2384    return True;
   2385 }
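        /* A hand-worked decode (illustrative, not exhaustive): immN=0,
           immr=0, imms=0b111100 gives len=1, esize=2, S=R=0 and
           elem_s=0b01, hence wmask = 0x5555555555555555, the
           alternating-bit immediate. */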
   2386 
   2387 
   2388 static
   2389 Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
   2390                                          UInt insn)
   2391 {
   2392 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   2393 
   2394    /* insn[28:23]
   2395       10000x PC-rel addressing
   2396       10001x Add/subtract (immediate)
   2397       100100 Logical (immediate)
   2398       100101 Move Wide (immediate)
   2399       100110 Bitfield
   2400       100111 Extract
   2401    */
   2402 
   2403    /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   2404    if (INSN(28,24) == BITS5(1,0,0,0,1)) {
   2405       Bool is64   = INSN(31,31) == 1;
   2406       Bool isSub  = INSN(30,30) == 1;
   2407       Bool setCC  = INSN(29,29) == 1;
   2408       UInt sh     = INSN(23,22);
   2409       UInt uimm12 = INSN(21,10);
   2410       UInt nn     = INSN(9,5);
   2411       UInt dd     = INSN(4,0);
   2412       const HChar* nm = isSub ? "sub" : "add";
   2413       if (sh >= 2) {
   2414          /* Invalid; fall through */
   2415       } else {
   2416          vassert(sh <= 1);
   2417          uimm12 <<= (12 * sh);
   2418          if (is64) {
   2419             IRTemp argL  = newTemp(Ity_I64);
   2420             IRTemp argR  = newTemp(Ity_I64);
   2421             IRTemp res   = newTemp(Ity_I64);
   2422             assign(argL, getIReg64orSP(nn));
   2423             assign(argR, mkU64(uimm12));
   2424             assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
   2425                                mkexpr(argL), mkexpr(argR)));
   2426             if (setCC) {
   2427                putIReg64orZR(dd, mkexpr(res));
   2428                setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
   2429                DIP("%ss %s, %s, 0x%x\n",
   2430                    nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
   2431             } else {
   2432                putIReg64orSP(dd, mkexpr(res));
   2433                DIP("%s %s, %s, 0x%x\n",
   2434                    nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
   2435             }
   2436          } else {
   2437             IRTemp argL  = newTemp(Ity_I32);
   2438             IRTemp argR  = newTemp(Ity_I32);
   2439             IRTemp res   = newTemp(Ity_I32);
   2440             assign(argL, getIReg32orSP(nn));
   2441             assign(argR, mkU32(uimm12));
   2442             assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
   2443                                mkexpr(argL), mkexpr(argR)));
   2444             if (setCC) {
   2445                putIReg32orZR(dd, mkexpr(res));
   2446                setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
   2447                DIP("%ss %s, %s, 0x%x\n",
   2448                    nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
   2449             } else {
   2450                putIReg32orSP(dd, mkexpr(res));
   2451                DIP("%s %s, %s, 0x%x\n",
   2452                    nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
   2453             }
   2454          }
   2455          return True;
   2456       }
   2457    }
   2458 
   2459    /* -------------------- ADR/ADRP -------------------- */
   2460    if (INSN(28,24) == BITS5(1,0,0,0,0)) {
   2461       UInt  bP    = INSN(31,31);
   2462       UInt  immLo = INSN(30,29);
   2463       UInt  immHi = INSN(23,5);
   2464       UInt  rD    = INSN(4,0);
   2465       ULong uimm  = (immHi << 2) | immLo;
   2466       ULong simm  = sx_to_64(uimm, 21);
   2467       ULong val;
   2468       if (bP) {
   2469          val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
   2470       } else {
   2471          val = guest_PC_curr_instr + simm;
   2472       }
   2473       putIReg64orZR(rD, mkU64(val));
   2474       DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
   2475       return True;
   2476    }
   2477 
   2478    /* -------------------- LOGIC(imm) -------------------- */
   2479    if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
   2480       /* 31 30 28     22 21   15   9  4
   2481          sf op 100100 N  immr imms Rn Rd
   2482            op=00: AND  Rd|SP, Rn, #imm
   2483            op=01: ORR  Rd|SP, Rn, #imm
   2484            op=10: EOR  Rd|SP, Rn, #imm
   2485            op=11: ANDS Rd|ZR, Rn, #imm
   2486       */
   2487       Bool  is64 = INSN(31,31) == 1;
   2488       UInt  op   = INSN(30,29);
   2489       UInt  N    = INSN(22,22);
   2490       UInt  immR = INSN(21,16);
   2491       UInt  immS = INSN(15,10);
   2492       UInt  nn   = INSN(9,5);
   2493       UInt  dd   = INSN(4,0);
   2494       ULong imm  = 0;
   2495       Bool  ok;
   2496       if (N == 1 && !is64)
   2497          goto after_logic_imm; /* not allowed; fall through */
   2498       ok = dbm_DecodeBitMasks(&imm, NULL,
   2499                               N, immS, immR, True, is64 ? 64 : 32);
   2500       if (!ok)
   2501          goto after_logic_imm;
   2502 
   2503       const HChar* names[4] = { "and", "orr", "eor", "ands" };
   2504       const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
   2505       const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
   2506 
   2507       vassert(op < 4);
   2508       if (is64) {
   2509          IRExpr* argL = getIReg64orZR(nn);
   2510          IRExpr* argR = mkU64(imm);
   2511          IRTemp  res  = newTemp(Ity_I64);
   2512          assign(res, binop(ops64[op], argL, argR));
   2513          if (op < 3) {
   2514             putIReg64orSP(dd, mkexpr(res));
   2515             DIP("%s %s, %s, 0x%llx\n", names[op],
   2516                 nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
   2517          } else {
   2518             putIReg64orZR(dd, mkexpr(res));
   2519             setFlags_LOGIC(True/*is64*/, res);
   2520             DIP("%s %s, %s, 0x%llx\n", names[op],
   2521                 nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
   2522          }
   2523       } else {
   2524          IRExpr* argL = getIReg32orZR(nn);
   2525          IRExpr* argR = mkU32((UInt)imm);
   2526          IRTemp  res  = newTemp(Ity_I32);
   2527          assign(res, binop(ops32[op], argL, argR));
   2528          if (op < 3) {
   2529             putIReg32orSP(dd, mkexpr(res));
   2530             DIP("%s %s, %s, 0x%x\n", names[op],
   2531                 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
   2532          } else {
   2533             putIReg32orZR(dd, mkexpr(res));
   2534             setFlags_LOGIC(False/*!is64*/, res);
   2535             DIP("%s %s, %s, 0x%x\n", names[op],
   2536                 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
   2537          }
   2538       }
   2539       return True;
   2540    }
   2541    after_logic_imm:
   2542 
   2543    /* -------------------- MOV{Z,N,K} -------------------- */
   2544    if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
   2545       /* 31 30 28      22 20    4
   2546          |  |  |       |  |     |
   2547          sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
   2548          sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
   2549          sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
   2550       */
   2551       Bool is64   = INSN(31,31) == 1;
   2552       UInt subopc = INSN(30,29);
   2553       UInt hw     = INSN(22,21);
   2554       UInt imm16  = INSN(20,5);
   2555       UInt dd     = INSN(4,0);
   2556       if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
   2557          /* invalid; fall through */
   2558       } else {
   2559          ULong imm64 = ((ULong)imm16) << (16 * hw);
   2560          if (!is64)
   2561             vassert(imm64 < 0x100000000ULL);
   2562          switch (subopc) {
   2563             case BITS2(1,0): // MOVZ
   2564                putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
   2565                DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
   2566                break;
   2567             case BITS2(0,0): // MOVN
   2568                imm64 = ~imm64;
   2569                if (!is64)
   2570                   imm64 &= 0xFFFFFFFFULL;
   2571                putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
   2572                DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
   2573                break;
   2574             case BITS2(1,1): // MOVK
   2575                /* This is more complex.  We are inserting a slice into
   2576                   the destination register, so we need to have the old
   2577                   value of it. */
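                       /* E.g. (illustrative): movk x0, #0xBEEF, lsl 16
                          keeps bits 63:32 and 15:0 of x0 and deposits
                          0xBEEF into bits 31:16. */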
   2578                if (is64) {
   2579                   IRTemp old = newTemp(Ity_I64);
   2580                   assign(old, getIReg64orZR(dd));
   2581                   ULong mask = 0xFFFFULL << (16 * hw);
   2582                   IRExpr* res
   2583                      = binop(Iop_Or64,
   2584                              binop(Iop_And64, mkexpr(old), mkU64(~mask)),
   2585                              mkU64(imm64));
   2586                   putIReg64orZR(dd, res);
   2587                   DIP("movk %s, 0x%x, lsl %u\n",
   2588                       nameIReg64orZR(dd), imm16, 16*hw);
   2589                } else {
   2590                   IRTemp old = newTemp(Ity_I32);
   2591                   assign(old, getIReg32orZR(dd));
   2592                   vassert(hw <= 1);
   2593                   UInt mask = 0xFFFF << (16 * hw);
   2594                   IRExpr* res
   2595                      = binop(Iop_Or32,
   2596                              binop(Iop_And32, mkexpr(old), mkU32(~mask)),
   2597                              mkU32((UInt)imm64));
   2598                   putIReg32orZR(dd, res);
   2599                   DIP("movk %s, 0x%x, lsl %u\n",
   2600                       nameIReg32orZR(dd), imm16, 16*hw);
   2601                }
   2602                break;
   2603             default:
   2604                vassert(0);
   2605          }
   2606          return True;
   2607       }
   2608    }
   2609 
   2610    /* -------------------- {U,S,}BFM -------------------- */
   2611    /*    30 28     22 21   15   9  4
   2612 
   2613       sf 10 100110 N  immr imms nn dd
   2614          UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
   2615          UBFM Xd, Xn, #immr, #imms   when sf=1, N=1
   2616 
   2617       sf 00 100110 N  immr imms nn dd
   2618          SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
   2619          SBFM Xd, Xn, #immr, #imms   when sf=1, N=1
   2620 
   2621       sf 01 100110 N  immr imms nn dd
   2622          BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
   2623          BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   2624    */
   2625    if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
   2626       UInt sf     = INSN(31,31);
   2627       UInt opc    = INSN(30,29);
   2628       UInt N      = INSN(22,22);
   2629       UInt immR   = INSN(21,16);
   2630       UInt immS   = INSN(15,10);
   2631       UInt nn     = INSN(9,5);
   2632       UInt dd     = INSN(4,0);
   2633       Bool inZero = False;
   2634       Bool extend = False;
   2635       const HChar* nm = "???";
   2636       /* skip invalid combinations */
   2637       switch (opc) {
   2638          case BITS2(0,0):
   2639             inZero = True; extend = True; nm = "sbfm"; break;
   2640          case BITS2(0,1):
   2641             inZero = False; extend = False; nm = "bfm"; break;
   2642          case BITS2(1,0):
   2643             inZero = True; extend = False; nm = "ubfm"; break;
   2644          case BITS2(1,1):
   2645             goto after_bfm; /* invalid */
   2646          default:
   2647             vassert(0);
   2648       }
   2649       if (sf == 1 && N != 1) goto after_bfm;
   2650       if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
   2651                              || ((immS >> 5) & 1) != 0)) goto after_bfm;
   2652       ULong wmask = 0, tmask = 0;
   2653       Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
   2654                                    N, immS, immR, False, sf == 1 ? 64 : 32);
   2655       if (!ok) goto after_bfm; /* mask decode failed */
   2656 
   2657       Bool   is64 = sf == 1;
   2658       IRType ty   = is64 ? Ity_I64 : Ity_I32;
   2659 
   2660       IRTemp dst = newTemp(ty);
   2661       IRTemp src = newTemp(ty);
   2662       IRTemp bot = newTemp(ty);
   2663       IRTemp top = newTemp(ty);
   2664       IRTemp res = newTemp(ty);
   2665       assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
   2666       assign(src, getIRegOrZR(is64, nn));
   2667       /* perform bitfield move on low bits */
   2668       assign(bot, binop(mkOR(ty),
   2669                         binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
   2670                         binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
   2671                                          mkU(ty, wmask))));
   2672       /* determine extension bits (sign, zero or dest register) */
   2673       assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
   2674       /* combine extension bits and result bits */
   2675       assign(res, binop(mkOR(ty),
   2676                         binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
   2677                         binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
   2678       putIRegOrZR(is64, dd, mkexpr(res));
   2679       DIP("%s %s, %s, immR=%u, immS=%u\n",
   2680           nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
   2681       return True;
   2682    }
   2683    after_bfm:
   2684 
   2685    /* ---------------------- EXTR ---------------------- */
   2686    /*   30 28     22 20 15   9 4
   2687       1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
   2688       0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   2689    */
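           /* EXTR extracts a register-width field from the concatenation
              Rn:Rm starting at bit #imm6 of Rm; e.g. extr x0, x1, x2, #8
              computes (x1 << 56) | (x2 >>u 8). */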
   2690    if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
   2691       Bool is64  = INSN(31,31) == 1;
   2692       UInt mm    = INSN(20,16);
   2693       UInt imm6  = INSN(15,10);
   2694       UInt nn    = INSN(9,5);
   2695       UInt dd    = INSN(4,0);
   2696       Bool valid = True;
   2697       if (INSN(31,31) != INSN(22,22))
   2698         valid = False;
   2699       if (!is64 && imm6 >= 32)
   2700         valid = False;
   2701       if (!valid) goto after_extr;
   2702       IRType ty    = is64 ? Ity_I64 : Ity_I32;
   2703       IRTemp srcHi = newTemp(ty);
   2704       IRTemp srcLo = newTemp(ty);
   2705       IRTemp res   = newTemp(ty);
   2706       assign(srcHi, getIRegOrZR(is64, nn));
   2707       assign(srcLo, getIRegOrZR(is64, mm));
   2708       if (imm6 == 0) {
   2709         assign(res, mkexpr(srcLo));
   2710       } else {
   2711         UInt szBits = 8 * sizeofIRType(ty);
   2712         vassert(imm6 > 0 && imm6 < szBits);
   2713         assign(res, binop(mkOR(ty),
   2714                           binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
   2715                           binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
   2716       }
   2717       putIRegOrZR(is64, dd, mkexpr(res));
   2718       DIP("extr %s, %s, %s, #%u\n",
   2719           nameIRegOrZR(is64,dd),
   2720           nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
   2721       return True;
   2722    }
   2723   after_extr:
   2724 
   2725    vex_printf("ARM64 front end: data_processing_immediate\n");
   2726    return False;
   2727 #  undef INSN
   2728 }
   2729 
   2730 
   2731 /*------------------------------------------------------------*/
   2732 /*--- Data processing (register) instructions              ---*/
   2733 /*------------------------------------------------------------*/
   2734 
   2735 static const HChar* nameSH ( UInt sh ) {
   2736    switch (sh) {
   2737       case 0: return "lsl";
   2738       case 1: return "lsr";
   2739       case 2: return "asr";
   2740       case 3: return "ror";
   2741       default: vassert(0);
   2742    }
   2743 }
   2744 
   2745 /* Generate IR to get a register value, possibly shifted by an
   2746    immediate.  Returns either a 32- or 64-bit temporary holding the
   2747    result.  After the shift, the value can optionally be NOT-ed
   2748    too.
   2749 
   2750    sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
   2751    in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
   2752    isn't allowed, but it's the job of the caller to check that.
   2753 */
   2754 static IRTemp getShiftedIRegOrZR ( Bool is64,
   2755                                    UInt sh_how, UInt sh_amt, UInt regNo,
   2756                                    Bool invert )
   2757 {
   2758    vassert(sh_how < 4);
   2759    vassert(sh_amt < (is64 ? 64 : 32));
   2760    IRType ty = is64 ? Ity_I64 : Ity_I32;
   2761    IRTemp t0 = newTemp(ty);
   2762    assign(t0, getIRegOrZR(is64, regNo));
   2763    IRTemp t1 = newTemp(ty);
   2764    switch (sh_how) {
   2765       case BITS2(0,0):
   2766          assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
   2767          break;
   2768       case BITS2(0,1):
   2769          assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
   2770          break;
   2771       case BITS2(1,0):
   2772          assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
   2773          break;
   2774       case BITS2(1,1):
   2775          assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
   2776          break;
   2777       default:
   2778          vassert(0);
   2779    }
   2780    if (invert) {
   2781       IRTemp t2 = newTemp(ty);
   2782       assign(t2, unop(mkNOT(ty), mkexpr(t1)));
   2783       return t2;
   2784    } else {
   2785       return t1;
   2786    }
   2787 }
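        /* For instance, getShiftedIRegOrZR(True, BITS2(1,0), 3, 5, False)
           returns a new Ity_I64 temp holding X5 >>s 3 (arithmetic shift,
           no inversion). */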
   2788 
   2789 
   2790 static
   2791 Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
   2792                                         UInt insn)
   2793 {
   2794 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   2795 
   2796    /* ------------------- ADD/SUB(reg) ------------------- */
   2797    /* x==0 => 32 bit op      x==1 => 64 bit op
   2798       sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
   2799 
   2800       31 30 29 28    23 21 20 15   9  4
   2801       |  |  |  |     |  |  |  |    |  |
   2802       x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
   2803       x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
   2804       x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
   2805       x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
   2806    */
   2807    if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
   2808       UInt   bX    = INSN(31,31);
   2809       UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
   2810       UInt   bS    = INSN(29,29); /* set flags? */
   2811       UInt   sh    = INSN(23,22);
   2812       UInt   rM    = INSN(20,16);
   2813       UInt   imm6  = INSN(15,10);
   2814       UInt   rN    = INSN(9,5);
   2815       UInt   rD    = INSN(4,0);
   2816       Bool   isSUB = bOP == 1;
   2817       Bool   is64  = bX == 1;
   2818       IRType ty    = is64 ? Ity_I64 : Ity_I32;
   2819       if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
   2820          /* invalid; fall through */
   2821       } else {
   2822          IRTemp argL = newTemp(ty);
   2823          assign(argL, getIRegOrZR(is64, rN));
   2824          IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
   2825          IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
   2826          IRTemp res  = newTemp(ty);
   2827          assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
   2828          if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
   2829          if (bS) {
   2830             setFlags_ADD_SUB(is64, isSUB, argL, argR);
   2831          }
   2832          DIP("%s%s %s, %s, %s, %s #%u\n",
   2833              bOP ? "sub" : "add", bS ? "s" : "",
   2834              nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
   2835              nameIRegOrZR(is64, rM), nameSH(sh), imm6);
   2836          return True;
   2837       }
   2838    }
   2839 
   2840    /* ------------------- ADC/SBC(reg) ------------------- */
   2841    /* x==0 => 32 bit op      x==1 => 64 bit op
   2842 
   2843       31 30 29 28    23 21 20 15     9  4
   2844       |  |  |  |     |  |  |  |      |  |
   2845       x  0  0  11010 00 0  Rm 000000 Rn Rd   ADC  Rd,Rn,Rm
   2846       x  0  1  11010 00 0  Rm 000000 Rn Rd   ADCS Rd,Rn,Rm
   2847       x  1  0  11010 00 0  Rm 000000 Rn Rd   SBC  Rd,Rn,Rm
   2848       x  1  1  11010 00 0  Rm 000000 Rn Rd   SBCS Rd,Rn,Rm
   2849    */
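           /* Note: ADC computes Rn + Rm + C, whilst SBC computes
              Rn - Rm - (1 - C); below, the borrow is expressed as
              (oldC ^ 1) subtracted from Rn - Rm. */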
   2850 
   2851    if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0) {
   2852       UInt   bX    = INSN(31,31);
   2853       UInt   bOP   = INSN(30,30); /* 0: ADC, 1: SBC */
   2854       UInt   bS    = INSN(29,29); /* set flags */
   2855       UInt   rM    = INSN(20,16);
   2856       UInt   rN    = INSN(9,5);
   2857       UInt   rD    = INSN(4,0);
   2858 
   2859       Bool   isSUB = bOP == 1;
   2860       Bool   is64  = bX == 1;
   2861       IRType ty    = is64 ? Ity_I64 : Ity_I32;
   2862 
   2863       IRTemp oldC = newTemp(ty);
   2864       assign(oldC,
   2865              is64 ? mk_arm64g_calculate_flag_c()
   2866                   : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
   2867 
   2868       IRTemp argL = newTemp(ty);
   2869       assign(argL, getIRegOrZR(is64, rN));
   2870       IRTemp argR = newTemp(ty);
   2871       assign(argR, getIRegOrZR(is64, rM));
   2872 
   2873       IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
   2874       IRTemp res  = newTemp(ty);
   2875       if (isSUB) {
   2876          IRExpr* one = is64 ? mkU64(1) : mkU32(1);
   2877          IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
   2878          assign(res,
   2879                 binop(op,
   2880                       binop(op, mkexpr(argL), mkexpr(argR)),
   2881                       binop(xorOp, mkexpr(oldC), one)));
   2882       } else {
   2883          assign(res,
   2884                 binop(op,
   2885                       binop(op, mkexpr(argL), mkexpr(argR)),
   2886                       mkexpr(oldC)));
   2887       }
   2888 
   2889       if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
   2890 
   2891       if (bS) {
   2892          setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
   2893       }
   2894 
   2895       DIP("%s%s %s, %s, %s\n",
   2896           bOP ? "sbc" : "adc", bS ? "s" : "",
   2897           nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
   2898           nameIRegOrZR(is64, rM));
   2899       return True;
   2900    }
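
           /* As a scalar reference for the computation above (a sketch
              only; the helper names are hypothetical and nothing here is
              used by the decoder), with the carry flag in bit 0 of 'c':

                 static ULong ref_ADC64 ( ULong x, ULong y, ULong c )
                 {
                    return x + y + (c & 1);
                 }
                 static ULong ref_SBC64 ( ULong x, ULong y, ULong c )
                 {
                    // SBC is x - y - (1 - carry), which the IR above
                    // expresses as (x - y) - (carry ^ 1).
                    return x - y - ((c & 1) ^ 1);
                 }
           */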
   2901 
   2902    /* -------------------- LOGIC(reg) -------------------- */
   2903    /* x==0 => 32 bit op      x==1 => 64 bit op
   2904       N==0 => inv? is no-op (no inversion)
   2905       N==1 => inv? is NOT
   2906       sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
   2907 
   2908       31 30 28    23 21 20 15   9  4
   2909       |  |  |     |  |  |  |    |  |
   2910       x  00 01010 sh N  Rm imm6 Rn Rd  AND  Rd,Rn, inv?(sh(Rm,imm6))
   2911       x  01 01010 sh N  Rm imm6 Rn Rd  ORR  Rd,Rn, inv?(sh(Rm,imm6))
   2912       x  10 01010 sh N  Rm imm6 Rn Rd  EOR  Rd,Rn, inv?(sh(Rm,imm6))
   2913       x  11 01010 sh N  Rm imm6 Rn Rd  ANDS Rd,Rn, inv?(sh(Rm,imm6))
   2914       With N=1, the names are: BIC ORN EON BICS
   2915    */
   2916    if (INSN(28,24) == BITS5(0,1,0,1,0)) {
   2917       UInt   bX   = INSN(31,31);
   2918       UInt   sh   = INSN(23,22);
   2919       UInt   bN   = INSN(21,21);
   2920       UInt   rM   = INSN(20,16);
   2921       UInt   imm6 = INSN(15,10);
   2922       UInt   rN   = INSN(9,5);
   2923       UInt   rD   = INSN(4,0);
   2924       Bool   is64 = bX == 1;
   2925       IRType ty   = is64 ? Ity_I64 : Ity_I32;
   2926       if (!is64 && imm6 > 31) {
   2927          /* invalid; fall through */
   2928       } else {
   2929          IRTemp argL = newTemp(ty);
   2930          assign(argL, getIRegOrZR(is64, rN));
   2931          IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
   2932          IROp   op   = Iop_INVALID;
   2933          switch (INSN(30,29)) {
   2934             case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
   2935             case BITS2(0,1):                  op = mkOR(ty);  break;
   2936             case BITS2(1,0):                  op = mkXOR(ty); break;
   2937             default: vassert(0);
   2938          }
   2939          IRTemp res = newTemp(ty);
   2940          assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
   2941          if (INSN(30,29) == BITS2(1,1)) {
   2942             setFlags_LOGIC(is64, res);
   2943          }
   2944          putIRegOrZR(is64, rD, mkexpr(res));
   2945 
   2946          static const HChar* names_op[8]
   2947             = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
   2948          vassert(((bN << 2) | INSN(30,29)) < 8);
   2949          const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
   2950          /* Special-case the printing of "MOV" */
   2951          if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
   2952             DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
   2953                                 nameIRegOrZR(is64, rM));
   2954          } else {
   2955             DIP("%s %s, %s, %s, %s #%u\n", nm_op,
   2956                 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
   2957                 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
   2958          }
   2959          return True;
   2960       }
   2961    }
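
           /* With N == 1 the shifted Rm operand is inverted before use,
              giving the alias semantics (sketch):

                 // BIC:  d = n & ~sh(m)      ORN:  d = n | ~sh(m)
                 // EON:  d = n ^ ~sh(m)      BICS: as BIC, plus flags
           */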
   2962 
   2963    /* -------------------- {U,S}MULH -------------------- */
   2964    /* 31       23 22 20 15     9   4
   2965       10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
   2966       10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
   2967    */
   2968    if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
   2969        && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
   2970       Bool isU = INSN(23,23) == 1;
   2971       UInt mm  = INSN(20,16);
   2972       UInt nn  = INSN(9,5);
   2973       UInt dd  = INSN(4,0);
   2974       putIReg64orZR(dd, unop(Iop_128HIto64,
   2975                              binop(isU ? Iop_MullU64 : Iop_MullS64,
   2976                                    getIReg64orZR(nn), getIReg64orZR(mm))));
   2977       DIP("%cmulh %s, %s, %s\n",
   2978           isU ? 'u' : 's',
   2979           nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
   2980       return True;
   2981    }
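
           /* Sketch of the semantics implemented above, using the
              GCC/Clang 128-bit integer extension (hypothetical helper,
              unused here):

                 static ULong ref_UMULH ( ULong x, ULong y )
                 {
                    return (ULong)( ((unsigned __int128)x * y) >> 64 );
                 }

              and analogously via __int128 and Long casts for SMULH.
           */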
   2982 
   2983    /* -------------------- M{ADD,SUB} -------------------- */
   2984    /* 31 30           20 15 14 9 4
   2985       sf 00 11011 000 m  0  a  n r   MADD Rd,Rn,Rm,Ra  d = a+m*n
   2986       sf 00 11011 000 m  1  a  n r   MSUB Rd,Rn,Rm,Ra  d = a-m*n
   2987    */
   2988    if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
   2989       Bool is64  = INSN(31,31) == 1;
   2990       UInt mm    = INSN(20,16);
   2991       Bool isAdd = INSN(15,15) == 0;
   2992       UInt aa    = INSN(14,10);
   2993       UInt nn    = INSN(9,5);
   2994       UInt dd    = INSN(4,0);
   2995       if (is64) {
   2996          putIReg64orZR(
   2997             dd,
   2998             binop(isAdd ? Iop_Add64 : Iop_Sub64,
   2999                   getIReg64orZR(aa),
   3000                   binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
   3001       } else {
   3002          putIReg32orZR(
   3003             dd,
   3004             binop(isAdd ? Iop_Add32 : Iop_Sub32,
   3005                   getIReg32orZR(aa),
   3006                   binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
   3007       }
   3008       DIP("%s %s, %s, %s, %s\n",
   3009           isAdd ? "madd" : "msub",
   3010           nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
   3011           nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
   3012       return True;
   3013    }
   3014 
   3015    /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
   3016    /* 31 30 28        20 15   11 9  4
   3017       sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
   3018       sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
   3019       sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
   3020       sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
   3021       In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
   3022    */
   3023    if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
   3024       Bool    is64 = INSN(31,31) == 1;
   3025       UInt    b30  = INSN(30,30);
   3026       UInt    mm   = INSN(20,16);
   3027       UInt    cond = INSN(15,12);
   3028       UInt    b10  = INSN(10,10);
   3029       UInt    nn   = INSN(9,5);
   3030       UInt    dd   = INSN(4,0);
   3031       UInt    op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
   3032       IRType  ty   = is64 ? Ity_I64 : Ity_I32;
   3033       IRExpr* argL = getIRegOrZR(is64, nn);
   3034       IRExpr* argR = getIRegOrZR(is64, mm);
   3035       switch (op) {
   3036          case BITS2(0,0):
   3037             break;
   3038          case BITS2(0,1):
   3039             argR = binop(mkADD(ty), argR, mkU(ty,1));
   3040             break;
   3041          case BITS2(1,0):
   3042             argR = unop(mkNOT(ty), argR);
   3043             break;
   3044          case BITS2(1,1):
   3045             argR = binop(mkSUB(ty), mkU(ty,0), argR);
   3046             break;
   3047          default:
   3048             vassert(0);
   3049       }
   3050       putIRegOrZR(
   3051          is64, dd,
   3052          IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
   3053                     argL, argR)
   3054       );
   3055       const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
   3056       DIP("%s %s, %s, %s, %s\n", op_nm[op],
   3057           nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
   3058           nameIRegOrZR(is64, mm), nameCC(cond));
   3059       return True;
   3060    }
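
           /* Scalar sketch of the four variants (hypothetical names;
              'cond_holds' stands for the NZCV condition evaluation):

                 static ULong ref_CSxx64 ( Bool cond_holds, ULong n,
                                           ULong m, UInt op )
                 {  // op: 0=el 1=inc 2=inv 3=neg
                    if (cond_holds) return n;
                    switch (op) {
                       case 0:  return m;          // CSEL
                       case 1:  return m + 1;      // CSINC
                       case 2:  return ~m;         // CSINV
                       default: return 0ULL - m;   // CSNEG
                    }
                 }
           */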
   3061 
   3062    /* -------------- ADD/SUB(extended reg) -------------- */
   3063    /*     28         20 15  12   9 4
   3064       000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
   3065       100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld
   3066 
   3067       001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
   3068       101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld
   3069 
   3070       010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
   3071       110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld
   3072 
   3073       011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
   3074       111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld
   3075 
   3076       The 'm' operand is extended per opt, as follows:
   3077 
   3078         000   Xm & 0xFF           UXTB
   3079         001   Xm & 0xFFFF         UXTH
   3080         010   Xm & (2^32)-1       UXTW
   3081         011   Xm                  UXTX
   3082 
   3083         100   Xm sx from bit 7    SXTB
   3084         101   Xm sx from bit 15   SXTH
   3085         110   Xm sx from bit 31   SXTW
   3086         111   Xm                  SXTX
   3087 
   3088       In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
   3089       operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
   3090       are the identity operation on Wm.
   3091 
   3092       After extension, the value is shifted left by imm3 bits, which
   3093       may only be in the range 0 .. 4 inclusive.
   3094    */
   3095    if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
   3096       Bool is64  = INSN(31,31) == 1;
   3097       Bool isSub = INSN(30,30) == 1;
   3098       Bool setCC = INSN(29,29) == 1;
   3099       UInt mm    = INSN(20,16);
   3100       UInt opt   = INSN(15,13);
   3101       UInt imm3  = INSN(12,10);
   3102       UInt nn    = INSN(9,5);
   3103       UInt dd    = INSN(4,0);
   3104       const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
   3105                                   "sxtb", "sxth", "sxtw", "sxtx" };
   3106       /* Do almost the same thing in the 32- and 64-bit cases. */
   3107       IRTemp xN = newTemp(Ity_I64);
   3108       IRTemp xM = newTemp(Ity_I64);
   3109       assign(xN, getIReg64orSP(nn));
   3110       assign(xM, getIReg64orZR(mm));
   3111       IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
   3112       Int     shSX = 0;
   3113       /* widen Xm .. */
   3114       switch (opt) {
   3115          case BITS3(0,0,0): // UXTB
   3116             xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
   3117          case BITS3(0,0,1): // UXTH
   3118             xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
   3119          case BITS3(0,1,0): // UXTW -- noop for the 32bit case
   3120             if (is64) {
   3121                xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
   3122             }
   3123             break;
   3124          case BITS3(0,1,1): // UXTX -- always a noop
   3125             break;
   3126          case BITS3(1,0,0): // SXTB
   3127             shSX = 56; goto sxTo64;
   3128          case BITS3(1,0,1): // SXTH
   3129             shSX = 48; goto sxTo64;
   3130          case BITS3(1,1,0): // SXTW -- noop for the 32bit case
   3131             if (is64) {
   3132                shSX = 32; goto sxTo64;
   3133             }
   3134             break;
   3135          case BITS3(1,1,1): // SXTX -- always a noop
   3136             break;
   3137          sxTo64:
   3138             vassert(shSX >= 32);
   3139             xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
   3140                         mkU8(shSX));
   3141             break;
   3142          default:
   3143             vassert(0);
   3144       }
   3145       /* and now shift */
   3146       IRTemp argL = xN;
   3147       IRTemp argR = newTemp(Ity_I64);
   3148       assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
   3149       IRTemp res = newTemp(Ity_I64);
   3150       assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
   3151                         mkexpr(argL), mkexpr(argR)));
   3152       if (is64) {
   3153          if (setCC) {
   3154             putIReg64orZR(dd, mkexpr(res));
   3155             setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
   3156          } else {
   3157             putIReg64orSP(dd, mkexpr(res));
   3158          }
   3159       } else {
   3160          if (setCC) {
   3161             IRTemp argL32 = newTemp(Ity_I32);
   3162             IRTemp argR32 = newTemp(Ity_I32);
   3163             putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
   3164             assign(argL32, unop(Iop_64to32, mkexpr(argL)));
   3165             assign(argR32, unop(Iop_64to32, mkexpr(argR)));
   3166             setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
   3167          } else {
   3168             putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
   3169          }
   3170       }
   3171       DIP("%s%s %s, %s, %s %s lsl %u\n",
   3172           isSub ? "sub" : "add", setCC ? "s" : "",
   3173           setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
   3174           nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
   3175           nameExt[opt], imm3);
   3176       return True;
   3177    }
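
           /* The widen-then-shift of Xm above, as a scalar sketch for
              the 64-bit case (hypothetical helper, unused by the
              decoder):

                 static ULong ref_ExtendAndLsl ( ULong xm, UInt opt,
                                                 UInt imm3 )
                 {
                    switch (opt) {
                       case 0: xm &= 0xFFULL;               break; // UXTB
                       case 1: xm &= 0xFFFFULL;             break; // UXTH
                       case 2: xm &= 0xFFFFFFFFULL;         break; // UXTW
                       case 3: /* UXTX: identity */         break;
                       case 4: xm = (ULong)(Long)(Char)xm;  break; // SXTB
                       case 5: xm = (ULong)(Long)(Short)xm; break; // SXTH
                       case 6: xm = (ULong)(Long)(Int)xm;   break; // SXTW
                       default: /* SXTX: identity */        break;
                    }
                    return xm << imm3; /* imm3 in 0 .. 4 */
                 }

              The IR above gets the same effect with masks for the
              unsigned cases and a Shl64/Sar64 pair for the signed ones.
           */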
   3178 
   3179    /* ---------------- CCMP/CCMN(imm) ---------------- */
   3180    /* Bizarrely, these appear in the "data processing register"
   3181       category, even though they are operations against an
   3182       immediate. */
   3183    /* 31   29        20   15   11 9    3
   3184       sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
   3185       sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond
   3186 
   3187       Operation is:
   3188          (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
   3189          (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
   3190    */
   3191    if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
   3192        && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
   3193       Bool is64  = INSN(31,31) == 1;
   3194       Bool isSUB = INSN(30,30) == 1;
   3195       UInt imm5  = INSN(20,16);
   3196       UInt cond  = INSN(15,12);
   3197       UInt nn    = INSN(9,5);
   3198       UInt nzcv  = INSN(3,0);
   3199 
   3200       IRTemp condT = newTemp(Ity_I1);
   3201       assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
   3202 
   3203       IRType ty   = is64 ? Ity_I64 : Ity_I32;
   3204       IRTemp argL = newTemp(ty);
   3205       IRTemp argR = newTemp(ty);
   3206 
   3207       if (is64) {
   3208          assign(argL, getIReg64orZR(nn));
   3209          assign(argR, mkU64(imm5));
   3210       } else {
   3211          assign(argL, getIReg32orZR(nn));
   3212          assign(argR, mkU32(imm5));
   3213       }
   3214       setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
   3215 
   3216       DIP("ccm%c %s, #%u, #%u, %s\n",
   3217           isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
   3218           imm5, nzcv, nameCC(cond));
   3219       return True;
   3220    }
   3221 
   3222    /* ---------------- CCMP/CCMN(reg) ---------------- */
   3223    /* 31   29        20 15   11 9    3
   3224       sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
   3225       sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond
   3226       Operation is:
   3227          (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
   3228          (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
   3229    */
   3230    if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
   3231        && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
   3232       Bool is64  = INSN(31,31) == 1;
   3233       Bool isSUB = INSN(30,30) == 1;
   3234       UInt mm    = INSN(20,16);
   3235       UInt cond  = INSN(15,12);
   3236       UInt nn    = INSN(9,5);
   3237       UInt nzcv  = INSN(3,0);
   3238 
   3239       IRTemp condT = newTemp(Ity_I1);
   3240       assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
   3241 
   3242       IRType ty   = is64 ? Ity_I64 : Ity_I32;
   3243       IRTemp argL = newTemp(ty);
   3244       IRTemp argR = newTemp(ty);
   3245 
   3246       if (is64) {
   3247          assign(argL, getIReg64orZR(nn));
   3248          assign(argR, getIReg64orZR(mm));
   3249       } else {
   3250          assign(argL, getIReg32orZR(nn));
   3251          assign(argR, getIReg32orZR(mm));
   3252       }
   3253       setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
   3254 
   3255       DIP("ccm%c %s, %s, #%u, %s\n",
   3256           isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
   3257           nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
   3258       return True;
   3259    }
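
           /* Both conditional-compare forms above reduce to the same
              scheme; as a sketch ('flags_after_sub'/'flags_after_add'
              stand for the flag-thunk machinery and are hypothetical):

                 // if (cond_holds) nzcv = isCCMP ? flags_after_sub(n, r)
                 //                               : flags_after_add(n, r);
                 // else            nzcv = imm_nzcv;

              i.e. the flags become those of a normal CMP/CMN, or the
              literal nzcv field, selected by the condition.
           */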
   3260 
   3261 
   3262    /* -------------- REV/REV16/REV32/RBIT -------------- */
   3263    /* 31 30 28       20    15   11 9 4
   3264 
   3265       1  10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
   3266       0  10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn
   3267 
   3268       1  10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
   3269       0  10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn
   3270 
   3271       1  10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
   3272       0  10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn
   3273 
   3274       1  10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
   3275    */
   3276    if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
   3277        && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
   3278       UInt b31 = INSN(31,31);
   3279       UInt opc = INSN(11,10);
   3280 
   3281       UInt ix = 0;
   3282       /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
   3283       else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
   3284       else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
   3285       else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
   3286       else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
   3287       else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
   3288       else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
   3289       if (ix >= 1 && ix <= 7) {
   3290          Bool   is64  = ix == 1 || ix == 3 || ix == 5 || ix == 7;
   3291          UInt   nn    = INSN(9,5);
   3292          UInt   dd    = INSN(4,0);
   3293          IRTemp src   = newTemp(Ity_I64);
   3294          IRTemp dst   = IRTemp_INVALID;
   3295          IRTemp (*math)(IRTemp) = NULL;
   3296          switch (ix) {
   3297             case 1: case 2: math = math_BYTESWAP64;   break;
   3298             case 3: case 4: math = math_BITSWAP64;    break;
   3299             case 5: case 6: math = math_USHORTSWAP64; break;
   3300             case 7:         math = math_UINTSWAP64;   break;
   3301             default: vassert(0);
   3302          }
   3303          const HChar* names[7]
   3304            = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
   3305          const HChar* nm = names[ix-1];
   3306          vassert(math);
   3307          if (ix == 6) {
   3308             /* This has to be special cased, since the logic below doesn't
   3309                handle it correctly. */
   3310             assign(src, getIReg64orZR(nn));
   3311             dst = math(src);
   3312             putIReg64orZR(dd,
   3313                           unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
   3314          } else if (is64) {
   3315             assign(src, getIReg64orZR(nn));
   3316             dst = math(src);
   3317             putIReg64orZR(dd, mkexpr(dst));
   3318          } else {
   3319             assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
   3320             dst = math(src);
   3321             putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
   3322          }
   3323          DIP("%s %s, %s\n", nm,
   3324              nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
   3325          return True;
   3326       }
   3327       /* else fall through */
   3328    }
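
           /* Why the 32-bit cases shift the source left by 32 first:
              the swap helpers act on all 64 bits, so placing the
              payload in the top half makes the reversed result land in
              the bottom half.  Sketch for REV Wd,Wn (hypothetical;
              byteswap64 reverses all 8 bytes):

                 static UInt ref_REV32 ( UInt w )
                 {
                    return (UInt) byteswap64( ((ULong)w) << 32 );
                 }

              This doesn't work for REV16 Wd,Wn, whose per-16-bit swaps
              stay within their lanes rather than crossing halves --
              hence the ix == 6 special case above.
           */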
   3329 
   3330    /* -------------------- CLZ/CLS -------------------- */
   3331    /*    30 28   24   20    15      9 4
   3332       sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
   3333       sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
   3334    */
   3335    if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
   3336        && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
   3337       Bool   is64  = INSN(31,31) == 1;
   3338       Bool   isCLS = INSN(10,10) == 1;
   3339       UInt   nn    = INSN(9,5);
   3340       UInt   dd    = INSN(4,0);
   3341       IRTemp src   = newTemp(Ity_I64);
   3342       IRTemp srcZ  = newTemp(Ity_I64);
   3343       IRTemp dst   = newTemp(Ity_I64);
   3344       /* Get the argument, widened out to 64 bit */
   3345       if (is64) {
   3346          assign(src, getIReg64orZR(nn));
   3347       } else {
   3348          assign(src, binop(Iop_Shl64,
   3349                            unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
   3350       }
   3351       /* If this is CLS, mash the arg around accordingly */
   3352       if (isCLS) {
   3353          IRExpr* one = mkU8(1);
   3354          assign(srcZ, binop(Iop_Xor64,
   3355                             binop(Iop_Shl64, mkexpr(src), one),
   3356                             binop(Iop_Shl64,
   3357                                   binop(Iop_Shr64, mkexpr(src), one), one)));
   3358       } else {
   3359          assign(srcZ, mkexpr(src));
   3360       }
   3361       /* And compute CLZ. */
   3362       if (is64) {
   3363          assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
   3364                                 mkU64(isCLS ? 63 : 64),
   3365                                 unop(Iop_Clz64, mkexpr(srcZ))));
   3366          putIReg64orZR(dd, mkexpr(dst));
   3367       } else {
   3368          assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
   3369                                 mkU64(isCLS ? 31 : 32),
   3370                                 unop(Iop_Clz64, mkexpr(srcZ))));
   3371          putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
   3372       }
   3373       DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
   3374           nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
   3375       return True;
   3376    }
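
           /* The CLS mashing above xors each bit with its left
              neighbour, so leading copies of the sign bit become
              leading zeroes, which are then counted.  64-bit scalar
              sketch (hypothetical names; clz64 counts leading zeroes of
              a nonzero value):

                 static UInt ref_CLS64 ( ULong x )
                 {
                    ULong z = (x << 1) ^ ((x >> 1) << 1);
                    return z == 0 ? 63 : clz64(z);
                 }
           */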
   3377 
   3378    /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
   3379    /*    30 28        20 15   11 9 4
   3380       sf 00 1101 0110 m  0010 00 n d   LSLV Rd,Rn,Rm
   3381       sf 00 1101 0110 m  0010 01 n d   LSRV Rd,Rn,Rm
   3382       sf 00 1101 0110 m  0010 10 n d   ASRV Rd,Rn,Rm
   3383       sf 00 1101 0110 m  0010 11 n d   RORV Rd,Rn,Rm
   3384    */
   3385    if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
   3386        && INSN(15,12) == BITS4(0,0,1,0)) {
   3387       Bool   is64 = INSN(31,31) == 1;
   3388       UInt   mm   = INSN(20,16);
   3389       UInt   op   = INSN(11,10);
   3390       UInt   nn   = INSN(9,5);
   3391       UInt   dd   = INSN(4,0);
   3392       IRType ty   = is64 ? Ity_I64 : Ity_I32;
   3393       IRTemp srcL = newTemp(ty);
   3394       IRTemp srcR = newTemp(Ity_I64);
   3395       IRTemp res  = newTemp(ty);
   3396       IROp   iop  = Iop_INVALID;
   3397       assign(srcL, getIRegOrZR(is64, nn));
   3398       assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
   3399                                     mkU64(is64 ? 63 : 31)));
   3400       if (op < 3) {
   3401          // LSLV, LSRV, ASRV
   3402          switch (op) {
   3403             case BITS2(0,0): iop = mkSHL(ty); break;
   3404             case BITS2(0,1): iop = mkSHR(ty); break;
   3405             case BITS2(1,0): iop = mkSAR(ty); break;
   3406             default: vassert(0);
   3407          }
   3408          assign(res, binop(iop, mkexpr(srcL),
   3409                                 unop(Iop_64to8, mkexpr(srcR))));
   3410       } else {
   3411          // RORV
   3412          IROp opSHL = mkSHL(ty);
   3413          IROp opSHR = mkSHR(ty);
   3414          IROp opOR  = mkOR(ty);
   3415          IRExpr* width = mkU64(is64 ? 64 : 32);
   3416          assign(
   3417             res,
   3418             IRExpr_ITE(
   3419                binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
   3420                mkexpr(srcL),
   3421                binop(opOR,
   3422                      binop(opSHL,
   3423                            mkexpr(srcL),
   3424                            unop(Iop_64to8, binop(Iop_Sub64, width,
   3425                                                             mkexpr(srcR)))),
   3426                      binop(opSHR,
   3427                            mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
   3428          ));
   3429       }
   3430       putIRegOrZR(is64, dd, mkexpr(res));
   3431       vassert(op < 4);
   3432       const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
   3433       DIP("%s %s, %s, %s\n",
   3434           names[op], nameIRegOrZR(is64,dd),
   3435                      nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
   3436       return True;
   3437    }
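
           /* The RORV case above synthesises a rotate from two shifts.
              The ITE guards the amount == 0 case, where the left shift
              by (width - 0) bits would otherwise be out of range.
              Sketch, with r already masked to 0 .. 63:

                 static ULong ref_ROR64 ( ULong x, UInt r )
                 {
                    return r == 0 ? x : (x >> r) | (x << (64 - r));
                 }
           */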
   3438 
   3439    /* -------------------- SDIV/UDIV -------------------- */
   3440    /*    30 28        20 15    10 9 4
   3441       sf 00 1101 0110 m  00001  1 n d  SDIV Rd,Rn,Rm
   3442       sf 00 1101 0110 m  00001  0 n d  UDIV Rd,Rn,Rm
   3443    */
   3444    if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
   3445        && INSN(15,11) == BITS5(0,0,0,0,1)) {
   3446       Bool is64 = INSN(31,31) == 1;
   3447       UInt mm   = INSN(20,16);
   3448       Bool isS  = INSN(10,10) == 1;
   3449       UInt nn   = INSN(9,5);
   3450       UInt dd   = INSN(4,0);
   3451       if (isS) {
   3452          putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
   3453                                      getIRegOrZR(is64, nn),
   3454                                      getIRegOrZR(is64, mm)));
   3455       } else {
   3456          putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
   3457                                      getIRegOrZR(is64, nn),
   3458                                      getIRegOrZR(is64, mm)));
   3459       }
   3460       DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
   3461           nameIRegOrZR(is64, dd),
   3462           nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
   3463       return True;
   3464    }
   3465 
   3466    /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
   3467    /* 31        23  20 15 14 9 4
   3468       1001 1011 101 m  0  a  n d   UMADDL Xd,Wn,Wm,Xa
   3469       1001 1011 001 m  0  a  n d   SMADDL Xd,Wn,Wm,Xa
   3470       1001 1011 101 m  1  a  n d   UMSUBL Xd,Wn,Wm,Xa
   3471       1001 1011 001 m  1  a  n d   SMSUBL Xd,Wn,Wm,Xa
   3472       with operation
   3473          Xd = Xa +/- (Wn *u/s Wm)
   3474    */
   3475    if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
   3476       Bool   isU   = INSN(23,23) == 1;
   3477       UInt   mm    = INSN(20,16);
   3478       Bool   isAdd = INSN(15,15) == 0;
   3479       UInt   aa    = INSN(14,10);
   3480       UInt   nn    = INSN(9,5);
   3481       UInt   dd    = INSN(4,0);
   3482       IRTemp wN    = newTemp(Ity_I32);
   3483       IRTemp wM    = newTemp(Ity_I32);
   3484       IRTemp xA    = newTemp(Ity_I64);
   3485       IRTemp muld  = newTemp(Ity_I64);
   3486       IRTemp res   = newTemp(Ity_I64);
   3487       assign(wN, getIReg32orZR(nn));
   3488       assign(wM, getIReg32orZR(mm));
   3489       assign(xA, getIReg64orZR(aa));
   3490       assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
   3491                          mkexpr(wN), mkexpr(wM)));
   3492       assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
   3493                         mkexpr(xA), mkexpr(muld)));
   3494       putIReg64orZR(dd, mkexpr(res));
   3495       DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
   3496           nameIReg64orZR(dd), nameIReg32orZR(nn),
   3497           nameIReg32orZR(mm), nameIReg64orZR(aa));
   3498       return True;
   3499    }
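
           /* Scalar sketch of the widening multiply-accumulate just
              above (hypothetical name, unused):

                 static ULong ref_SMADDL ( ULong xa, UInt wn, UInt wm )
                 {
                    return xa + (ULong)( (Long)(Int)wn * (Long)(Int)wm );
                 }

              and with plain unsigned widening for UMADDL/UMSUBL.
           */
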
   3500    vex_printf("ARM64 front end: data_processing_register\n");
   3501    return False;
   3502 #  undef INSN
   3503 }
   3504 
   3505 
   3506 /*------------------------------------------------------------*/
   3507 /*--- Math helpers for vector interleave/deinterleave      ---*/
   3508 /*------------------------------------------------------------*/
   3509 
   3510 #define EX(_tmp) \
   3511            mkexpr(_tmp)
   3512 #define SL(_hi128,_lo128,_nbytes) \
   3513            ( (_nbytes) == 0 \
   3514                 ? (_lo128) \
   3515                 : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
   3516 #define ROR(_v128,_nbytes) \
   3517            SL((_v128),(_v128),(_nbytes))
   3518 #define ROL(_v128,_nbytes) \
   3519            SL((_v128),(_v128),16-(_nbytes))
   3520 #define SHR(_v128,_nbytes) \
   3521            binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
   3522 #define SHL(_v128,_nbytes) \
   3523            binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
   3524 #define ILO64x2(_argL,_argR) \
   3525            binop(Iop_InterleaveLO64x2,(_argL),(_argR))
   3526 #define IHI64x2(_argL,_argR) \
   3527            binop(Iop_InterleaveHI64x2,(_argL),(_argR))
   3528 #define ILO32x4(_argL,_argR) \
   3529            binop(Iop_InterleaveLO32x4,(_argL),(_argR))
   3530 #define IHI32x4(_argL,_argR) \
   3531            binop(Iop_InterleaveHI32x4,(_argL),(_argR))
   3532 #define ILO16x8(_argL,_argR) \
   3533            binop(Iop_InterleaveLO16x8,(_argL),(_argR))
   3534 #define IHI16x8(_argL,_argR) \
   3535            binop(Iop_InterleaveHI16x8,(_argL),(_argR))
   3536 #define ILO8x16(_argL,_argR) \
   3537            binop(Iop_InterleaveLO8x16,(_argL),(_argR))
   3538 #define IHI8x16(_argL,_argR) \
   3539            binop(Iop_InterleaveHI8x16,(_argL),(_argR))
   3540 #define CEV32x4(_argL,_argR) \
   3541            binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
   3542 #define COD32x4(_argL,_argR) \
   3543            binop(Iop_CatOddLanes32x4,(_argL),(_argR))
   3544 #define COD16x8(_argL,_argR) \
   3545            binop(Iop_CatOddLanes16x8,(_argL),(_argR))
   3546 #define COD8x16(_argL,_argR) \
   3547            binop(Iop_CatOddLanes8x16,(_argL),(_argR))
   3548 #define CEV8x16(_argL,_argR) \
   3549            binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
   3550 #define AND(_arg1,_arg2) \
   3551            binop(Iop_AndV128,(_arg1),(_arg2))
   3552 #define OR2(_arg1,_arg2) \
   3553            binop(Iop_OrV128,(_arg1),(_arg2))
   3554 #define OR3(_arg1,_arg2,_arg3) \
   3555            binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
   3556 #define OR4(_arg1,_arg2,_arg3,_arg4) \
   3557            binop(Iop_OrV128, \
   3558                  binop(Iop_OrV128,(_arg1),(_arg2)), \
   3559                  binop(Iop_OrV128,(_arg3),(_arg4)))
   3560 
   3561 
   3562 /* Do interleaving for 1 128 bit vector, for ST1 insns. */
   3563 static
   3564 void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
   3565                            UInt laneSzBlg2, IRTemp u0 )
   3566 {
   3567    assign(*i0, mkexpr(u0));
   3568 }
   3569 
   3570 
   3571 /* Do interleaving for 2 128 bit vectors, for ST2 insns. */
   3572 static
   3573 void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
   3574                            UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
   3575 {
   3576    /* This is pretty easy, since we have primitives directly to
   3577       hand. */
   3578    if (laneSzBlg2 == 3) {
   3579       // 64x2
   3580       // u1 == B1 B0, u0 == A1 A0
   3581       // i1 == B1 A1, i0 == B0 A0
   3582       assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
   3583       assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
   3584       return;
   3585    }
   3586    if (laneSzBlg2 == 2) {
   3587       // 32x4
   3588       // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
   3589       // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
   3590       assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
   3591       assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
   3592       return;
   3593    }
   3594    if (laneSzBlg2 == 1) {
   3595       // 16x8
   3596       // u1 == B{7..0}, u0 == A{7..0}
   3597       // i0 == B3 A3 B2 A2 B1 A1 B0 A0
   3598       // i1 == B7 A7 B6 A6 B5 A5 B4 A4
   3599       assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
   3600       assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
   3601       return;
   3602    }
   3603    if (laneSzBlg2 == 0) {
   3604       // 8x16
   3605       // u1 == B{f..0}, u0 == A{f..0}
   3606       // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
   3607       // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
   3608       assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
   3609       assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
   3610       return;
   3611    }
   3612    /*NOTREACHED*/
   3613    vassert(0);
   3614 }
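

        /* As a scalar reference for what these interleavers compute (a
           sketch, used nowhere): for N input vectors of L lanes each,
           lane j of the concatenated output stream i(N-1) .. i0 is

              // stream[j] = u[j % N][j / N],   0 <= j < N*L

           which for N == 2 is the familiar zip:

              // for (j = 0; j < 2*L; j++)
              //    stream[j] = (j & 1) ? u1[j >> 1] : u0[j >> 1];
        */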
   3615 
   3616 
   3617 /* Do interleaving for 3 128 bit vectors, for ST3 insns. */
   3618 static
   3619 void math_INTERLEAVE3_128(
   3620         /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
   3621         UInt laneSzBlg2,
   3622         IRTemp u0, IRTemp u1, IRTemp u2 )
   3623 {
   3624    if (laneSzBlg2 == 3) {
   3625       // 64x2
   3626       // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
   3627       // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
   3628       assign(*i2, IHI64x2( EX(u2), EX(u1) ));
   3629       assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
   3630       assign(*i0, ILO64x2( EX(u1), EX(u0) ));
   3631       return;
   3632    }
   3633 
   3634    if (laneSzBlg2 == 2) {
   3635       // 32x4
   3636       // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
   3637       // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
   3638       // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
   3639       IRTemp p0    = newTempV128();
   3640       IRTemp p1    = newTempV128();
   3641       IRTemp p2    = newTempV128();
   3642       IRTemp c1100 = newTempV128();
   3643       IRTemp c0011 = newTempV128();
   3644       IRTemp c0110 = newTempV128();
   3645       assign(c1100, mkV128(0xFF00));
   3646       assign(c0011, mkV128(0x00FF));
   3647       assign(c0110, mkV128(0x0FF0));
   3648       // First interleave them at 64x2 granularity,
   3649       // generating partial ("p") values.
   3650       math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
   3651       // And more shuffling around for the final answer
   3652       assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
   3653                        AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
   3654       assign(*i1, OR3( SHL(EX(p2),12),
   3655                        AND(EX(p1),EX(c0110)),
   3656                        SHR(EX(p0),12) ));
   3657       assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
   3658                        AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
   3659       return;
   3660    }
   3661 
   3662    if (laneSzBlg2 == 1) {
   3663       // 16x8
   3664       // u2 == C7 C6 C5 C4 C3 C2 C1 C0
   3665       // u1 == B7 B6 B5 B4 B3 B2 B1 B0
   3666       // u0 == A7 A6 A5 A4 A3 A2 A1 A0
   3667       //
   3668       // p2 == C7 C6 B7 B6 A7 A6 C5 C4
   3669       // p1 == B5 B4 A5 A4 C3 C2 B3 B2
   3670       // p0 == A3 A2 C1 C0 B1 B0 A1 A0
   3671       //
   3672       // i2 == C7 B7 A7 C6 B6 A6 C5 B5
   3673       // i1 == A5 C4 B4 A4 C3 B3 A3 C2
   3674       // i0 == B2 A2 C1 B1 A1 C0 B0 A0
   3675       IRTemp p0    = newTempV128();
   3676       IRTemp p1    = newTempV128();
   3677       IRTemp p2    = newTempV128();
   3678       IRTemp c1000 = newTempV128();
   3679       IRTemp c0100 = newTempV128();
   3680       IRTemp c0010 = newTempV128();
   3681       IRTemp c0001 = newTempV128();
   3682       assign(c1000, mkV128(0xF000));
   3683       assign(c0100, mkV128(0x0F00));
   3684       assign(c0010, mkV128(0x00F0));
   3685       assign(c0001, mkV128(0x000F));
   3686       // First interleave them at 32x4 granularity,
   3687       // generating partial ("p") values.
   3688       math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
   3689       // And more shuffling around for the final answer
   3690       assign(*i2,
   3691              OR4( AND( IHI16x8( EX(p2),        ROL(EX(p2),4) ), EX(c1000) ),
   3692                   AND( IHI16x8( ROL(EX(p2),6), EX(p2)        ), EX(c0100) ),
   3693                   AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
   3694                   AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
   3695       ));
   3696       assign(*i1,
   3697              OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
   3698                   AND( IHI16x8( EX(p1),        ROL(EX(p1),4) ), EX(c0100) ),
   3699                   AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
   3700                   AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
   3701       ));
   3702       assign(*i0,
   3703              OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
   3704                   AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
   3705                   AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
   3706                   AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
   3707       ));
   3708       return;
   3709    }
   3710 
   3711    if (laneSzBlg2 == 0) {
   3712       // 8x16.  It doesn't seem worth the hassle of first doing a
   3713       // 16x8 interleave, so just generate all 24 partial results
   3714       // directly :-(
   3715       // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
   3716       // i2 == Cf Bf Af Ce .. Bb Ab Ca
   3717       // i1 == Ba Aa C9 B9 .. A6 C5 B5
   3718       // i0 == A5 C4 B4 A4 .. C0 B0 A0
   3719 
   3720       IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
   3721       IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
   3722       IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
   3723       IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
   3724       IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
   3725       IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
   3726       IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
   3727       IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
   3728       IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();
   3729 
   3730       // eg XXXX(qqq, CC, 0xF, BB, 0xA)) sets qqq to be a vector
   3731       // of the form 14 bytes junk : CC[0xF] : BB[0xA]
   3732       //
   3733 #     define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
   3734          IRTemp t_##_tempName = newTempV128(); \
   3735          assign(t_##_tempName, \
   3736                 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
   3737                          ROR(EX(_srcVec2),(_srcShift2)) ) )
   3738 
   3739       // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
   3740       IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;
   3741 
   3742       // The slicing and reassembly are interleaved as much as possible,
   3743       // so as to minimise the demand for registers in the back end, which
   3744       // was observed to be a problem in testing.
   3745 
   3746       XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
   3747       XXXX(AfCe, AA, 0xf, CC, 0xe);
   3748       assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));
   3749 
   3750       XXXX(BeAe, BB, 0xe, AA, 0xe);
   3751       XXXX(CdBd, CC, 0xd, BB, 0xd);
   3752       assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
   3753       assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));
   3754 
   3755       XXXX(AdCc, AA, 0xd, CC, 0xc);
   3756       XXXX(BcAc, BB, 0xc, AA, 0xc);
   3757       assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));
   3758 
   3759       XXXX(CbBb, CC, 0xb, BB, 0xb);
   3760       XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
   3761       assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
   3762       assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
   3763       assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));
   3764 
   3765       XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
   3766       XXXX(C9B9, CC, 0x9, BB, 0x9);
   3767       assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));
   3768 
   3769       XXXX(A9C8, AA, 0x9, CC, 0x8);
   3770       XXXX(B8A8, BB, 0x8, AA, 0x8);
   3771       assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
   3772       assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));
   3773 
   3774       XXXX(C7B7, CC, 0x7, BB, 0x7);
   3775       XXXX(A7C6, AA, 0x7, CC, 0x6);
   3776       assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));
   3777 
   3778       XXXX(B6A6, BB, 0x6, AA, 0x6);
   3779       XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
   3780       assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
   3781       assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
   3782       assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));
   3783 
   3784       XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
   3785       XXXX(B4A4, BB, 0x4, AA, 0x4);
   3786       assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));
   3787 
   3788       XXXX(C3B3, CC, 0x3, BB, 0x3);
   3789       XXXX(A3C2, AA, 0x3, CC, 0x2);
   3790       assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
   3791       assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));
   3792 
   3793       XXXX(B2A2, BB, 0x2, AA, 0x2);
   3794       XXXX(C1B1, CC, 0x1, BB, 0x1);
   3795       assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));
   3796 
   3797       XXXX(A1C0, AA, 0x1, CC, 0x0);
   3798       XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
   3799       assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
   3800       assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
   3801       assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));
   3802 
   3803 #     undef XXXX
   3804       return;
   3805    }
   3806 
   3807    /*NOTREACHED*/
   3808    vassert(0);
   3809 }
   3810 
   3811 
   3812 /* Do interleaving for 4 128 bit vectors, for ST4 insns. */
   3813 static
   3814 void math_INTERLEAVE4_128(
   3815         /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
   3816         UInt laneSzBlg2,
   3817         IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
   3818 {
   3819    if (laneSzBlg2 == 3) {
   3820       // 64x2
   3821       assign(*i0, ILO64x2(EX(u1), EX(u0)));
   3822       assign(*i1, ILO64x2(EX(u3), EX(u2)));
   3823       assign(*i2, IHI64x2(EX(u1), EX(u0)));
   3824       assign(*i3, IHI64x2(EX(u3), EX(u2)));
   3825       return;
   3826    }
   3827    if (laneSzBlg2 == 2) {
   3828       // 32x4
   3829       // First, interleave at the 64-bit lane size.
   3830       IRTemp p0 = newTempV128();
   3831       IRTemp p1 = newTempV128();
   3832       IRTemp p2 = newTempV128();
   3833       IRTemp p3 = newTempV128();
   3834       math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
   3835       // And interleave (cat) at the 32 bit size.
   3836       assign(*i0, CEV32x4(EX(p1), EX(p0)));
   3837       assign(*i1, COD32x4(EX(p1), EX(p0)));
   3838       assign(*i2, CEV32x4(EX(p3), EX(p2)));
   3839       assign(*i3, COD32x4(EX(p3), EX(p2)));
   3840       return;
   3841    }
   3842    if (laneSzBlg2 == 1) {
   3843       // 16x8
   3844       // First, interleave at the 32-bit lane size.
   3845       IRTemp p0 = newTempV128();
   3846       IRTemp p1 = newTempV128();
   3847       IRTemp p2 = newTempV128();
   3848       IRTemp p3 = newTempV128();
   3849       math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
   3850       // And rearrange within each vector, to get the right 16 bit lanes.
   3851       assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
   3852       assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
   3853       assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
   3854       assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
   3855       return;
   3856    }
   3857    if (laneSzBlg2 == 0) {
   3858       // 8x16
   3859       // First, interleave at the 16-bit lane size.
   3860       IRTemp p0 = newTempV128();
   3861       IRTemp p1 = newTempV128();
   3862       IRTemp p2 = newTempV128();
   3863       IRTemp p3 = newTempV128();
   3864       math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
   3865       // And rearrange within each vector, to get the right 8 bit lanes.
   3866       assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
   3867       assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
   3868       assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
   3869       assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
   3870       return;
   3871    }
   3872    /*NOTREACHED*/
   3873    vassert(0);
   3874 }
   3875 
   3876 
   3877 /* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
   3878 static
   3879 void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
   3880                              UInt laneSzBlg2, IRTemp i0 )
   3881 {
   3882    assign(*u0, mkexpr(i0));
   3883 }
   3884 
   3885 
   3886 /* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
   3887 static
   3888 void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
   3889                              UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
   3890 {
   3891    /* This is pretty easy, since we have primitives directly to
   3892       hand. */
   3893    if (laneSzBlg2 == 3) {
   3894       // 64x2
   3895       // i1 == B1 A1, i0 == B0 A0
   3896       // u1 == B1 B0, u0 == A1 A0
   3897       assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
   3898       assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
   3899       return;
   3900    }
   3901    if (laneSzBlg2 == 2) {
   3902       // 32x4
   3903       // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
   3904       // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
   3905       assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
   3906       assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
   3907       return;
   3908    }
   3909    if (laneSzBlg2 == 1) {
   3910       // 16x8
   3911       // i0 == B3 A3 B2 A2 B1 A1 B0 A0
   3912       // i1 == B7 A7 B6 A6 B5 A5 B4 A4
   3913       // u1 == B{7..0}, u0 == A{7..0}
   3914       assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
   3915       assign(*u1, binop(Iop_CatOddLanes16x8,  mkexpr(i1), mkexpr(i0)));
   3916       return;
   3917    }
   3918    if (laneSzBlg2 == 0) {
   3919       // 8x16
   3920       // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
   3921       // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
   3922       // u1 == B{f..0}, u0 == A{f..0}
   3923       assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
   3924       assign(*u1, binop(Iop_CatOddLanes8x16,  mkexpr(i1), mkexpr(i0)));
   3925       return;
   3926    }
   3927    /*NOTREACHED*/
   3928    vassert(0);
   3929 }
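

        /* The corresponding scalar reference for the deinterleavers (a
           sketch, used nowhere): lane j of output vector u[k] is lane
           (N*j + k) of the concatenated input stream:

              // for (k = 0; k < N; k++)
              //    for (j = 0; j < L; j++)
              //       u[k][j] = stream[N*j + k];
        */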
   3930 
   3931 
   3932 /* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
   3933 static
   3934 void math_DEINTERLEAVE3_128(
   3935         /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
   3936         UInt laneSzBlg2,
   3937         IRTemp i0, IRTemp i1, IRTemp i2 )
   3938 {
   3939    if (laneSzBlg2 == 3) {
   3940       // 64x2
   3941       // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
   3942       // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
   3943       assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1)        ));
   3944       assign(*u1, ILO64x2( EX(i2),        ROL(EX(i0),8) ));
   3945       assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0)        ));
   3946       return;
   3947    }
   3948 
   3949    if (laneSzBlg2 == 2) {
   3950       // 32x4
   3951       // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
   3952       // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
   3953       // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
   3954       IRTemp t_a1c0b0a0 = newTempV128();
   3955       IRTemp t_a2c1b1a1 = newTempV128();
   3956       IRTemp t_a3c2b2a2 = newTempV128();
   3957       IRTemp t_a0c3b3a3 = newTempV128();
   3958       IRTemp p0 = newTempV128();
   3959       IRTemp p1 = newTempV128();
   3960       IRTemp p2 = newTempV128();
   3961       // Compute some intermediate values.
   3962       assign(t_a1c0b0a0, EX(i0));
   3963       assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
   3964       assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
   3965       assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
   3966       // First deinterleave into lane-pairs
   3967       assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
   3968       assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
   3969                          IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
   3970       assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
   3971       // Then deinterleave at 64x2 granularity.
   3972       math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
   3973       return;
   3974    }
   3975 
   3976    if (laneSzBlg2 == 1) {
   3977       // 16x8
   3978       // u2 == C7 C6 C5 C4 C3 C2 C1 C0
   3979       // u1 == B7 B6 B5 B4 B3 B2 B1 B0
   3980       // u0 == A7 A6 A5 A4 A3 A2 A1 A0
   3981       //
   3982       // i2 == C7 B7 A7 C6 B6 A6 C5 B5
   3983       // i1 == A5 C4 B4 A4 C3 B3 A3 C2
   3984       // i0 == B2 A2 C1 B1 A1 C0 B0 A0
   3985       //
   3986       // p2 == C7 C6 B7 B6 A7 A6 C5 C4
   3987       // p1 == B5 B4 A5 A4 C3 C2 B3 B2
   3988       // p0 == A3 A2 C1 C0 B1 B0 A1 A0
   3989 
   3990       IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
   3991       s0 = s1 = s2 = s3
   3992          = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
   3993       newTempsV128_4(&s0, &s1, &s2, &s3);
   3994       newTempsV128_4(&t0, &t1, &t2, &t3);
   3995       newTempsV128_4(&p0, &p1, &p2, &c00111111);
   3996 
   3997       // s0 == b2a2 c1b1a1 c0b0a0
   3998       // s1 == b4a4 c3b3a3 c2b2a2
   3999       // s2 == b6a6 c5b5a5 c4b4a4
   4000       // s3 == b0a0 c7b7a7 c6b6a6
   4001       assign(s0, EX(i0));
   4002       assign(s1, SL(EX(i1),EX(i0),6*2));
   4003       assign(s2, SL(EX(i2),EX(i1),4*2));
   4004       assign(s3, SL(EX(i0),EX(i2),2*2));
   4005 
   4006       // t0 == 0 0 c1c0 b1b0 a1a0
   4007       // t1 == 0 0 c3c2 b3b2 a3a2
   4008       // t2 == 0 0 c5c4 b5b4 a5a4
   4009       // t3 == 0 0 c7c6 b7b6 a7a6
   4010       assign(c00111111, mkV128(0x0FFF));
   4011       assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
   4012       assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
   4013       assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
   4014       assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));
   4015 
   4016       assign(p0, OR2(EX(t0),          SHL(EX(t1),6*2)));
   4017       assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
   4018       assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));
   4019 
   4020       // Then deinterleave at 32x4 granularity.
   4021       math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
   4022       return;
   4023    }
   4024 
   4025    if (laneSzBlg2 == 0) {
   4026       // 8x16.  This is the same scheme as for 16x8, with twice the
   4027       // number of intermediate values.
   4028       //
   4029       // u2 == C{f..0}
   4030       // u1 == B{f..0}
   4031       // u0 == A{f..0}
   4032       //
   4033       // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
   4034       // i1 ==  BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
   4035       // i0 ==   A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
   4036       //
   4037       // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
   4038       // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
   4039       // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
   4040       //
   4041       IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
   4042              t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
   4043       s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
   4044          = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
   4045          = IRTemp_INVALID;
   4046       newTempsV128_4(&s0, &s1, &s2, &s3);
   4047       newTempsV128_4(&s4, &s5, &s6, &s7);
   4048       newTempsV128_4(&t0, &t1, &t2, &t3);
   4049       newTempsV128_4(&t4, &t5, &t6, &t7);
   4050       newTempsV128_4(&p0, &p1, &p2, &cMASK);
   4051 
   4052       // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
   4053       // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
   4054       // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
   4055       // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
   4056       // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
   4057       // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
   4058       // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
   4059       // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
   4060       assign(s0, SL(EX(i1),EX(i0), 0));
   4061       assign(s1, SL(EX(i1),EX(i0), 6));
   4062       assign(s2, SL(EX(i1),EX(i0),12));
   4063       assign(s3, SL(EX(i2),EX(i1), 2));
   4064       assign(s4, SL(EX(i2),EX(i1), 8));
   4065       assign(s5, SL(EX(i2),EX(i1),14));
   4066       assign(s6, SL(EX(i0),EX(i2), 4));
   4067       assign(s7, SL(EX(i0),EX(i2),10));
   4068 
   4069       // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
   4070       // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
   4071       // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
   4072       // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
   4073       // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
   4074       // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
   4075       // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
   4076       // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
   4077       assign(cMASK, mkV128(0x003F));
   4078       assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
   4079       assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
   4080       assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
   4081       assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
   4082       assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
   4083       assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
   4084       assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
   4085       assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));
   4086 
   4087       assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
   4088       assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
   4089                       SHL(EX(t3),2),  SHR(EX(t2),4) ));
   4090       assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));
   4091 
   4092       // Then deinterleave at 16x8 granularity.
   4093       math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
   4094       return;
   4095    }
   4096 
   4097    /*NOTREACHED*/
   4098    vassert(0);
   4099 }
   4100 
   4101 
   4102 /* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
   4103 static
   4104 void math_DEINTERLEAVE4_128(
   4105         /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
   4106         UInt laneSzBlg2,
   4107         IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
   4108 {
   4109    if (laneSzBlg2 == 3) {
   4110       // 64x2
   4111       assign(*u0, ILO64x2(EX(i2), EX(i0)));
   4112       assign(*u1, IHI64x2(EX(i2), EX(i0)));
   4113       assign(*u2, ILO64x2(EX(i3), EX(i1)));
   4114       assign(*u3, IHI64x2(EX(i3), EX(i1)));
   4115       return;
   4116    }
   4117    if (laneSzBlg2 == 2) {
   4118       // 32x4
   4119       IRTemp p0 = newTempV128();
   4120       IRTemp p2 = newTempV128();
   4121       IRTemp p1 = newTempV128();
   4122       IRTemp p3 = newTempV128();
   4123       assign(p0, ILO32x4(EX(i1), EX(i0)));
   4124       assign(p1, IHI32x4(EX(i1), EX(i0)));
   4125       assign(p2, ILO32x4(EX(i3), EX(i2)));
   4126       assign(p3, IHI32x4(EX(i3), EX(i2)));
   4127       // And now do what we did for the 64-bit case.
   4128       math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
   4129       return;
   4130    }
   4131    if (laneSzBlg2 == 1) {
   4132       // 16x8
   4133       // Deinterleave into 32-bit chunks, then do as the 32-bit case.
   4134       IRTemp p0 = newTempV128();
   4135       IRTemp p1 = newTempV128();
   4136       IRTemp p2 = newTempV128();
   4137       IRTemp p3 = newTempV128();
   4138       assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
   4139       assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
   4140       assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
   4141       assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
   4142       // From here on it is like the 32 bit case.
   4143       math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
   4144       return;
   4145    }
   4146    if (laneSzBlg2 == 0) {
   4147       // 8x16
   4148       // Deinterleave into 16-bit chunks, then do as the 16-bit case.
   4149       IRTemp p0 = newTempV128();
   4150       IRTemp p1 = newTempV128();
   4151       IRTemp p2 = newTempV128();
   4152       IRTemp p3 = newTempV128();
   4153       assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
   4154                           ILO8x16(EX(i0),ROL(EX(i0),4)) ));
   4155       assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
   4156                           ILO8x16(EX(i1),ROL(EX(i1),4)) ));
   4157       assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
   4158                           ILO8x16(EX(i2),ROL(EX(i2),4)) ));
   4159       assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
   4160                           ILO8x16(EX(i3),ROL(EX(i3),4)) ));
   4161       // From here on it is like the 16 bit case.
   4162       math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
   4163       return;
   4164    }
   4165    /*NOTREACHED*/
   4166    vassert(0);
   4167 }
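
        /* Illustrative worked example (annotation only, not from the
           original source): in the 64x2 case above, if memory supplied
           the 4-element structures A0 B0 C0 D0 A1 B1 C1 D1, then
              i0 = B0:A0   i1 = D0:C0   i2 = B1:A1   i3 = D1:C1
           (low lane rightmost), and the four interleave ops produce
              u0 = A1:A0   u1 = B1:B0   u2 = C1:C0   u3 = D1:D0
           which is exactly the per-register view that LD4 requires. */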
   4168 
   4169 
   4170 /* Wrappers that use the full-width (de)interleavers to do half-width
   4171    (de)interleaving.  The scheme is to clone each input lane in the
   4172    lower half of each incoming value, do a full width (de)interleave
   4173    at the next lane size up, and remove every other lane of the
   4174    result.  The returned values may have any old junk in the upper
   4175    64 bits -- the caller must ignore that. */
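
        /* Illustrative trace (annotation only): interleaving two 64-bit
           values at 32-bit lanes, with inputs (low lane rightmost, upper
           halves don't-care)
              u0 = .. .. a1 a0      u1 = .. .. b1 b0
           Doubling each lane gives
              du0 = a1 a1 a0 a0     du1 = b1 b1 b0 b0
           and a full-width interleave at the next size up (64x2) yields
              di0 = b0 b0 a0 a0     di1 = b1 b1 a1 a1
           Halving with CatEvenLanes32x4(diN,diN) then gives
              i0 = b0 a0 b0 a0      i1 = b1 a1 b1 a1
           whose low 64 bits are the required results; the upper halves
           are the "any old junk" mentioned above. */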
   4176 
   4177 /* Helper function -- get doubling and narrowing operations. */
   4178 static
   4179 void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
   4180                                    /*OUT*/IROp* halver,
   4181                                    UInt laneSzBlg2 )
   4182 {
   4183    switch (laneSzBlg2) {
   4184       case 2:
   4185          *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
   4186          break;
   4187       case 1:
   4188          *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
   4189          break;
   4190       case 0:
   4191          *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
   4192          break;
   4193       default:
   4194          vassert(0);
   4195    }
   4196 }
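
        /* For example (annotation only): with laneSzBlg2 == 2 the halver
           is Iop_CatEvenLanes32x4, and CatEvenLanes32x4(x,x) for
           x = d:c:b:a produces c:a:c:a -- the even-numbered lanes,
           duplicated into both 64-bit halves, of which the wrappers
           below use only the lower one. */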
   4197 
   4198 /* Do interleaving for 1 64 bit vector, for ST1 insns. */
   4199 static
   4200 void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
   4201                           UInt laneSzBlg2, IRTemp u0 )
   4202 {
   4203    assign(*i0, mkexpr(u0));
   4204 }
   4205 
   4206 
   4207 /* Do interleaving for 2 64 bit vectors, for ST2 insns. */
   4208 static
   4209 void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
   4210                           UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
   4211 {
   4212    if (laneSzBlg2 == 3) {
   4213       // 1x64, degenerate case
   4214       assign(*i0, EX(u0));
   4215       assign(*i1, EX(u1));
   4216       return;
   4217    }
   4218 
   4219    vassert(laneSzBlg2 <= 2);
   4220    IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   4221    math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
   4222 
   4223    IRTemp du0 = newTempV128();
   4224    IRTemp du1 = newTempV128();
   4225    assign(du0, binop(doubler, EX(u0), EX(u0)));
   4226    assign(du1, binop(doubler, EX(u1), EX(u1)));
   4227    IRTemp di0 = newTempV128();
   4228    IRTemp di1 = newTempV128();
   4229    math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
   4230    assign(*i0, binop(halver, EX(di0), EX(di0)));
   4231    assign(*i1, binop(halver, EX(di1), EX(di1)));
   4232 }
   4233 
   4234 
   4235 /* Do interleaving for 3 64 bit vectors, for ST3 insns. */
   4236 static
   4237 void math_INTERLEAVE3_64(
   4238         /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
   4239         UInt laneSzBlg2,
   4240         IRTemp u0, IRTemp u1, IRTemp u2 )
   4241 {
   4242    if (laneSzBlg2 == 3) {
   4243       // 1x64, degenerate case
   4244       assign(*i0, EX(u0));
   4245       assign(*i1, EX(u1));
   4246       assign(*i2, EX(u2));
   4247       return;
   4248    }
   4249 
   4250    vassert(laneSzBlg2 <= 2);
   4251    IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   4252    math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
   4253 
   4254    IRTemp du0 = newTempV128();
   4255    IRTemp du1 = newTempV128();
   4256    IRTemp du2 = newTempV128();
   4257    assign(du0, binop(doubler, EX(u0), EX(u0)));
   4258    assign(du1, binop(doubler, EX(u1), EX(u1)));
   4259    assign(du2, binop(doubler, EX(u2), EX(u2)));
   4260    IRTemp di0 = newTempV128();
   4261    IRTemp di1 = newTempV128();
   4262    IRTemp di2 = newTempV128();
   4263    math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
   4264    assign(*i0, binop(halver, EX(di0), EX(di0)));
   4265    assign(*i1, binop(halver, EX(di1), EX(di1)));
   4266    assign(*i2, binop(halver, EX(di2), EX(di2)));
   4267 }
   4268 
   4269 
   4270 /* Do interleaving for 4 64 bit vectors, for ST4 insns. */
   4271 static
   4272 void math_INTERLEAVE4_64(
   4273         /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
   4274         UInt laneSzBlg2,
   4275         IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
   4276 {
   4277    if (laneSzBlg2 == 3) {
   4278       // 1x64, degenerate case
   4279       assign(*i0, EX(u0));
   4280       assign(*i1, EX(u1));
   4281       assign(*i2, EX(u2));
   4282       assign(*i3, EX(u3));
   4283       return;
   4284    }
   4285 
   4286    vassert(laneSzBlg2 <= 2);
   4287    IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   4288    math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
   4289 
   4290    IRTemp du0 = newTempV128();
   4291    IRTemp du1 = newTempV128();
   4292    IRTemp du2 = newTempV128();
   4293    IRTemp du3 = newTempV128();
   4294    assign(du0, binop(doubler, EX(u0), EX(u0)));
   4295    assign(du1, binop(doubler, EX(u1), EX(u1)));
   4296    assign(du2, binop(doubler, EX(u2), EX(u2)));
   4297    assign(du3, binop(doubler, EX(u3), EX(u3)));
   4298    IRTemp di0 = newTempV128();
   4299    IRTemp di1 = newTempV128();
   4300    IRTemp di2 = newTempV128();
   4301    IRTemp di3 = newTempV128();
   4302    math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
   4303                         laneSzBlg2 + 1, du0, du1, du2, du3);
   4304    assign(*i0, binop(halver, EX(di0), EX(di0)));
   4305    assign(*i1, binop(halver, EX(di1), EX(di1)));
   4306    assign(*i2, binop(halver, EX(di2), EX(di2)));
   4307    assign(*i3, binop(halver, EX(di3), EX(di3)));
   4308 }
   4309 
   4310 
   4311 /* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
   4312 static
   4313 void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
   4314                             UInt laneSzBlg2, IRTemp i0 )
   4315 {
   4316    assign(*u0, mkexpr(i0));
   4317 }
   4318 
   4319 
   4320 /* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
   4321 static
   4322 void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
   4323                             UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
   4324 {
   4325    if (laneSzBlg2 == 3) {
   4326       // 1x64, degenerate case
   4327       assign(*u0, EX(i0));
   4328       assign(*u1, EX(i1));
   4329       return;
   4330    }
   4331 
   4332    vassert(laneSzBlg2 <= 2);
   4333    IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   4334    math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
   4335 
   4336    IRTemp di0 = newTempV128();
   4337    IRTemp di1 = newTempV128();
   4338    assign(di0, binop(doubler, EX(i0), EX(i0)));
   4339    assign(di1, binop(doubler, EX(i1), EX(i1)));
   4340 
   4341    IRTemp du0 = newTempV128();
   4342    IRTemp du1 = newTempV128();
   4343    math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
   4344    assign(*u0, binop(halver, EX(du0), EX(du0)));
   4345    assign(*u1, binop(halver, EX(du1), EX(du1)));
   4346 }
   4347 
   4348 
   4349 /* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
   4350 static
   4351 void math_DEINTERLEAVE3_64(
   4352         /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
   4353         UInt laneSzBlg2,
   4354         IRTemp i0, IRTemp i1, IRTemp i2 )
   4355 {
   4356    if (laneSzBlg2 == 3) {
   4357       // 1x64, degenerate case
   4358       assign(*u0, EX(i0));
   4359       assign(*u1, EX(i1));
   4360       assign(*u2, EX(i2));
   4361       return;
   4362    }
   4363 
   4364    vassert(laneSzBlg2 <= 2);
   4365    IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   4366    math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
   4367 
   4368    IRTemp di0 = newTempV128();
   4369    IRTemp di1 = newTempV128();
   4370    IRTemp di2 = newTempV128();
   4371    assign(di0, binop(doubler, EX(i0), EX(i0)));
   4372    assign(di1, binop(doubler, EX(i1), EX(i1)));
   4373    assign(di2, binop(doubler, EX(i2), EX(i2)));
   4374    IRTemp du0 = newTempV128();
   4375    IRTemp du1 = newTempV128();
   4376    IRTemp du2 = newTempV128();
   4377    math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
   4378    assign(*u0, binop(halver, EX(du0), EX(du0)));
   4379    assign(*u1, binop(halver, EX(du1), EX(du1)));
   4380    assign(*u2, binop(halver, EX(du2), EX(du2)));
   4381 }
   4382 
   4383 
   4384 /* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
   4385 static
   4386 void math_DEINTERLEAVE4_64(
   4387         /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
   4388         UInt laneSzBlg2,
   4389         IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
   4390 {
   4391    if (laneSzBlg2 == 3) {
   4392       // 1x64, degenerate case
   4393       assign(*u0, EX(i0));
   4394       assign(*u1, EX(i1));
   4395       assign(*u2, EX(i2));
   4396       assign(*u3, EX(i3));
   4397       return;
   4398    }
   4399 
   4400    vassert(laneSzBlg2 <= 2);
   4401    IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   4402    math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
   4403 
   4404    IRTemp di0 = newTempV128();
   4405    IRTemp di1 = newTempV128();
   4406    IRTemp di2 = newTempV128();
   4407    IRTemp di3 = newTempV128();
   4408    assign(di0, binop(doubler, EX(i0), EX(i0)));
   4409    assign(di1, binop(doubler, EX(i1), EX(i1)));
   4410    assign(di2, binop(doubler, EX(i2), EX(i2)));
   4411    assign(di3, binop(doubler, EX(i3), EX(i3)));
   4412    IRTemp du0 = newTempV128();
   4413    IRTemp du1 = newTempV128();
   4414    IRTemp du2 = newTempV128();
   4415    IRTemp du3 = newTempV128();
   4416    math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
   4417                           laneSzBlg2 + 1, di0, di1, di2, di3);
   4418    assign(*u0, binop(halver, EX(du0), EX(du0)));
   4419    assign(*u1, binop(halver, EX(du1), EX(du1)));
   4420    assign(*u2, binop(halver, EX(du2), EX(du2)));
   4421    assign(*u3, binop(halver, EX(du3), EX(du3)));
   4422 }
   4423 
   4424 
   4425 #undef EX
   4426 #undef SL
   4427 #undef ROR
   4428 #undef ROL
   4429 #undef SHR
   4430 #undef SHL
   4431 #undef ILO64x2
   4432 #undef IHI64x2
   4433 #undef ILO32x4
   4434 #undef IHI32x4
   4435 #undef ILO16x8
   4436 #undef IHI16x8
   4437 #undef ILO8x16
   4438 #undef IHI8x16
   4439 #undef CEV32x4
   4440 #undef COD32x4
   4441 #undef COD16x8
   4442 #undef COD8x16
   4443 #undef CEV8x16
   4444 #undef AND
   4445 #undef OR2
   4446 #undef OR3
   4447 #undef OR4
   4448 
   4449 
   4450 /*------------------------------------------------------------*/
   4451 /*--- Load and Store instructions                          ---*/
   4452 /*------------------------------------------------------------*/
   4453 
   4454 /* Generate the EA for a "reg + reg" style amode.  This is done from
   4455    parts of the insn, but for sanity checking sake it takes the whole
   4456    parts of the insn, but for sanity checking's sake it takes the whole
   4457    and S=insn[12]:
   4458 
   4459    The possible forms, along with their opt:S values, are:
   4460       011:0   Xn|SP + Xm
   4461       111:0   Xn|SP + Xm
   4462       011:1   Xn|SP + Xm * transfer_szB
   4463       111:1   Xn|SP + Xm * transfer_szB
   4464       010:0   Xn|SP + 32Uto64(Wm)
   4465       010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
   4466       110:0   Xn|SP + 32Sto64(Wm)
   4467       110:1   Xn|SP + 32Sto64(Wm) * transfer_szB
   4468 
   4469    Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
   4470    the transfer size is insn[23,31,30].  For integer loads/stores,
   4471    insn[23] is zero, hence szLg2 can be at most 3 in such cases.
   4472 
   4473    If the decoding fails, it returns IRTemp_INVALID.
   4474 
   4475    isInt is True iff this decoding is for transfers to/from integer
   4476    registers.  If False it is for transfers to/from vector registers.
   4477 */
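
        /* Worked example (annotation, not part of the original comment):
           for "ldr x1, [x2, x3, lsl #3]" the fields are
              size = insn[31:30] = 11    (64-bit transfer, szLg2 == 3)
              Rm   = insn[20:16] = 3
              optS = insn[15:12] = 0111  (opt == 011 => Xm, S == 1 => scaled)
              Rn   = insn[9:5]   = 2     Rt = insn[4:0] = 1
           giving EA = X2 + (X3 << 3), per the "011:1" row above. */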
   4478 static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
   4479 {
   4480    UInt    optS  = SLICE_UInt(insn, 15, 12);
   4481    UInt    mm    = SLICE_UInt(insn, 20, 16);
   4482    UInt    nn    = SLICE_UInt(insn, 9, 5);
   4483    UInt    szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
   4484                    | SLICE_UInt(insn, 31, 30); // Log2 of the size
   4485 
   4486    buf[0] = 0;
   4487 
   4488    /* Sanity checks, that this really is a load/store insn. */
   4489    if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
   4490       goto fail;
   4491 
   4492    if (isInt
   4493        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
   4494        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
   4495        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
   4496        && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
   4497       goto fail;
   4498 
   4499    if (!isInt
   4500        && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
   4501       goto fail;
   4502 
   4503    /* Throw out non-verified but possibly valid cases. */
   4504    switch (szLg2) {
   4505       case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
   4506       case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
   4507       case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
   4508       case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
   4509       case BITS3(1,0,0): // can only ever be valid for the vector case
   4510                          if (isInt) goto fail; else break;
   4511       case BITS3(1,0,1): // these sizes are never valid
   4512       case BITS3(1,1,0):
   4513       case BITS3(1,1,1): goto fail;
   4514 
   4515       default: vassert(0);
   4516    }
   4517 
   4518    IRExpr* rhs  = NULL;
   4519    switch (optS) {
   4520       case BITS4(1,1,1,0): goto fail; //ATC
   4521       case BITS4(0,1,1,0):
   4522          rhs = getIReg64orZR(mm);
   4523          vex_sprintf(buf, "[%s, %s]",
   4524                      nameIReg64orZR(nn), nameIReg64orZR(mm));
   4525          break;
   4526       case BITS4(1,1,1,1): goto fail; //ATC
   4527       case BITS4(0,1,1,1):
   4528          rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
   4529          vex_sprintf(buf, "[%s, %s lsl %u]",
   4530                      nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
   4531          break;
   4532       case BITS4(0,1,0,0):
   4533          rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
   4534          vex_sprintf(buf, "[%s, %s uxtw]",
   4535                      nameIReg64orZR(nn), nameIReg32orZR(mm));
   4536          break;
   4537       case BITS4(0,1,0,1):
   4538          rhs = binop(Iop_Shl64,
   4539                      unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
   4540          vex_sprintf(buf, "[%s, %s uxtw, lsl %u]",
   4541                      nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
   4542          break;
   4543       case BITS4(1,1,0,0):
   4544          rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
   4545          vex_sprintf(buf, "[%s, %s sxtw]",
   4546                      nameIReg64orZR(nn), nameIReg32orZR(mm));
   4547          break;
   4548       case BITS4(1,1,0,1):
   4549          rhs = binop(Iop_Shl64,
   4550                      unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
   4551          vex_sprintf(buf, "[%s, %s sxtw, lsl %u]",
   4552                      nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
   4553          break;
   4554       default:
   4555          /* The rest appear to be genuinely invalid */
   4556          goto fail;
   4557    }
   4558 
   4559    vassert(rhs);
   4560    IRTemp res = newTemp(Ity_I64);
   4561    assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
   4562    return res;
   4563 
   4564   fail:
   4565    vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
   4566    return IRTemp_INVALID;
   4567 }
   4568 
   4569 
   4570 /* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
   4571    bits of DATAE :: Ity_I64. */
   4572 static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
   4573 {
   4574    IRExpr* addrE = mkexpr(addr);
   4575    switch (szB) {
   4576       case 8:
   4577          storeLE(addrE, dataE);
   4578          break;
   4579       case 4:
   4580          storeLE(addrE, unop(Iop_64to32, dataE));
   4581          break;
   4582       case 2:
   4583          storeLE(addrE, unop(Iop_64to16, dataE));
   4584          break;
   4585       case 1:
   4586          storeLE(addrE, unop(Iop_64to8, dataE));
   4587          break;
   4588       default:
   4589          vassert(0);
   4590    }
   4591 }
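
        /* Example of the narrowing (annotation only): for szB == 2 and
           dataE == 0x1122334455667788, Iop_64to16 keeps 0x7788, so the
           bytes 0x88, 0x77 are written to addr, addr+1 (little-endian). */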
   4592 
   4593 
   4594 /* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
   4595    placing the result in an Ity_I64 temporary. */
   4596 static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
   4597 {
   4598    IRTemp  res   = newTemp(Ity_I64);
   4599    IRExpr* addrE = mkexpr(addr);
   4600    switch (szB) {
   4601       case 8:
   4602          assign(res, loadLE(Ity_I64,addrE));
   4603          break;
   4604       case 4:
   4605          assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
   4606          break;
   4607       case 2:
   4608          assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
   4609          break;
   4610       case 1:
   4611          assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
   4612          break;
   4613       default:
   4614          vassert(0);
   4615    }
   4616    return res;
   4617 }
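
        /* Example (annotation only): for szB == 2, if memory at addr
           holds bytes 0x88, 0x77, the 16-bit load yields 0x7788 and
           Iop_16Uto64 zero-extends it, so res == 0x0000000000007788. */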
   4618 
   4619 
   4620 /* Generate a "standard 7" name, from bitQ and size.  But also
   4621    allow ".1d" since that's occasionally useful. */
   4622 static
   4623 const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
   4624 {
   4625    vassert(bitQ <= 1 && size <= 3);
   4626    const HChar* nms[8]
   4627       = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
   4628    UInt ix = (bitQ << 2) | size;
   4629    vassert(ix < 8);
   4630    return nms[ix];
   4631 }
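
        /* Examples (annotation only): bitQ == 0, size == 1 gives ix == 1,
           hence "4h" (four 16-bit lanes in 64 bits); bitQ == 1, size == 3
           gives ix == 7, hence "2d". */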
   4632 
   4633 
   4634 static
   4635 Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
   4636 {
   4637 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   4638 
   4639    /* ------------ LDR,STR (immediate, uimm12) ----------- */
   4640    /* uimm12 is scaled by the transfer size
   4641 
   4642       31 29  26    21    9  4
   4643       |  |   |     |     |  |
   4644       11 111 00100 imm12 nn tt    STR  Xt, [Xn|SP, #imm12 * 8]
   4645       11 111 00101 imm12 nn tt    LDR  Xt, [Xn|SP, #imm12 * 8]
   4646 
   4647       10 111 00100 imm12 nn tt    STR  Wt, [Xn|SP, #imm12 * 4]
   4648       10 111 00101 imm12 nn tt    LDR  Wt, [Xn|SP, #imm12 * 4]
   4649 
   4650       01 111 00100 imm12 nn tt    STRH Wt, [Xn|SP, #imm12 * 2]
   4651       01 111 00101 imm12 nn tt    LDRH Wt, [Xn|SP, #imm12 * 2]
   4652 
   4653       00 111 00100 imm12 nn tt    STRB Wt, [Xn|SP, #imm12 * 1]
   4654       00 111 00101 imm12 nn tt    LDRB Wt, [Xn|SP, #imm12 * 1]
   4655    */
   4656    if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
   4657       UInt   szLg2 = INSN(31,30);
   4658       UInt   szB   = 1 << szLg2;
   4659       Bool   isLD  = INSN(22,22) == 1;
   4660       UInt   offs  = INSN(21,10) * szB;
   4661       UInt   nn    = INSN(9,5);
   4662       UInt   tt    = INSN(4,0);
   4663       IRTemp ta    = newTemp(Ity_I64);
   4664       assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
   4665       if (nn == 31) { /* FIXME generate stack alignment check */ }
   4666       vassert(szLg2 < 4);
   4667       if (isLD) {
   4668          putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
   4669       } else {
   4670          gen_narrowing_store(szB, ta, getIReg64orZR(tt));
   4671       }
   4672       const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
   4673       const HChar* st_name[4] = { "strb", "strh", "str", "str" };
   4674       DIP("%s %s, [%s, #%u]\n",
   4675           (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
   4676           nameIReg64orSP(nn), offs);
   4677       return True;
   4678    }
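
           /* Worked example (annotation only): "ldr x0, [x1, #16]" has
              size == 11, so szLg2 == 3 and szB == 8; the imm12 field
              holds 2, hence offs == 2 * 8 == 16.  Offsets that are not
              a multiple of the transfer size are not encodable in this
              form and are handled by the LDUR/STUR (simm9) case below. */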
   4679 
   4680    /* ------------ LDUR,STUR (immediate, simm9) ----------- */
   4681    /*
   4682       31 29  26      20   11 9  4
   4683       |  |   |       |    |  |  |
   4684       (at-Rn-then-Rn=EA)  |  |  |
   4685       sz 111 00000 0 imm9 01 Rn Rt   STR Rt, [Xn|SP], #simm9
   4686       sz 111 00001 0 imm9 01 Rn Rt   LDR Rt, [Xn|SP], #simm9
   4687 
   4688       (at-EA-then-Rn=EA)
   4689       sz 111 00000 0 imm9 11 Rn Rt   STR Rt, [Xn|SP, #simm9]!
   4690       sz 111 00001 0 imm9 11 Rn Rt   LDR Rt, [Xn|SP, #simm9]!
   4691 
   4692       (at-EA)
   4693       sz 111 00000 0 imm9 00 Rn Rt   STR Rt, [Xn|SP, #simm9]
   4694       sz 111 00001 0 imm9 00 Rn Rt   LDR Rt, [Xn|SP, #simm9]
   4695 
   4696       simm9 is unscaled.
   4697 
   4698       The case 'wback && Rn == Rt && Rt != 31' is disallowed.  In the
   4699       load case this is because it would create two competing values for
   4700       Rt.  In the store case the reason is unclear, but the spec
   4701       disallows it anyway.
   4702 
   4703       Stores are narrowing, loads are unsigned widening.  sz encodes
   4704       the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
   4705    */
   4706    if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
   4707        == BITS9(1,1,1, 0,0,0,0,0, 0)) {
   4708       UInt szLg2  = INSN(31,30);
   4709       UInt szB    = 1 << szLg2;
   4710       Bool isLoad = INSN(22,22) == 1;
   4711       UInt imm9   = INSN(20,12);
   4712       UInt nn     = INSN(9,5);
   4713       UInt tt     = INSN(4,0);
   4714       Bool wBack  = INSN(10,10) == 1;
   4715       UInt how    = INSN(11,10);
   4716       if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
   4717          /* undecodable; fall through */
   4718       } else {
   4719          if (nn == 31) { /* FIXME generate stack alignment check */ }
   4720 
   4721          // Compute the transfer address TA and the writeback address WA.
   4722          IRTemp tRN = newTemp(Ity_I64);
   4723          assign(tRN, getIReg64orSP(nn));
   4724          IRTemp tEA = newTemp(Ity_I64);
   4725          Long simm9 = (Long)sx_to_64(imm9, 9);
   4726          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
   4727 
   4728          IRTemp tTA = newTemp(Ity_I64);
   4729          IRTemp tWA = newTemp(Ity_I64);
   4730          switch (how) {
   4731             case BITS2(0,1):
   4732                assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
   4733             case BITS2(1,1):
   4734                assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
   4735             case BITS2(0,0):
   4736                assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
   4737             default:
   4738                vassert(0); /* NOTREACHED */
   4739          }
   4740 
   4741          /* Normally rN would be updated after the transfer.  However, in
   4742             the special case typified by
   4743                str x30, [sp,#-16]!
   4744             it is necessary to update SP before the transfer, (1)
   4745             because Memcheck will otherwise complain about a write
   4746             below the stack pointer, and (2) because the segfault
   4747             stack extension mechanism will otherwise extend the stack
   4748             only down to SP before the instruction, which might not be
   4749             far enough, if the -16 offset takes the actual access
   4750             address to the next page.
   4751          */
   4752          Bool earlyWBack
   4753            = wBack && simm9 < 0 && szB == 8
   4754              && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
   4755 
   4756          if (wBack && earlyWBack)
   4757             putIReg64orSP(nn, mkexpr(tEA));
   4758 
   4759          if (isLoad) {
   4760             putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
   4761          } else {
   4762             gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
   4763          }
   4764 
   4765          if (wBack && !earlyWBack)
   4766             putIReg64orSP(nn, mkexpr(tEA));
   4767 
   4768          const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
   4769          const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
   4770          const HChar* fmt_str = NULL;
   4771          switch (how) {
   4772             case BITS2(0,1):
   4773                fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
   4774                break;
   4775             case BITS2(1,1):
   4776                fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
   4777                break;
   4778             case BITS2(0,0):
   4779                fmt_str = "%s %s, [%s, #%lld] (at-EA)\n";
   4780                break;
   4781             default:
   4782                vassert(0);
   4783          }
   4784          DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
   4785                       nameIRegOrZR(szB == 8, tt),
   4786                       nameIReg64orSP(nn), simm9);
   4787          return True;
   4788       }
   4789    }
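
           /* Worked example (annotation only): "str x30, [sp, #-16]!"
              has imm9 == 0x1F0, which sx_to_64(imm9, 9) turns into -16,
              and how == 11 (at-EA-then-Rn=EA).  Since wBack holds,
              simm9 < 0, szB == 8, nn == 31, tt != nn and it is a store,
              earlyWBack is True, so SP is updated before the store, for
              the reasons given above. */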
   4790 
   4791    /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
   4792    /* L==1 => mm==LD
   4793       L==0 => mm==ST
   4794       x==0 => 32 bit transfers, and zero extended loads
   4795       x==1 => 64 bit transfers
   4796       simm7 is scaled by the (single-register) transfer size
   4797 
   4798       (at-Rn-then-Rn=EA)
   4799       x0 101 0001 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP], #imm
   4800 
   4801       (at-EA-then-Rn=EA)
   4802       x0 101 0011 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]!
   4803 
   4804       (at-EA)
   4805       x0 101 0010 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]
   4806    */
   4807 
   4808    UInt insn_30_23 = INSN(30,23);
   4809    if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
   4810        || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
   4811        || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
   4812       UInt bL     = INSN(22,22);
   4813       UInt bX     = INSN(31,31);
   4814       UInt bWBack = INSN(23,23);
   4815       UInt rT1    = INSN(4,0);
   4816       UInt rN     = INSN(9,5);
   4817       UInt rT2    = INSN(14,10);
   4818       Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
   4819       if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
   4820           || (bL && rT1 == rT2)) {
   4821          /* undecodable; fall through */
   4822       } else {
   4823          if (rN == 31) { /* FIXME generate stack alignment check */ }
   4824 
   4825          // Compute the transfer address TA and the writeback address WA.
   4826          IRTemp tRN = newTemp(Ity_I64);
   4827          assign(tRN, getIReg64orSP(rN));
   4828          IRTemp tEA = newTemp(Ity_I64);
   4829          simm7 = (bX ? 8 : 4) * simm7;
   4830          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
   4831 
   4832          IRTemp tTA = newTemp(Ity_I64);
   4833          IRTemp tWA = newTemp(Ity_I64);
   4834          switch (INSN(24,23)) {
   4835             case BITS2(0,1):
   4836                assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
   4837             case BITS2(1,1):
   4838                assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
   4839             case BITS2(1,0):
   4840                assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
   4841             default:
   4842                vassert(0); /* NOTREACHED */
   4843          }
   4844 
   4845          /* Normally rN would be updated after the transfer.  However, in
   4846             the special case typified by
   4847                stp x29, x30, [sp,#-112]!
   4848             it is necessary to update SP before the transfer, (1)
   4849             because Memcheck will otherwise complain about a write
   4850             below the stack pointer, and (2) because the segfault
   4851             stack extension mechanism will otherwise extend the stack
   4852             only down to SP before the instruction, which might not be
   4853             far enough, if the -112 offset takes the actual access
   4854             address to the next page.
   4855          */
   4856          Bool earlyWBack
   4857            = bWBack && simm7 < 0
   4858              && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
   4859 
   4860          if (bWBack && earlyWBack)
   4861             putIReg64orSP(rN, mkexpr(tEA));
   4862 
   4863          /**/ if (bL == 1 && bX == 1) {
   4864             // 64 bit load
   4865             putIReg64orZR(rT1, loadLE(Ity_I64,
   4866                                       binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
   4867             putIReg64orZR(rT2, loadLE(Ity_I64,
   4868                                       binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
   4869          } else if (bL == 1 && bX == 0) {
   4870             // 32 bit load
   4871             putIReg32orZR(rT1, loadLE(Ity_I32,
   4872                                       binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
   4873             putIReg32orZR(rT2, loadLE(Ity_I32,
   4874                                       binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
   4875          } else if (bL == 0 && bX == 1) {
   4876             // 64 bit store
   4877             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
   4878                     getIReg64orZR(rT1));
   4879             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
   4880                     getIReg64orZR(rT2));
   4881          } else {
   4882             vassert(bL == 0 && bX == 0);
   4883             // 32 bit store
   4884             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
   4885                     getIReg32orZR(rT1));
   4886             storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
   4887                     getIReg32orZR(rT2));
   4888          }
   4889 
   4890          if (bWBack && !earlyWBack)
   4891             putIReg64orSP(rN, mkexpr(tEA));
   4892 
   4893          const HChar* fmt_str = NULL;
   4894          switch (INSN(24,23)) {
   4895             case BITS2(0,1):
   4896                fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
   4897                break;
   4898             case BITS2(1,1):
   4899                fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
   4900                break;
   4901             case BITS2(1,0):
   4902                fmt_str = "%sp %s, %s, [%s, #%lld] (at-EA)\n";
   4903                break;
   4904             default:
   4905                vassert(0);
   4906          }
   4907          DIP(fmt_str, bL == 0 ? "st" : "ld",
   4908                       nameIRegOrZR(bX == 1, rT1),
   4909                       nameIRegOrZR(bX == 1, rT2),
   4910                       nameIReg64orSP(rN), simm7);
   4911          return True;
   4912       }
   4913    }
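
           /* Worked example (annotation only): "stp x29, x30,
              [sp, #-112]!" has bX == 1, so simm7 is scaled by 8; the
              imm7 field holds 0x72, which sign-extends to -14, giving a
              byte offset of -112.  INSN(24,23) == 11 selects
              at-EA-then-Rn=EA, and since this is a store at a negative
              offset from SP, the early writeback special case applies. */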
   4914 
   4915    /* ---------------- LDR (literal, int reg) ---------------- */
   4916    /* 31 29      23    4
   4917       00 011 000 imm19 Rt   LDR   Wt, [PC + sxTo64(imm19 << 2)]
   4918       01 011 000 imm19 Rt   LDR   Xt, [PC + sxTo64(imm19 << 2)]
   4919       10 011 000 imm19 Rt   LDRSW Xt, [PC + sxTo64(imm19 << 2)]
   4920       11 011 000 imm19 Rt   prefetch  [PC + sxTo64(imm19 << 2)]
   4921       Just handles the first two cases for now.
   4922    */
   4923    if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
   4924       UInt  imm19 = INSN(23,5);
   4925       UInt  rT    = INSN(4,0);
   4926       UInt  bX    = INSN(30,30);
   4927       ULong ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
   4928       if (bX) {
   4929          putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
   4930       } else {
   4931          putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
   4932       }
   4933       DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
   4934       return True;
   4935    }
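
           /* Worked example (annotation only): with the instruction at
              PC 0x400000 and imm19 == 4, sx_to_64(imm19 << 2, 21) == 16,
              so ea == 0x400010; bX selects between a 64-bit (Xt) load
              and a 32-bit, zero-extending (Wt) load from that address. */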
   4936 
   4937    /* -------------- {LD,ST}R (integer register) --------------- */
   4938    /* 31 29        20 15     12 11 9  4
   4939       |  |         |  |      |  |  |  |
   4940       11 111000011 Rm option S  10 Rn Rt  LDR  Xt, [Xn|SP, R<m>{ext/sh}]
   4941       10 111000011 Rm option S  10 Rn Rt  LDR  Wt, [Xn|SP, R<m>{ext/sh}]
   4942       01 111000011 Rm option S  10 Rn Rt  LDRH Wt, [Xn|SP, R<m>{ext/sh}]
   4943       00 111000011 Rm option S  10 Rn Rt  LDRB Wt, [Xn|SP, R<m>{ext/sh}]
   4944 
   4945       11 111000001 Rm option S  10 Rn Rt  STR  Xt, [Xn|SP, R<m>{ext/sh}]
   4946       10 111000001 Rm option S  10 Rn Rt  STR  Wt, [Xn|SP, R<m>{ext/sh}]
   4947       01 111000001 Rm option S  10 Rn Rt  STRH Wt, [Xn|SP, R<m>{ext/sh}]
   4948       00 111000001 Rm option S  10 Rn Rt  STRB Wt, [Xn|SP, R<m>{ext/sh}]
   4949    */
   4950    if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
   4951        && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
   4952       HChar  dis_buf[64];
   4953       UInt   szLg2 = INSN(31,30);
   4954       Bool   isLD  = INSN(22,22) == 1;
   4955       UInt   tt    = INSN(4,0);
   4956       IRTemp ea    = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
   4957       if (ea != IRTemp_INVALID) {
   4958          switch (szLg2) {
   4959             case 3: /* 64 bit */
   4960                if (isLD) {
   4961                   putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
   4962                   DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
   4963                } else {
   4964                   storeLE(mkexpr(ea), getIReg64orZR(tt));
   4965                   DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
   4966                }
   4967                break;
   4968             case 2: /* 32 bit */
   4969                if (isLD) {
   4970                   putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
   4971                   DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
   4972                } else {
   4973                   storeLE(mkexpr(ea), getIReg32orZR(tt));
   4974                   DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
   4975                }
   4976                break;
   4977             case 1: /* 16 bit */
   4978                if (isLD) {
   4979                   putIReg64orZR(tt, unop(Iop_16Uto64,
   4980                                          loadLE(Ity_I16, mkexpr(ea))));
   4981                   DIP("ldrh %s, %s\n", nameIReg32orZR(tt), dis_buf);
   4982                } else {
   4983                   storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
   4984                   DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
   4985                }
   4986                break;
   4987             case 0: /* 8 bit */
   4988                if (isLD) {
   4989                   putIReg64orZR(tt, unop(Iop_8Uto64,
   4990                                          loadLE(Ity_I8, mkexpr(ea))));
   4991                   DIP("ldrb %s, %s\n", nameIReg32orZR(tt), dis_buf);
   4992                } else {
   4993                   storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
   4994                   DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
   4995                }
   4996                break;
   4997             default:
   4998                vassert(0);
   4999          }
   5000          return True;
   5001       }
   5002    }
   5003 
   5004    /* -------------- LDRS{B,H,W} (uimm12) -------------- */
   5005    /* 31 29  26  23 21    9 4
   5006       10 111 001 10 imm12 n t   LDRSW Xt, [Xn|SP, #pimm12 * 4]
   5007       01 111 001 1x imm12 n t   LDRSH Rt, [Xn|SP, #pimm12 * 2]
   5008       00 111 001 1x imm12 n t   LDRSB Rt, [Xn|SP, #pimm12 * 1]
   5009       where
   5010          Rt is Wt when x==1, Xt when x==0
   5011    */
   5012    if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
   5013       /* Further checks on bits 31:30 and 22 */
   5014       Bool valid = False;
   5015       switch ((INSN(31,30) << 1) | INSN(22,22)) {
   5016          case BITS3(1,0,0):                    // LDRSW Xt
   5017          case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
   5018          case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
   5019             valid = True;
   5020             break;
   5021       }
   5022       if (valid) {
   5023          UInt    szLg2 = INSN(31,30);
   5024          UInt    bitX  = INSN(22,22);
   5025          UInt    imm12 = INSN(21,10);
   5026          UInt    nn    = INSN(9,5);
   5027          UInt    tt    = INSN(4,0);
   5028          UInt    szB   = 1 << szLg2;
   5029          IRExpr* ea    = binop(Iop_Add64,
   5030                                getIReg64orSP(nn), mkU64(imm12 * szB));
   5031          switch (szB) {
   5032             case 4:
   5033                vassert(bitX == 0);
   5034                putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
   5035                DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
   5036                    nameIReg64orSP(nn), imm12 * szB);
   5037                break;
   5038             case 2:
   5039                if (bitX == 1) {
   5040                   putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
   5041                } else {
   5042                   putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
   5043                }
   5044                DIP("ldrsh %s, [%s, #%u]\n",
   5045                    nameIRegOrZR(bitX == 0, tt),
   5046                    nameIReg64orSP(nn), imm12 * szB);
   5047                break;
   5048             case 1:
   5049                if (bitX == 1) {
   5050                   putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
   5051                } else {
   5052                   putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
   5053                }
   5054                DIP("ldrsb %s, [%s, #%u]\n",
   5055                    nameIRegOrZR(bitX == 0, tt),
   5056                    nameIReg64orSP(nn), imm12 * szB);
   5057                break;
   5058             default:
   5059                vassert(0);
   5060          }
   5061          return True;
   5062       }
   5063       /* else fall through */
   5064    }
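
           /* Worked example (annotation only): "ldrsh w0, [x1, #4]" has
              31:30 == 01 and x == 1, so szLg2 == 1 and szB == 2; the
              imm12 field holds 2, giving ea == X1 + 4.  The halfword is
              sign extended to 32 bits and written to W0 (and hence zero
              extended into X0). */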
   5065 
   5066    /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
   5067    /* (at-Rn-then-Rn=EA)
   5068       31 29      23 21 20   11 9 4
   5069       00 111 000 1x 0  imm9 01 n t  LDRSB Rt, [Xn|SP], #simm9
   5070       01 111 000 1x 0  imm9 01 n t  LDRSH Rt, [Xn|SP], #simm9
   5071       10 111 000 10 0  imm9 01 n t  LDRSW Xt, [Xn|SP], #simm9
   5072 
   5073       (at-EA-then-Rn=EA)
   5074       00 111 000 1x 0  imm9 11 n t  LDRSB Rt, [Xn|SP, #simm9]!
   5075       01 111 000 1x 0  imm9 11 n t  LDRSH Rt, [Xn|SP, #simm9]!
   5076       10 111 000 10 0  imm9 11 n t  LDRSW Xt, [Xn|SP, #simm9]!
   5077       where
   5078          Rt is Wt when x==1, Xt when x==0
   5079          transfer-at-Rn when [11]==0, at EA when [11]==1
   5080    */
   5081    if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
   5082        && INSN(21,21) == 0 && INSN(10,10) == 1) {
   5083       /* Further checks on bits 31:30 and 22 */
   5084       Bool valid = False;
   5085       switch ((INSN(31,30) << 1) | INSN(22,22)) {
   5086          case BITS3(1,0,0):                    // LDRSW Xt
   5087          case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
   5088          case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
   5089             valid = True;
   5090             break;
   5091       }
   5092       if (valid) {
   5093          UInt   szLg2 = INSN(31,30);
   5094          UInt   imm9  = INSN(20,12);
   5095          Bool   atRN  = INSN(11,11) == 0;
   5096          UInt   nn    = INSN(9,5);
   5097          UInt   tt    = INSN(4,0);
   5098          IRTemp tRN   = newTemp(Ity_I64);
   5099          IRTemp tEA   = newTemp(Ity_I64);
   5100          IRTemp tTA   = IRTemp_INVALID;
   5101          Long   simm9 = (Long)sx_to_64(imm9, 9);
   5102          Bool   is64  = INSN(22,22) == 0;
   5103          assign(tRN, getIReg64orSP(nn));
   5104          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
   5105          tTA = atRN ? tRN : tEA;
   5106          HChar ch = '?';
   5107          /* There are 5 cases:
   5108                byte     load,           SX to 64
   5109                byte     load, SX to 32, ZX to 64
   5110                halfword load,           SX to 64
   5111                halfword load, SX to 32, ZX to 64
   5112                word     load,           SX to 64
   5113             The ifs below handle them in the listed order.
   5114          */
   5115          if (szLg2 == 0) {
   5116             ch = 'b';
   5117             if (is64) {
   5118                putIReg64orZR(tt, unop(Iop_8Sto64,
   5119                                       loadLE(Ity_I8, mkexpr(tTA))));
   5120             } else {
   5121                putIReg32orZR(tt, unop(Iop_8Sto32,
   5122                                       loadLE(Ity_I8, mkexpr(tTA))));
   5123             }
   5124          }
   5125          else if (szLg2 == 1) {
   5126             ch = 'h';
   5127             if (is64) {
   5128                putIReg64orZR(tt, unop(Iop_16Sto64,
   5129                                       loadLE(Ity_I16, mkexpr(tTA))));
   5130             } else {
   5131                putIReg32orZR(tt, unop(Iop_16Sto32,
   5132                                       loadLE(Ity_I16, mkexpr(tTA))));
   5133             }
   5134          }
   5135          else if (szLg2 == 2 && is64) {
   5136             ch = 'w';
   5137             putIReg64orZR(tt, unop(Iop_32Sto64,
   5138                                    loadLE(Ity_I32, mkexpr(tTA))));
   5139          }
   5140          else {
   5141             vassert(0);
   5142          }
   5143          putIReg64orSP(nn, mkexpr(tEA));
   5144          DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!\n",
   5145              ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
   5146          return True;
   5147       }
   5148       /* else fall through */
   5149    }
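
           /* Worked example (annotation only): "ldrsb x0, [x1], #1" has
              szLg2 == 0 and insn[22] == 0 (is64), so a byte is loaded
              from X1 and sign extended to 64 bits into X0, after which
              X1 is advanced by 1 (atRN, i.e. post-index). */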
   5150 
   5151    /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
   5152    /* 31 29      23 21 20   11 9 4
   5153       00 111 000 1x 0  imm9 00 n t  LDURSB Rt, [Xn|SP, #simm9]
   5154       01 111 000 1x 0  imm9 00 n t  LDURSH Rt, [Xn|SP, #simm9]
   5155       10 111 000 10 0  imm9 00 n t  LDURSW Xt, [Xn|SP, #simm9]
   5156       where
   5157          Rt is Wt when x==1, Xt when x==0
   5158    */
   5159    if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
   5160        && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
   5161       /* Further checks on bits 31:30 and 22 */
   5162       Bool valid = False;
   5163       switch ((INSN(31,30) << 1) | INSN(22,22)) {
   5164          case BITS3(1,0,0):                    // LDURSW Xt
   5165          case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
   5166          case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
   5167             valid = True;
   5168             break;
   5169       }
   5170       if (valid) {
   5171          UInt   szLg2 = INSN(31,30);
   5172          UInt   imm9  = INSN(20,12);
   5173          UInt   nn    = INSN(9,5);
   5174          UInt   tt    = INSN(4,0);
   5175          IRTemp tRN   = newTemp(Ity_I64);
   5176          IRTemp tEA   = newTemp(Ity_I64);
   5177          Long   simm9 = (Long)sx_to_64(imm9, 9);
   5178          Bool   is64  = INSN(22,22) == 0;
   5179          assign(tRN, getIReg64orSP(nn));
   5180          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
   5181          HChar ch = '?';
   5182          /* There are 5 cases:
   5183                byte     load,           SX to 64
   5184                byte     load, SX to 32, ZX to 64
   5185                halfword load,           SX to 64
   5186                halfword load, SX to 32, ZX to 64
   5187                word     load,           SX to 64
   5188             The ifs below handle them in the listed order.
   5189          */
   5190          if (szLg2 == 0) {
   5191             ch = 'b';
   5192             if (is64) {
   5193                putIReg64orZR(tt, unop(Iop_8Sto64,
   5194                                       loadLE(Ity_I8, mkexpr(tEA))));
   5195             } else {
   5196                putIReg32orZR(tt, unop(Iop_8Sto32,
   5197                                       loadLE(Ity_I8, mkexpr(tEA))));
   5198             }
   5199          }
   5200          else if (szLg2 == 1) {
   5201             ch = 'h';
   5202             if (is64) {
   5203                putIReg64orZR(tt, unop(Iop_16Sto64,
   5204                                       loadLE(Ity_I16, mkexpr(tEA))));
   5205             } else {
   5206                putIReg32orZR(tt, unop(Iop_16Sto32,
   5207                                       loadLE(Ity_I16, mkexpr(tEA))));
   5208             }
   5209          }
   5210          else if (szLg2 == 2 && is64) {
   5211             ch = 'w';
   5212             putIReg64orZR(tt, unop(Iop_32Sto64,
   5213                                    loadLE(Ity_I32, mkexpr(tEA))));
   5214          }
   5215          else {
   5216             vassert(0);
   5217          }
   5218          DIP("ldurs%c %s, [%s, #%lld]\n",
   5219              ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
   5220          return True;
   5221       }
   5222       /* else fall through */
   5223    }
   5224 
   5225    /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
   5226    /* L==1    => mm==LD
   5227       L==0    => mm==ST
   5228       sz==00  => 32 bit (S) transfers
   5229       sz==01  => 64 bit (D) transfers
   5230       sz==10  => 128 bit (Q) transfers
   5231       sz==11  isn't allowed
   5232       simm7 is scaled by the (single-register) transfer size
   5233 
   5234       31 29  26   22 21   14 9 4
   5235 
   5236       sz 101 1000 L  imm7 t2 n t1   mmNP SDQt1, SDQt2, [Xn|SP, #imm]
   5237                                     (at-EA, with nontemporal hint)
   5238 
   5239       sz 101 1001 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP], #imm
   5240                                     (at-Rn-then-Rn=EA)
   5241 
   5242       sz 101 1010 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]
   5243                                     (at-EA)
   5244 
   5245       sz 101 1011 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]!
   5246                                     (at-EA-then-Rn=EA)
   5247    */
   5248    if (INSN(29,25) == BITS5(1,0,1,1,0)) {
   5249       UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
   5250       Bool isLD   = INSN(22,22) == 1;
   5251       Bool wBack  = INSN(23,23) == 1;
   5252       Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
   5253       UInt tt2    = INSN(14,10);
   5254       UInt nn     = INSN(9,5);
   5255       UInt tt1    = INSN(4,0);
   5256       if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
   5257          /* undecodable; fall through */
   5258       } else {
   5259          if (nn == 31) { /* FIXME generate stack alignment check */ }
   5260 
   5261          // Compute the transfer address TA and the writeback address WA.
   5262          UInt   szB = 4 << szSlg2; /* szB is the per-register size */
   5263          IRTemp tRN = newTemp(Ity_I64);
   5264          assign(tRN, getIReg64orSP(nn));
   5265          IRTemp tEA = newTemp(Ity_I64);
   5266          simm7 = szB * simm7;
   5267          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
   5268 
   5269          IRTemp tTA = newTemp(Ity_I64);
   5270          IRTemp tWA = newTemp(Ity_I64);
   5271          switch (INSN(24,23)) {
   5272             case BITS2(0,1):
   5273                assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
   5274             case BITS2(1,1):
   5275                assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
   5276             case BITS2(1,0):
   5277             case BITS2(0,0):
   5278                assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
   5279             default:
   5280                vassert(0); /* NOTREACHED */
   5281          }
   5282 
   5283          IRType ty = Ity_INVALID;
   5284          switch (szB) {
   5285             case 4:  ty = Ity_F32;  break;
   5286             case 8:  ty = Ity_F64;  break;
   5287             case 16: ty = Ity_V128; break;
   5288             default: vassert(0);
   5289          }
   5290 
   5291          /* Normally rN would be updated after the transfer.  However, in
   5292             the special cases typified by
   5293                stp q0, q1, [sp,#-512]!
   5294                stp d0, d1, [sp,#-512]!
   5295                stp s0, s1, [sp,#-512]!
   5296             it is necessary to update SP before the transfer, (1)
   5297             because Memcheck will otherwise complain about a write
   5298             below the stack pointer, and (2) because the segfault
   5299             stack extension mechanism will otherwise extend the stack
   5300             only down to SP before the instruction, which might not be
   5301             far enough, if the -512 offset takes the actual access
   5302             address to the next page.
   5303          */
   5304          Bool earlyWBack
   5305            = wBack && simm7 < 0
   5306              && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
   5307 
   5308          if (wBack && earlyWBack)
   5309             putIReg64orSP(nn, mkexpr(tEA));
   5310 
   5311          if (isLD) {
   5312             if (szB < 16) {
   5313                putQReg128(tt1, mkV128(0x0000));
   5314             }
   5315             putQRegLO(tt1,
   5316                       loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
   5317             if (szB < 16) {
   5318                putQReg128(tt2, mkV128(0x0000));
   5319             }
   5320             putQRegLO(tt2,
   5321                       loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
   5322          } else {
   5323             storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
   5324                     getQRegLO(tt1, ty));
   5325             storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
   5326                     getQRegLO(tt2, ty));
   5327          }
   5328 
   5329          if (wBack && !earlyWBack)
   5330             putIReg64orSP(nn, mkexpr(tEA));
   5331 
   5332          const HChar* fmt_str = NULL;
   5333          switch (INSN(24,23)) {
   5334             case BITS2(0,1):
   5335                fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
   5336                break;
   5337             case BITS2(1,1):
   5338                fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
   5339                break;
   5340             case BITS2(1,0):
   5341                fmt_str = "%sp %s, %s, [%s, #%lld] (at-EA)\n";
   5342                break;
   5343             case BITS2(0,0):
   5344                fmt_str = "%snp %s, %s, [%s, #%lld] (at-EA, nontemporal)\n";
   5345                break;
   5346             default:
   5347                vassert(0);
   5348          }
   5349          DIP(fmt_str, isLD ? "ld" : "st",
   5350                       nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
   5351                       nameIReg64orSP(nn), simm7);
   5352          return True;
   5353       }
   5354    }
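
           /* Worked example (annotation only): "ldp q0, q1, [sp], #32"
              has sz == 10, so szB == 4 << 2 == 16 (Q registers); the
              imm7 field holds 2, scaled by 16 to give simm7 == 32.
              INSN(24,23) == 01 selects at-Rn-then-Rn=EA: the loads use
              SP as-is and SP is advanced by 32 afterwards. */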
   5355 
   5356    /* -------------- {LD,ST}R (vector register) --------------- */
   5357    /* 31 29     23  20 15     12 11 9  4
   5358       |  |      |   |  |      |  |  |  |
   5359       00 111100 011 Rm option S  10 Rn Rt  LDR Bt, [Xn|SP, R<m>{ext/sh}]
   5360       01 111100 011 Rm option S  10 Rn Rt  LDR Ht, [Xn|SP, R<m>{ext/sh}]
   5361       10 111100 011 Rm option S  10 Rn Rt  LDR St, [Xn|SP, R<m>{ext/sh}]
   5362       11 111100 011 Rm option S  10 Rn Rt  LDR Dt, [Xn|SP, R<m>{ext/sh}]
   5363       00 111100 111 Rm option S  10 Rn Rt  LDR Qt, [Xn|SP, R<m>{ext/sh}]
   5364 
   5365       00 111100 001 Rm option S  10 Rn Rt  STR Bt, [Xn|SP, R<m>{ext/sh}]
   5366       01 111100 001 Rm option S  10 Rn Rt  STR Ht, [Xn|SP, R<m>{ext/sh}]
   5367       10 111100 001 Rm option S  10 Rn Rt  STR St, [Xn|SP, R<m>{ext/sh}]
   5368       11 111100 001 Rm option S  10 Rn Rt  STR Dt, [Xn|SP, R<m>{ext/sh}]
   5369       00 111100 101 Rm option S  10 Rn Rt  STR Qt, [Xn|SP, R<m>{ext/sh}]
   5370    */
   5371    if (INSN(29,24) == BITS6(1,1,1,1,0,0)
   5372        && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
   5373       HChar  dis_buf[64];
   5374       UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
   5375       Bool   isLD  = INSN(22,22) == 1;
   5376       UInt   tt    = INSN(4,0);
   5377       if (szLg2 > 4) goto after_LDR_STR_vector_register;
   5378       IRTemp ea    = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
   5379       if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
   5380       switch (szLg2) {
   5381          case 0: /* 8 bit */
   5382             if (isLD) {
   5383                putQReg128(tt, mkV128(0x0000));
   5384                putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
   5385                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
   5386             } else {
   5387                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
   5388                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
   5389             }
   5390             break;
   5391          case 1: /* 16 bit */
   5392             if (isLD) {
   5393                putQReg128(tt, mkV128(0x0000));
   5394                putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
   5395                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
   5396             } else {
   5397                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
   5398                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
   5399             }
   5400             break;
   5401          case 2: /* 32 bit */
   5402             if (isLD) {
   5403                putQReg128(tt, mkV128(0x0000));
   5404                putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
   5405                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
   5406             } else {
   5407                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
   5408                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
   5409             }
   5410             break;
   5411          case 3: /* 64 bit */
   5412             if (isLD) {
   5413                putQReg128(tt, mkV128(0x0000));
   5414                putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
   5415                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
   5416             } else {
   5417                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
   5418                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
   5419             }
   5420             break;
   5421          case 4: /* 128 bit */
   5422             if (isLD) {
   5423                putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
   5424                DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
   5425             } else {
   5426                storeLE(mkexpr(ea), getQReg128(tt));
   5427                DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
   5428             }
   5429             break;
   5430          default:
   5431             vassert(0);
   5432       }
   5433       return True;
   5434    }
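
           /* Worked example (annotation only): "ldr q0, [x1, x2]" has
              size == 00 and insn[23] == 1, so szLg2 == (1 << 2) | 0 == 4,
              selecting the whole-Q case above; the narrower cases first
              zero out Qt and then write only the low transfer-size
              bytes. */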
   5435   after_LDR_STR_vector_register:
   5436 
   5437    /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
   5438    /* 31 29      22 20 15  12 11 9  4
   5439       |  |       |  |  |   |  |  |  |
   5440       10 1110001 01 Rm opt S 10 Rn Rt    LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
   5441 
   5442       01 1110001 01 Rm opt S 10 Rn Rt    LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
   5443       01 1110001 11 Rm opt S 10 Rn Rt    LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
   5444 
   5445       00 1110001 01 Rm opt S 10 Rn Rt    LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
   5446       00 1110001 11 Rm opt S 10 Rn Rt    LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
   5447    */
   5448    if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
   5449        && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
   5450       HChar  dis_buf[64];
   5451       UInt   szLg2  = INSN(31,30);
   5452       Bool   sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
   5453       UInt   tt     = INSN(4,0);
   5454       if (szLg2 == 3) goto after_LDRS_integer_register;
   5455       IRTemp ea     = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
   5456       if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
   5457       /* Enumerate the 5 variants explicitly. */
   5458       if (