/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                     guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2013 OpenWorks
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* KNOWN LIMITATIONS 2014-Nov-16

   * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.

     Also FP comparison "unordered" .. is implemented as normal FP
     comparison.

     Both should be fixed.  They behave incorrectly in the presence of
     NaNs.

     FMULX is treated the same as FMUL.  That's also not correct.

   * Floating multiply-add (etc) insns are split into a multiply and
     an add, and so suffer double rounding; hence the least
     significant mantissa bit is sometimes incorrect.  Fix: use the IR
     multiply-add IROps instead.

   * FRINTA, FRINTN are kludged .. they just round to nearest.  No special
     handling for the "ties" case.  FRINTX might be dubious too.

   * Ditto FCVTXN.  No idea what "round to odd" means.  This implementation
     just rounds to nearest.
*/
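
/* Illustrative sketch only, not part of the decoder: the reason the
   split multiply-add loses the last mantissa bit is double rounding.
   In C terms (fmaf is from <math.h>):

      float split ( float a, float b, float c ) {
         return (a * b) + c;     // rounds after the mul, then again
      }
      float fused ( float a, float b, float c ) {
         return fmaf(a, b, c);   // rounds exactly once
      }

   For some inputs split() and fused() differ in the least significant
   mantissa bit, which is exactly the error described above; the IR
   multiply-add IROps behave like fused(). */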

/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
      (ror x12, x12, #3;   ror x12, x12, #13
       ror x12, x12, #51;  ror x12, x12, #61)

   Following that, one of the following 4 is allowed
   (standard interpretation in parentheses):

      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
      AA090129 (orr x9,x9,x9)      IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/
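
/* A minimal sketch (assumed code, for illustration only) of how the
   preamble might be recognised, using the word loader
   getUIntLittleEndianly defined further down this file:

      const UChar* code = ... guest bytes ...;
      Bool isPreamble
         =  getUIntLittleEndianly(code+ 0) == 0x93CC0D8CU
         && getUIntLittleEndianly(code+ 4) == 0x93CC358CU
         && getUIntLittleEndianly(code+ 8) == 0x93CCCD8CU
         && getUIntLittleEndianly(code+12) == 0x93CCF58CU;

   Only if isPreamble holds is the following word inspected for one of
   the four special meanings listed above. */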

/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction.
*/

/* CONST: what is the host's endianness?  We need to know this in
   order to do sub-register accesses to the SIMD/FP registers
   correctly. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated.  */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- arm insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( const UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}

/* Sign extend an N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   Long r = (Long)x;
   r = (r << (64-n)) >> (64-n);
   return (ULong)r;
}
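
/* Examples: sx_to_64(0xFF, 8) == 0xFFFFFFFFFFFFFFFFULL, since bit 7
   is set and gets copied upwards, whereas sx_to_64(0x7F, 8) == 0x7FULL,
   since bit 7 is clear. */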

//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host. */
//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
//ZZ {
//ZZ    UShort w = 0;
//ZZ    w = (w << 8) | p[1];
//ZZ    w = (w << 8) | p[0];
//ZZ    return w;
//ZZ }
//ZZ
//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
//ZZ    vassert(sh >= 0 && sh < 32);
//ZZ    if (sh == 0)
//ZZ       return x;
//ZZ    else
//ZZ       return (x << (32-sh)) | (x >> sh);
//ZZ }
//ZZ
//ZZ static Int popcount32 ( UInt x )
//ZZ {
//ZZ    Int res = 0, i;
//ZZ    for (i = 0; i < 32; i++) {
//ZZ       res += (x & 1);
//ZZ       x >>= 1;
//ZZ    }
//ZZ    return res;
//ZZ }
//ZZ
//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
//ZZ {
//ZZ    UInt mask = 1 << ix;
//ZZ    x &= ~mask;
//ZZ    x |= ((b << ix) & mask);
//ZZ    return x;
//ZZ }

#define BITS2(_b1,_b0)  \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0)  \
  (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0)  \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)  \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b10) << 10)  \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b11) << 11)  \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define X00 BITS2(0,0)
#define X01 BITS2(0,1)
#define X10 BITS2(1,0)
#define X11 BITS2(1,1)

// produces _uint[_bMax:_bMin]
#define SLICE_UInt(_uint,_bMax,_bMin)  \
   (( ((UInt)(_uint)) >> (_bMin))  \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
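
/* Example of how these two families fit together when decoding: for
   a fetched word insn = 0xABCD,

      SLICE_UInt(insn, 7, 4) == BITS4(1,1,0,0) == 0xC

   i.e. SLICE_UInt extracts an instruction field and the BITSn macros
   build the constant bit patterns the field is compared against. */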


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.    ---*/
/*------------------------------------------------------------*/

static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* This is used in many places, so the brevity is an advantage. */
static IRTemp newTempV128(void)
{
   return newTemp(Ity_V128);
}

/* Initialise V128 temporaries en masse. */
static
void newTempsV128_2(IRTemp* t1, IRTemp* t2)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
}

static
void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
}

static
void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
}

static
void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
                    IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   vassert(t5 && *t5 == IRTemp_INVALID);
   vassert(t6 && *t6 == IRTemp_INVALID);
   vassert(t7 && *t7 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
   *t5 = newTempV128();
   *t6 = newTempV128();
   *t7 = newTempV128();
}

//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
//ZZ    IRRoundingMode. */
//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
//ZZ {
//ZZ    return mkU32(Irrm_NEAREST);
//ZZ }
//ZZ
//ZZ /* Generate an expression for SRC rotated right by ROT. */
//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
//ZZ {
//ZZ    vassert(rot >= 0 && rot < 32);
//ZZ    if (rot == 0)
//ZZ       return mkexpr(src);
//ZZ    return
//ZZ       binop(Iop_Or32,
//ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
//ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
//ZZ }
//ZZ
//ZZ static IRExpr* mkU128 ( ULong i )
//ZZ {
//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
//ZZ }
//ZZ
//ZZ /* Generate a 4-aligned version of the given expression if
//ZZ    the given condition is true.  Else return it unchanged. */
//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
//ZZ {
//ZZ    if (b)
//ZZ       return binop(Iop_And32, e, mkU32(~3));
//ZZ    else
//ZZ       return e;
//ZZ }

/* Other IR construction helpers. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}

static IROp mkDIVF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_DivF32;
      case Ity_F64: return Iop_DivF64;
      default: vpanic("mkDIVF");
   }
}

static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}

static IROp mkABSF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AbsF32;
      case Ity_F64: return Iop_AbsF64;
      default: vpanic("mkABSF");
   }
}

static IROp mkSQRTF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SqrtF32;
      case Ity_F64: return Iop_SqrtF64;
      default: vpanic("mkSQRTF");
   }
}

static IROp mkVecADD ( UInt size ) {
   const IROp ops[4]
      = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
          Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
          Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSUB ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSARN ( UInt size ) {
   const IROp ops[4]
      = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHRN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHLN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATEVENLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
          Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATODDLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
          Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVELO ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
          Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVEHI ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
          Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMUL ( UInt size ) {
   const IROp ops[4]
      = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
   vassert(size < 3);
   return ops[size];
}

static IROp mkVecMULLU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecCMPEQ ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTU ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTS ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecABS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
   const IROp ops[4]
      = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
          Iop_ZeroHI96ofV128,  Iop_ZeroHI64ofV128 };
   vassert(size < 4);
   return ops[size];
}

static IRExpr* mkU ( IRType ty, ULong imm ) {
   switch (ty) {
      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
      case Ity_I64: return mkU64(imm);
      default: vpanic("mkU");
   }
}

static IROp mkVecQDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQRDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
          Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
          Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
          Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
          Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
          Iop_NarrowUn64to32x2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Ux8,  Iop_QNarrowUn32Sto16Ux4,
          Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Sx8,  Iop_QNarrowUn32Sto16Sx4,
          Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Uto8Ux8,  Iop_QNarrowUn32Uto16Ux4,
          Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
          Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Sx8,  Iop_QandQSarNnarrow32Sto16Sx4,
          Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Ux8,  Iop_QandQSarNnarrow32Sto16Ux4,
          Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRShrNnarrow16Uto8Ux8,  Iop_QandQRShrNnarrow32Uto16Ux4,
          Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Sx8,  Iop_QandQRSarNnarrow32Sto16Sx4,
          Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Ux8,  Iop_QandQRSarNnarrow32Sto16Ux4,
          Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQSHLNSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
          Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
          Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
          Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecADDF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
   vassert(size < 4);
   return ops[size];
}

/* Generate IR to create 'arg rotated right by imm', for sane values
   of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   if (imm == 0) {
      return arg;
   }
   IRTemp res = newTemp(ty);
   assign(res, binop(mkOR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
   return res;
}
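
/* Example: for ty == Ity_I32, arg == 0x80000001 and imm == 1, the
   generated expression evaluates to
   (0x80000001 << 31) | (0x80000001 >> 1) == 0xC0000000. */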

/* Generate IR to set the returned temp to either all-zeroes or
   all ones, as a copy of arg<imm>. */
static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   IRTemp res = newTemp(ty);
   assign(res, binop(mkSAR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
                     mkU8(w - 1)));
   return res;
}
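
/* Example: for ty == Ity_I32, arg == 0x00000010 and imm == 4, the
   left shift by 27 moves bit 4 up to bit 31, and the arithmetic right
   shift by 31 then smears it across the word, giving 0xFFFFFFFF; had
   bit 4 been clear the result would be 0x00000000. */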

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}

/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8:  return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)

#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG   offsetof(VexGuestARM64State,guest_QCFLAG)

#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)


/* ---------------- Integer registers ---------------- */

static Int offsetIReg64 ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed. */
   switch (iregNo) {
      case 0:  return OFFB_X0;
      case 1:  return OFFB_X1;
      case 2:  return OFFB_X2;
      case 3:  return OFFB_X3;
      case 4:  return OFFB_X4;
      case 5:  return OFFB_X5;
      case 6:  return OFFB_X6;
      case 7:  return OFFB_X7;
      case 8:  return OFFB_X8;
      case 9:  return OFFB_X9;
      case 10: return OFFB_X10;
      case 11: return OFFB_X11;
      case 12: return OFFB_X12;
      case 13: return OFFB_X13;
      case 14: return OFFB_X14;
      case 15: return OFFB_X15;
      case 16: return OFFB_X16;
      case 17: return OFFB_X17;
      case 18: return OFFB_X18;
      case 19: return OFFB_X19;
      case 20: return OFFB_X20;
      case 21: return OFFB_X21;
      case 22: return OFFB_X22;
      case 23: return OFFB_X23;
      case 24: return OFFB_X24;
      case 25: return OFFB_X25;
      case 26: return OFFB_X26;
      case 27: return OFFB_X27;
      case 28: return OFFB_X28;
      case 29: return OFFB_X29;
      case 30: return OFFB_X30;
      /* but not 31 */
      default: vassert(0);
   }
}

static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
}

static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}

static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}

static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}


/* ---------------- Vector (Q) registers ---------------- */

static Int offsetQReg128 ( UInt qregNo )
{
   /* We don't care about endianness at this point.  It only becomes
      relevant when dealing with sections of these registers. */
   switch (qregNo) {
      case 0:  return OFFB_Q0;
      case 1:  return OFFB_Q1;
      case 2:  return OFFB_Q2;
      case 3:  return OFFB_Q3;
      case 4:  return OFFB_Q4;
      case 5:  return OFFB_Q5;
      case 6:  return OFFB_Q6;
      case 7:  return OFFB_Q7;
      case 8:  return OFFB_Q8;
      case 9:  return OFFB_Q9;
      case 10: return OFFB_Q10;
      case 11: return OFFB_Q11;
      case 12: return OFFB_Q12;
      case 13: return OFFB_Q13;
      case 14: return OFFB_Q14;
      case 15: return OFFB_Q15;
      case 16: return OFFB_Q16;
      case 17: return OFFB_Q17;
      case 18: return OFFB_Q18;
      case 19: return OFFB_Q19;
      case 20: return OFFB_Q20;
      case 21: return OFFB_Q21;
      case 22: return OFFB_Q22;
      case 23: return OFFB_Q23;
      case 24: return OFFB_Q24;
      case 25: return OFFB_Q25;
      case 26: return OFFB_Q26;
      case 27: return OFFB_Q27;
      case 28: return OFFB_Q28;
      case 29: return OFFB_Q29;
      case 30: return OFFB_Q30;
      case 31: return OFFB_Q31;
      default: vassert(0);
   }
}

/* Write to a complete Qreg. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}

/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on. */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1:  return Ity_I8;
      case 2:  return Ity_I16;
      case 4:  return Ity_I32; //Ity_F32;
      case 8:  return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}

/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(host_endness == VexEndnessLE);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8:                 laneSzB = 1;  break;
      case Ity_F16: case Ity_I16:  laneSzB = 2;  break;
      case Ity_F32: case Ity_I32:  laneSzB = 4;  break;
      case Ity_F64: case Ity_I64:  laneSzB = 8;  break;
      case Ity_V128:               laneSzB = 16; break;
      default: break;
   }
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   vassert(maxOff < 16);
   return base + minOff;
}

/* Put to the least significant lane of a Qreg. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg. */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_F16: case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}

static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}

static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}

/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}

static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}

static void putQRegHI64 ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegHI64(qregNo);
   switch (ty) {
      case Ity_I64: case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are plain wrong
   }
   stmt(IRStmt_Put(off, e));
}

/* Put to a specified lane of a Qreg. */
static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
{
   IRType laneTy  = typeOfIRExpr(irsb->tyenv, e);
   Int    off     = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_F64: case Ity_I64:
      case Ity_I32: case Ity_F32:
      case Ity_I16: case Ity_F16:
      case Ity_I8:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from a specified lane of a Qreg. */
static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
{
   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
      case Ity_F64: case Ity_F32: case Ity_F16:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, laneTy);
}


//ZZ /* ---------------- Misc registers ---------------- */
//ZZ
//ZZ static void putMiscReg32 ( UInt    gsoffset,
//ZZ                            IRExpr* e, /* :: Ity_I32 */
//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
//ZZ {
//ZZ    switch (gsoffset) {
//ZZ       case OFFB_FPSCR:   break;
//ZZ       case OFFB_QFLAG32: break;
//ZZ       case OFFB_GEFLAG0: break;
//ZZ       case OFFB_GEFLAG1: break;
//ZZ       case OFFB_GEFLAG2: break;
//ZZ       case OFFB_GEFLAG3: break;
//ZZ       default: vassert(0); /* awaiting more cases */
//ZZ    }
//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
//ZZ
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional write */
//ZZ       stmt(IRStmt_Put(gsoffset, e));
//ZZ    } else {
//ZZ       stmt(IRStmt_Put(
//ZZ          gsoffset,
//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
//ZZ       ));
//ZZ    }
//ZZ }
//ZZ
//ZZ static IRTemp get_ITSTATE ( void )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_ITSTATE ( IRTemp t )
   1605 //ZZ {
   1606 //ZZ    ASSERT_IS_THUMB;
   1607 //ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
   1608 //ZZ }
   1609 //ZZ
   1610 //ZZ static IRTemp get_QFLAG32 ( void )
   1611 //ZZ {
   1612 //ZZ    IRTemp t = newTemp(Ity_I32);
   1613 //ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
   1614 //ZZ    return t;
   1615 //ZZ }
   1616 //ZZ
   1617 //ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
   1618 //ZZ {
   1619 //ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
   1620 //ZZ }
   1621 //ZZ
   1622 //ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
   1623 //ZZ    Status Register) to indicate that overflow or saturation occurred.
   1624 //ZZ    Nb: t must be zero to denote no saturation, and any nonzero
   1625 //ZZ    value to indicate saturation. */
   1626 //ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
   1627 //ZZ {
   1628 //ZZ    IRTemp old = get_QFLAG32();
   1629 //ZZ    IRTemp nyu = newTemp(Ity_I32);
   1630 //ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
   1631 //ZZ    put_QFLAG32(nyu, condT);
   1632 //ZZ }
   1633 
   1634 
   1635 /* ---------------- FPCR stuff ---------------- */
   1636 
   1637 /* Generate IR to get hold of the rounding mode bits in FPCR, and
   1638    convert them to IR format.  Bind the final result to the
   1639    returned temp. */
   1640 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
   1641 {
   1642    /* The ARMvfp encoding for rounding mode bits is:
   1643          00  to nearest
   1644          01  to +infinity
   1645          10  to -infinity
   1646          11  to zero
   1647       We need to convert that to the IR encoding:
   1648          00  to nearest (the default)
   1649          10  to +infinity
   1650          01  to -infinity
   1651          11  to zero
   1652       Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
   /* Now swap them. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
         ));
   return swapped;
}
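
/* Purely illustrative sketch (hypothetical helper, not referenced by
   the decoder): the conversion above is just "swap bits 0 and 1" once
   FPCR[23:22] has been slid to the bottom.  A plain-C rendering of
   the same computation, convenient for checking the IR: */
static UInt sketch_convert_rmode_FPCR_to_IR ( UInt fpcr )
{
   UInt armEncd = fpcr >> 22;  /* bits 1:0 now hold the ARM rmode */
   /* swap bits 0 and 1; higher bits are masked out */
   return ((armEncd << 1) & 2) | ((armEncd >> 1) & 1);
}
/* For example, ARM 01 (to +infinity) maps to IR 10, and ARM 10
   (to -infinity) maps to IR 01; 00 and 11 map to themselves. */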


/*------------------------------------------------------------*/
/*--- Helpers for flag handling and conditional insns      ---*/
/*------------------------------------------------------------*/

static const HChar* nameARM64Condcode ( ARM64Condcode cond )
{
   switch (cond) {
      case ARM64CondEQ:  return "eq";
      case ARM64CondNE:  return "ne";
      case ARM64CondCS:  return "cs";  // or 'hs'
      case ARM64CondCC:  return "cc";  // or 'lo'
      case ARM64CondMI:  return "mi";
      case ARM64CondPL:  return "pl";
      case ARM64CondVS:  return "vs";
      case ARM64CondVC:  return "vc";
      case ARM64CondHI:  return "hi";
      case ARM64CondLS:  return "ls";
      case ARM64CondGE:  return "ge";
      case ARM64CondLT:  return "lt";
      case ARM64CondGT:  return "gt";
      case ARM64CondLE:  return "le";
      case ARM64CondAL:  return "al";
      case ARM64CondNV:  return "nv";
      default: vpanic("name_ARM64Condcode");
   }
}

/* and a handy shorthand for it */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I64 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM64_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_condition", &arm64g_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
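
/* Illustrative (hypothetical) sketch of the packing convention, not
   used by the decoder: the helper side sees a single "cond_n_op"
   word in the first argument and recovers the two 4-bit fields as
   follows, which is why all ARM64G_CC_OP_ values must fit in 4
   bits. */
static void sketch_unpack_cond_n_op ( ULong cond_n_op,
                                      /*OUT*/UInt* cond,
                                      /*OUT*/UInt* cc_op )
{
   *cond  = (UInt)((cond_n_op >> 4) & 0xF);  /* bits 7:4 */
   *cc_op = (UInt)(cond_n_op & 0xF);         /* bits 3:0 */
}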


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.
*/
static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
{
  /* First arg is "(cond << 4) | cc_op".  This requires that the
     ARM64_CC_OP_ values all fit in 4 bits.  Hence we are passing a
     (COND, OP) pair in the lowest 8 bits of the first argument. */
   vassert(cond >= 0 && cond <= 15);
   return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
}


/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_arm64g_calculate_flag_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


//ZZ /* Build IR to calculate just the overflow flag from stored
//ZZ    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
//ZZ    Ity_I32. */
//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
//ZZ {
//ZZ    IRExpr** args
//ZZ       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
//ZZ    IRExpr* call
//ZZ       = mkIRExprCCall(
//ZZ            Ity_I32,
//ZZ            0/*regparm*/,
//ZZ            "armg_calculate_flag_v", &armg_calculate_flag_v,
//ZZ            args
//ZZ         );
//ZZ    /* Exclude OP and NDEP from definedness checking.  We're only
//ZZ       interested in DEP1 and DEP2. */
//ZZ    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
//ZZ    return call;
//ZZ }


/* Build IR to calculate N Z C V in bits 31:28 of the
   returned word. */
static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to set the flags thunk, in the most general case. */
static
void setFlags_D1_D2_ND ( UInt cc_op,
                         IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
{
   vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
   vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(cc_op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
}

/* Build IR to set the flags thunk after ADD or SUB. */
static
void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp z64    = newTemp(Ity_I64);
   if (is64) {
      argL64 = argL;
      argR64 = argR;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
   }
   assign(z64, mkU64(0));
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { cc_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { cc_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
   else                      { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
}

/* Build IR to set the flags thunk after ADC or SBC. */
static
void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
                        IRTemp argL, IRTemp argR, IRTemp oldC )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp oldC64 = IRTemp_INVALID;
   if (is64) {
      argL64 = argL;
      argR64 = argR;
      oldC64 = oldC;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      oldC64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
      assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
   }
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSBC &&  is64) { cc_op = ARM64G_CC_OP_SBC64; }
   else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
   else if (!isSBC &&  is64) { cc_op = ARM64G_CC_OP_ADC64; }
   else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
   else                      { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
}

/* Build IR to set the flags thunk after ADD or SUB, if the given
   condition evaluates to True at run time.  If not, the flags are set
   to the specified NZCV value. */
static
void setFlags_ADD_SUB_conditionally (
        Bool is64, Bool isSUB,
        IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
     )
{
   /* Generate IR as follows:
        CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
        CC_DEP1 = ITE(cond, argL64, nzcv << 28)
        CC_DEP2 = ITE(cond, argR64, 0)
        CC_NDEP = 0
   */

   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));

   /* Establish the operation and operands for the True case. */
   IRTemp t_dep1 = IRTemp_INVALID;
   IRTemp t_dep2 = IRTemp_INVALID;
   UInt   t_op   = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
   else                      { vassert(0); }
   /* */
   if (is64) {
      t_dep1 = argL;
      t_dep2 = argR;
   } else {
      t_dep1 = newTemp(Ity_I64);
      t_dep2 = newTemp(Ity_I64);
      assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
      assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
   }

   /* Establish the operation and operands for the False case. */
   IRTemp f_dep1 = newTemp(Ity_I64);
   IRTemp f_dep2 = z64;
   UInt   f_op   = ARM64G_CC_OP_COPY;
   assign(f_dep1, mkU64(nzcv << 28));

   /* Final thunk values */
   IRTemp dep1 = newTemp(Ity_I64);
   IRTemp dep2 = newTemp(Ity_I64);
   IRTemp op   = newTemp(Ity_I64);

   assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
   assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
   assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));

   /* finally .. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
}

/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
static
void setFlags_LOGIC ( Bool is64, IRTemp res )
{
   IRTemp res64 = IRTemp_INVALID;
   IRTemp z64   = newTemp(Ity_I64);
   UInt   cc_op = ARM64G_CC_OP_NUMBER;
   if (is64) {
      res64 = res;
      cc_op = ARM64G_CC_OP_LOGIC64;
   } else {
      res64 = newTemp(Ity_I64);
      assign(res64, unop(Iop_32Uto64, mkexpr(res)));
      cc_op = ARM64G_CC_OP_LOGIC32;
   }
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(cc_op, res64, z64, z64);
}

/* Build IR to set the flags thunk to a given NZCV value.  NZCV is
   located in bits 31:28 of the supplied value. */
static
void setFlags_COPY ( IRTemp nzcv_28x0 )
{
   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
}


//ZZ /* Minor variant of the above that sets NDEP to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_dep2,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
//ZZ }
//ZZ
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_ndep,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
//ZZ }
//ZZ
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
//ZZ    sets them at all) */
//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
//ZZ                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
//ZZ }


/*------------------------------------------------------------*/
/*--- Misc math helpers                                    ---*/
/*------------------------------------------------------------*/

/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
{
   IRTemp maskT = newTemp(Ity_I64);
   IRTemp res   = newTemp(Ity_I64);
   vassert(sh >= 1 && sh <= 63);
   assign(maskT, mkU64(mask));
   assign( res,
           binop(Iop_Or64,
                 binop(Iop_Shr64,
                       binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
                       mkU8(sh)),
                 binop(Iop_And64,
                       binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
                       mkexpr(maskT))
                 )
           );
   return res;
}
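
/* A plain-C model of the IR generated above (hypothetical sketch,
   not used by the decoder): with mask 0xFF00FF00FF00FF00 and sh 8 it
   exchanges adjacent bytes. */
static ULong sketch_SWAPHELPER ( ULong x, ULong mask, Int sh )
{
   return ((x & mask) >> sh) | ((x << sh) & mask);
}
/* e.g. sketch_SWAPHELPER(0x1122334455667788ULL,
                          0xFF00FF00FF00FF00ULL, 8)
        == 0x2211443366558877ULL. */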

/* Generates byte swaps within 32-bit lanes. */
static IRTemp math_UINTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   return res;
}

/* Generates byte swaps within 16-bit lanes. */
static IRTemp math_USHORTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   return res;
}

/* Generates a 64-bit byte swap. */
static IRTemp math_BYTESWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
   return res;
}

/* Generates a 64-bit bit swap. */
static IRTemp math_BITSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
   res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
   res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
   return math_BYTESWAP64(res);
}
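
/* A hypothetical reference implementation of what math_BITSWAP64
   computes, for checking: the three SWAPHELPER steps reverse the
   bits within each byte, and the final BYTESWAP64 reverses the byte
   order, so the net effect is a full 64-bit bit reversal. */
static ULong sketch_BITSWAP64_reference ( ULong x )
{
   ULong r = 0;
   Int   i;
   for (i = 0; i < 64; i++) {
      /* peel off the lowest bit of x and push it into r from below,
         so input bit 0 ends up as output bit 63, etc */
      r = (r << 1) | (x & 1);
      x >>= 1;
   }
   return r;
}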

/* Duplicates the bits at the bottom of the given word to fill the
   whole word.  src :: Ity_I64 is assumed to have zeroes everywhere
   except for the bottom bits. */
static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
{
   if (srcTy == Ity_I8) {
      IRTemp t16 = newTemp(Ity_I64);
      assign(t16, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(8))));
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(t16),
                                  binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I16) {
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I32) {
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I64) {
      return src;
   }
   vassert(0);
}
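
/* Plain-C illustration (hypothetical, not used by the decoder) of
   the doubling trick above, for the Ity_I8 case: each step doubles
   the number of copies. */
static ULong sketch_DUP8_TO_64 ( ULong b /* zero except bits 7:0 */ )
{
   ULong t16 = b   | (b   << 8);    /* 2 copies */
   ULong t32 = t16 | (t16 << 16);   /* 4 copies */
   return      t32 | (t32 << 32);   /* 8 copies */
}
/* e.g. sketch_DUP8_TO_64(0xAB) == 0xABABABABABABABABULL. */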


/* Duplicates the src element exactly so as to fill a V128 value. */
static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
{
   IRTemp res = newTempV128();
   if (srcTy == Ity_F64) {
      IRTemp i64 = newTemp(Ity_I64);
      assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
      return res;
   }
   if (srcTy == Ity_F32) {
      IRTemp i64a = newTemp(Ity_I64);
      assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
      IRTemp i64b = newTemp(Ity_I64);
      assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
                                   mkexpr(i64a)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
      return res;
   }
   if (srcTy == Ity_I64) {
      assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
      return res;
   }
   if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
      IRTemp t1 = newTemp(Ity_I64);
      assign(t1, widenUto64(srcTy, mkexpr(src)));
      IRTemp t2 = math_DUP_TO_64(t1, srcTy);
      assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
      return res;
   }
   vassert(0);
}


/* |fullWidth| is a full V128 width result.  Depending on bitQ,
   zero out the upper half. */
static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
{
   if (bitQ == 1) return mkexpr(fullWidth);
   if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
   vassert(0);
}

/* The same, but from an expression instead. */
static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
{
   IRTemp fullWidthT = newTempV128();
   assign(fullWidthT, fullWidth);
   return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
}


/*------------------------------------------------------------*/
/*--- FP comparison helpers                                ---*/
/*------------------------------------------------------------*/

/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix       = newTemp(Ity_I64);
   IRTemp termL    = newTemp(Ity_I64);
   IRTemp termR    = newTemp(Ity_I64);
   IRTemp nzcv     = newTemp(Ity_I64);
   IRTemp irRes    = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
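
/* Hypothetical plain-C rendering of the above (not used by the
   decoder), convenient for spot-checking the four cases in the
   table: 0x00 -> 0010, 0x01 -> 1000, 0x40 -> 0110, 0x45 -> 0011. */
static ULong sketch_IRCmpF64Result_to_NZCV ( ULong irRes )
{
   /* bits 6 and 0 of irRes, side by side */
   ULong ix    = ((irRes >> 5) & 3) | (irRes & 1);
   /* the superopt sequence; all shifts are unsigned (logical) */
   ULong termL = ((((ix ^ 1) << 62) - 1) >> 61) + 1;
   /* the UN-case correction term */
   ULong termR = ix & (ix >> 1) & 1;
   return termL - termR;
}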


/*------------------------------------------------------------*/
/*--- Data processing (immediate)                          ---*/
/*------------------------------------------------------------*/

/* Helper functions for supporting "DecodeBitMasks" */

static ULong dbm_ROR ( Int width, ULong x, Int rot )
{
   vassert(width > 0 && width <= 64);
   vassert(rot >= 0 && rot < width);
   if (rot == 0) return x;
   ULong res = x >> rot;
   res |= (x << (width - rot));
   if (width < 64)
     res &= ((1ULL << width) - 1);
   return res;
}

static ULong dbm_RepTo64( Int esize, ULong x )
{
   switch (esize) {
      case 64:
         return x;
      case 32:
         x &= 0xFFFFFFFF; x |= (x << 32);
         return x;
      case 16:
         x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
         return x;
      case 8:
         x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
         return x;
      case 4:
         x &= 0xF; x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      case 2:
         x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      default:
         break;
   }
   vpanic("dbm_RepTo64");
   /*NOTREACHED*/
   return 0;
}

static Int dbm_highestSetBit ( ULong x )
{
   Int i;
   for (i = 63; i >= 0; i--) {
      if (x & (1ULL << i))
         return i;
   }
   vassert(x == 0);
   return -1;
}

static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   Int   diff = S - R;
   diff &= 63;
   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}
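
/* Two worked examples, wrapped in a hypothetical self-check sketch
   (not called from anywhere in the decoder):
   . N=1, immr=0, imms=0b000111 gives len=6, esize=64, S=7, R=0, so
     elem_s = Ones(8) and wmask = 0xFF.
   . N=0, immr=0, imms=0b110000 gives len=3, esize=8, S=0, R=0, so
     elem_s = Ones(1), replicated to every byte of wmask. */
static void sketch_check_DecodeBitMasks ( void )
{
   ULong wmask = 0, tmask = 0;
   Bool  ok;
   ok = dbm_DecodeBitMasks(&wmask, &tmask, 1, 7, 0, True, 64);
   vassert(ok && wmask == 0xFFULL);
   ok = dbm_DecodeBitMasks(&wmask, &tmask, 0, 48, 0, True, 64);
   vassert(ok && wmask == 0x0101010101010101ULL);
}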


static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */

   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64   = INSN(31,31) == 1;
      Bool isSub  = INSN(30,30) == 1;
      Bool setCC  = INSN(29,29) == 1;
      UInt sh     = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through */
      } else {
         vassert(sh <= 1);
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL  = newTemp(Ity_I64);
            IRTemp argR  = newTemp(Ity_I64);
            IRTemp res   = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL  = newTemp(Ity_I32);
            IRTemp argR  = newTemp(Ity_I32);
            IRTemp res   = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }

   /* -------------------- ADR/ADRP -------------------- */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt  bP    = INSN(31,31);
      UInt  immLo = INSN(30,29);
      UInt  immHi = INSN(23,5);
      UInt  rD    = INSN(4,0);
      ULong uimm  = (immHi << 2) | immLo;
      ULong simm  = sx_to_64(uimm, 21);
      ULong val;
      if (bP) {
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
      return True;
   }

   /* -------------------- LOGIC(imm) -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
      /* 31 30 28     22 21   15   9  4
         sf op 100100 N  immr imms Rn Rd
           op=00: AND  Rd|SP, Rn, #imm
           op=01: ORR  Rd|SP, Rn, #imm
           op=10: EOR  Rd|SP, Rn, #imm
           op=11: ANDS Rd|ZR, Rn, #imm
      */
      Bool  is64 = INSN(31,31) == 1;
      UInt  op   = INSN(30,29);
      UInt  N    = INSN(22,22);
      UInt  immR = INSN(21,16);
      UInt  immS = INSN(15,10);
      UInt  nn   = INSN(9,5);
      UInt  dd   = INSN(4,0);
      ULong imm  = 0;
      Bool  ok;
      if (N == 1 && !is64)
         goto after_logic_imm; /* not allowed; fall through */
      ok = dbm_DecodeBitMasks(&imm, NULL,
                              N, immS, immR, True, is64 ? 64 : 32);
      if (!ok)
         goto after_logic_imm;

      const HChar* names[4] = { "and", "orr", "eor", "ands" };
      const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
      const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };

      vassert(op < 4);
      if (is64) {
         IRExpr* argL = getIReg64orZR(nn);
         IRExpr* argR = mkU64(imm);
         IRTemp  res  = newTemp(Ity_I64);
         assign(res, binop(ops64[op], argL, argR));
         if (op < 3) {
            putIReg64orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
         } else {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_LOGIC(True/*is64*/, res);
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
         }
      } else {
         IRExpr* argL = getIReg32orZR(nn);
         IRExpr* argR = mkU32((UInt)imm);
         IRTemp  res  = newTemp(Ity_I32);
         assign(res, binop(ops32[op], argL, argR));
         if (op < 3) {
            putIReg32orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
         } else {
            putIReg32orZR(dd, mkexpr(res));
            setFlags_LOGIC(False/*!is64*/, res);
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
         }
      }
      return True;
   }
   after_logic_imm:

   /* -------------------- MOV{Z,N,K} -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
      /* 31 30 28      22 20    4
         |  |  |       |  |     |
         sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
         sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
         sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
      */
      Bool is64   = INSN(31,31) == 1;
      UInt subopc = INSN(30,29);
      UInt hw     = INSN(22,21);
      UInt imm16  = INSN(20,5);
      UInt dd     = INSN(4,0);
      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
         /* invalid; fall through */
      } else {
         ULong imm64 = ((ULong)imm16) << (16 * hw);
         if (!is64)
            vassert(imm64 < 0x100000000ULL);
         switch (subopc) {
            case BITS2(1,0): // MOVZ
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(0,0): // MOVN
               imm64 = ~imm64;
               if (!is64)
                  imm64 &= 0xFFFFFFFFULL;
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(1,1): // MOVK
               /* This is more complex.  We are inserting a slice into
                  the destination register, so we need to have the old
                  value of it. */
               if (is64) {
                  IRTemp old = newTemp(Ity_I64);
                  assign(old, getIReg64orZR(dd));
                  ULong mask = 0xFFFFULL << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or64,
                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
                             mkU64(imm64));
                  putIReg64orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg64orZR(dd), imm16, 16*hw);
               } else {
                  IRTemp old = newTemp(Ity_I32);
                  assign(old, getIReg32orZR(dd));
                  vassert(hw <= 1);
                  UInt mask = 0xFFFF << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or32,
                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
                             mkU32((UInt)imm64));
                  putIReg32orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg32orZR(dd), imm16, 16*hw);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }
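
   /* To summarise MOVK's semantics in plain C (illustrative only):
         mask = 0xFFFFULL << (16*hw);
         res  = (old & ~mask) | ((ULong)imm16 << (16*hw));
      that is, 16-bit lane hw of the destination is replaced and the
      other lanes are left unchanged. */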

   /* -------------------- {U,S,}BFM -------------------- */
   /*    30 28     22 21   15   9  4

      sf 10 100110 N  immr imms nn dd
         UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         UBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 00 100110 N  immr imms nn dd
         SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         SBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 01 100110 N  immr imms nn dd
         BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   */
   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
      UInt sf     = INSN(31,31);
      UInt opc    = INSN(30,29);
      UInt N      = INSN(22,22);
      UInt immR   = INSN(21,16);
      UInt immS   = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool inZero = False;
      Bool extend = False;
      const HChar* nm = "???";
      /* skip invalid combinations */
      switch (opc) {
         case BITS2(0,0):
            inZero = True; extend = True; nm = "sbfm"; break;
         case BITS2(0,1):
            inZero = False; extend = False; nm = "bfm"; break;
         case BITS2(1,0):
            inZero = True; extend = False; nm = "ubfm"; break;
         case BITS2(1,1):
            goto after_bfm; /* invalid */
         default:
            vassert(0);
      }
      if (sf == 1 && N != 1) goto after_bfm;
      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
                             || ((immS >> 5) & 1) != 0)) goto after_bfm;
      ULong wmask = 0, tmask = 0;
      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                   N, immS, immR, False, sf == 1 ? 64 : 32);
      if (!ok) goto after_bfm; /* hmmm */

      Bool   is64 = sf == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;

      IRTemp dst = newTemp(ty);
      IRTemp src = newTemp(ty);
      IRTemp bot = newTemp(ty);
      IRTemp top = newTemp(ty);
      IRTemp res = newTemp(ty);
      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
      assign(src, getIRegOrZR(is64, nn));
      /* perform bitfield move on low bits */
      assign(bot, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
                                         mkU(ty, wmask))));
      /* determine extension bits (sign, zero or dest register) */
      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
      /* combine extension bits and result bits */
      assign(res, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("%s %s, %s, immR=%u, immS=%u\n",
          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
      return True;
   }
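
   /* As a concrete check of the machinery above (illustrative): for
      UBFM Xd, Xn, #immr, #imms with imms >= immr, wmask is
      Ones(imms+1) rotated right by immr and tmask is
      Ones(imms-immr+1), so the result reduces to
         (Xn >> immr) & ((1ULL << (imms-immr+1)) - 1)
      which is the UBFX alias's "extract a bitfield" behaviour. */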
   after_bfm:

   /* ---------------------- EXTR ---------------------- */
   /*   30 28     22 20 15   9 4
      1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
      0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   */
   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      UInt imm6  = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      Bool valid = True;
      if (INSN(31,31) != INSN(22,22))
         valid = False;
      if (!is64 && imm6 >= 32)
         valid = False;
      if (!valid) goto after_extr;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
         assign(res, mkexpr(srcLo));
      } else {
         UInt szBits = 8 * sizeofIRType(ty);
         vassert(imm6 > 0 && imm6 < szBits);
         assign(res, binop(mkOR(ty),
                           binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                           binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
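
   /* In plain C terms (illustrative), for imm6 > 0 the result is the
      low N bits of the 2N-bit concatenation Xn:Xm shifted right by
      imm6:
         res = (srcHi << (N - imm6)) | (srcLo >> imm6)
      where N is 32 or 64; imm6 == 0 simply returns Xm. */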
  after_extr:

   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Data processing (register) instructions              ---*/
/*------------------------------------------------------------*/

static const HChar* nameSH ( UInt sh ) {
   switch (sh) {
      case 0: return "lsl";
      case 1: return "lsr";
      case 2: return "asr";
      case 3: return "ror";
      default: vassert(0);
   }
}

/* Generate IR to get a register value, possibly shifted by an
   immediate.  Returns either a 32- or 64-bit temporary holding the
   result.  After the shift, the value can optionally be NOT-ed
   too.

   sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
   in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
   isn't allowed, but it's the job of the caller to check that.
*/
static IRTemp getShiftedIRegOrZR ( Bool is64,
                                   UInt sh_how, UInt sh_amt, UInt regNo,
                                   Bool invert )
{
   vassert(sh_how < 4);
   vassert(sh_amt < (is64 ? 64 : 32));
   IRType ty = is64 ? Ity_I64 : Ity_I32;
   IRTemp t0 = newTemp(ty);
   assign(t0, getIRegOrZR(is64, regNo));
   IRTemp t1 = newTemp(ty);
   switch (sh_how) {
      case BITS2(0,0):
         assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(0,1):
         assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,0):
         assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,1):
         assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
         break;
      default:
         vassert(0);
   }
   if (invert) {
      IRTemp t2 = newTemp(ty);
      assign(t2, unop(mkNOT(ty), mkexpr(t1)));
      return t2;
   } else {
      return t1;
   }
}


static
Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
                                        UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------------- ADD/SUB(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)

      31 30 29 28    23 21 20 15   9  4
      |  |  |  |     |  |  |  |    |  |
      x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
      x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
      x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
      x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
   */
   if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
      UInt   bS    = INSN(29,29); /* set flags? */
      UInt   sh    = INSN(23,22);
      UInt   rM    = INSN(20,16);
      UInt   imm6  = INSN(15,10);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);
      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
         IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
         IRTemp res  = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
         if (bS) {
            setFlags_ADD_SUB(is64, isSUB, argL, argR);
         }
         DIP("%s%s %s, %s, %s, %s #%u\n",
             bOP ? "sub" : "add", bS ? "s" : "",
             nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
             nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         return True;
      }
   }

   /* ------------------- ADC/SBC(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op

      31 30 29 28    23 21 20 15     9  4
      |  |  |  |     |  |  |  |      |  |
      x  0  0  11010 00 0  Rm 000000 Rn Rd   ADC  Rd,Rn,Rm
      x  0  1  11010 00 0  Rm 000000 Rn Rd   ADCS Rd,Rn,Rm
      x  1  0  11010 00 0  Rm 000000 Rn Rd   SBC  Rd,Rn,Rm
      x  1  1  11010 00 0  Rm 000000 Rn Rd   SBCS Rd,Rn,Rm
   */

   if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADC, 1: SBC */
      UInt   bS    = INSN(29,29); /* set flags */
      UInt   rM    = INSN(20,16);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);

      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;

      IRTemp oldC = newTemp(ty);
      assign(oldC,
             is64 ? mk_arm64g_calculate_flag_c()
                  : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );

      IRTemp argL = newTemp(ty);
      assign(argL, getIRegOrZR(is64, rN));
      IRTemp argR = newTemp(ty);
      assign(argR, getIRegOrZR(is64, rM));

      IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
      IRTemp res  = newTemp(ty);
      if (isSUB) {
         IRExpr* one = is64 ? mkU64(1) : mkU32(1);
         IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
         assign(res,
                binop(op,
                      binop(op, mkexpr(argL), mkexpr(argR)),
                      binop(xorOp, mkexpr(oldC), one)));
      } else {
         assign(res,
                binop(op,
                      binop(op, mkexpr(argL), mkexpr(argR)),
                      mkexpr(oldC)));
      }

      if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));

      if (bS) {
         setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
      }

      DIP("%s%s %s, %s, %s\n",
          bOP ? "sbc" : "adc", bS ? "s" : "",
          nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
          nameIRegOrZR(is64, rM));
      return True;
   }
   2901 
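   /* Note on the subtract-with-carry expression above: architecturally
      SBC is Rd = Rn - Rm - (1 - C), which is exactly what
      (argL - argR) - (oldC ^ 1) computes, given that oldC here is
      either 0 or 1.  ADC is simply argL + argR + oldC. */
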
   2902    /* -------------------- LOGIC(reg) -------------------- */
   2903    /* x==0 => 32 bit op      x==1 => 64 bit op
   2904       N==0 => inv? is no-op (no inversion)
   2905       N==1 => inv? is NOT
   2906       sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
   2907 
   2908       31 30 28    23 21 20 15   9  4
   2909       |  |  |     |  |  |  |    |  |
   2910       x  00 01010 sh N  Rm imm6 Rn Rd  AND  Rd,Rn, inv?(sh(Rm,imm6))
   2911       x  01 01010 sh N  Rm imm6 Rn Rd  ORR  Rd,Rn, inv?(sh(Rm,imm6))
   2912       x  10 01010 sh N  Rm imm6 Rn Rd  EOR  Rd,Rn, inv?(sh(Rm,imm6))
   2913       x  11 01010 sh N  Rm imm6 Rn Rd  ANDS Rd,Rn, inv?(sh(Rm,imm6))
   2914       With N=1, the names are: BIC ORN EON BICS
   2915    */
   2916    if (INSN(28,24) == BITS5(0,1,0,1,0)) {
   2917       UInt   bX   = INSN(31,31);
   2918       UInt   sh   = INSN(23,22);
   2919       UInt   bN   = INSN(21,21);
   2920       UInt   rM   = INSN(20,16);
   2921       UInt   imm6 = INSN(15,10);
   2922       UInt   rN   = INSN(9,5);
   2923       UInt   rD   = INSN(4,0);
   2924       Bool   is64 = bX == 1;
   2925       IRType ty   = is64 ? Ity_I64 : Ity_I32;
   2926       if (!is64 && imm6 > 31) {
   2927          /* invalid; fall through */
   2928       } else {
   2929          IRTemp argL = newTemp(ty);
   2930          assign(argL, getIRegOrZR(is64, rN));
   2931          IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
   2932          IROp   op   = Iop_INVALID;
   2933          switch (INSN(30,29)) {
   2934             case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
   2935             case BITS2(0,1):                  op = mkOR(ty);  break;
   2936             case BITS2(1,0):                  op = mkXOR(ty); break;
   2937             default: vassert(0);
   2938          }
   2939          IRTemp res = newTemp(ty);
   2940          assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
   2941          if (INSN(30,29) == BITS2(1,1)) {
   2942             setFlags_LOGIC(is64, res);
   2943          }
   2944          putIRegOrZR(is64, rD, mkexpr(res));
   2945 
   2946          static const HChar* names_op[8]
   2947             = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
   2948          vassert(((bN << 2) | INSN(30,29)) < 8);
   2949          const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
   2950          /* Special-case the printing of "MOV" */
   2951          if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
   2952             DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
   2953                                 nameIRegOrZR(is64, rM));
   2954          } else {
   2955             DIP("%s %s, %s, %s, %s #%u\n", nm_op,
   2956                 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
   2957                 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
   2958          }
   2959          return True;
   2960       }
   2961    }
   2962 
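   /* Example of the MOV special case (illustrative): 0xAA0103E0 is
      ORR x0, xzr, x1 with LSL #0 and N=0, so it is shown as
      "mov x0, x1"; since rN is the zero register, the IR amounts to
      x0 = Or64(0, x1). */
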
   2963    /* -------------------- {U,S}MULH -------------------- */
   2964    /* 31       23 22 20 15     9   4
   2965       10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
   2966       10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
   2967    */
   2968    if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
   2969        && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
   2970       Bool isU = INSN(23,23) == 1;
   2971       UInt mm  = INSN(20,16);
   2972       UInt nn  = INSN(9,5);
   2973       UInt dd  = INSN(4,0);
   2974       putIReg64orZR(dd, unop(Iop_128HIto64,
   2975                              binop(isU ? Iop_MullU64 : Iop_MullS64,
   2976                                    getIReg64orZR(nn), getIReg64orZR(mm))));
   2977       DIP("%cmulh %s, %s, %s\n",
   2978           isU ? 'u' : 's',
   2979           nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
   2980       return True;
   2981    }
   2982 
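   /* Example: with Xn = 0xFFFFFFFFFFFFFFFF and Xm = 2, the unsigned
      128-bit product is 0x1FFFFFFFFFFFFFFFE, so UMULH writes 1 (the
      top half), whereas SMULH treats Xn as -1 and writes
      0xFFFFFFFFFFFFFFFF, the top half of -2. */
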
   2983    /* -------------------- M{ADD,SUB} -------------------- */
   2984    /* 31 30           20 15 14 9 4
   2985       sf 00 11011 000 m  0  a  n r   MADD Rd,Rn,Rm,Ra  d = a+m*n
   2986       sf 00 11011 000 m  1  a  n r   MSUB Rd,Rn,Rm,Ra  d = a-m*n
   2987    */
   2988    if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
   2989       Bool is64  = INSN(31,31) == 1;
   2990       UInt mm    = INSN(20,16);
   2991       Bool isAdd = INSN(15,15) == 0;
   2992       UInt aa    = INSN(14,10);
   2993       UInt nn    = INSN(9,5);
   2994       UInt dd    = INSN(4,0);
   2995       if (is64) {
   2996          putIReg64orZR(
   2997             dd,
   2998             binop(isAdd ? Iop_Add64 : Iop_Sub64,
   2999                   getIReg64orZR(aa),
   3000                   binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
   3001       } else {
   3002          putIReg32orZR(
   3003             dd,
   3004             binop(isAdd ? Iop_Add32 : Iop_Sub32,
   3005                   getIReg32orZR(aa),
   3006                   binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
   3007       }
   3008       DIP("%s %s, %s, %s, %s\n",
   3009           isAdd ? "madd" : "msub",
   3010           nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
   3011           nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
   3012       return True;
   3013    }
   3014 
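   /* The MUL alias falls out of this case: "mul x0, x1, x2" is MADD
      with Ra == 31, so getIReg64orZR(aa) is zero and the IR reduces
      to x0 = Add64(0, Mul64(x2, x1)). */
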
   3015    /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
   3016    /* 31 30 28        20 15   11 9  4
   3017       sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
   3018       sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
   3019       sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
   3020       sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
   3021       In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
   3022    */
   3023    if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
   3024       Bool    is64 = INSN(31,31) == 1;
   3025       UInt    b30  = INSN(30,30);
   3026       UInt    mm   = INSN(20,16);
   3027       UInt    cond = INSN(15,12);
   3028       UInt    b10  = INSN(10,10);
   3029       UInt    nn   = INSN(9,5);
   3030       UInt    dd   = INSN(4,0);
   3031       UInt    op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
   3032       IRType  ty   = is64 ? Ity_I64 : Ity_I32;
   3033       IRExpr* argL = getIRegOrZR(is64, nn);
   3034       IRExpr* argR = getIRegOrZR(is64, mm);
   3035       switch (op) {
   3036          case BITS2(0,0):
   3037             break;
   3038          case BITS2(0,1):
   3039             argR = binop(mkADD(ty), argR, mkU(ty,1));
   3040             break;
   3041          case BITS2(1,0):
   3042             argR = unop(mkNOT(ty), argR);
   3043             break;
   3044          case BITS2(1,1):
   3045             argR = binop(mkSUB(ty), mkU(ty,0), argR);
   3046             break;
   3047          default:
   3048             vassert(0);
   3049       }
   3050       putIRegOrZR(
   3051          is64, dd,
   3052          IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
   3053                     argL, argR)
   3054       );
   3055       const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
   3056       DIP("%s %s, %s, %s, %s\n", op_nm[op],
   3057           nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
   3058           nameIRegOrZR(is64, mm), nameCC(cond));
   3059       return True;
   3060    }
   3061 
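   /* The aliases CSET, CSETM, CINC, CINV and CNEG are encodings of
      these instructions with nn == mm and the condition inverted
      (CSET/CSETM additionally have nn == mm == 31), so they are
      accepted here too; they are just printed in unaliased form. */
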
   3062    /* -------------- ADD/SUB(extended reg) -------------- */
   3063    /*     28         20 15  12   9 4
   3064       000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
   3065       100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld
   3066 
   3067       001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
   3068       101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld
   3069 
   3070       010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
   3071       110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld
   3072 
   3073       011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
   3074       111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld
   3075 
   3076       The 'm' operand is extended per opt, thusly:
   3077 
   3078         000   Xm & 0xFF           UXTB
   3079         001   Xm & 0xFFFF         UXTH
   3080         010   Xm & (2^32)-1       UXTW
   3081         011   Xm                  UXTX
   3082 
   3083         100   Xm sx from bit 7    SXTB
   3084         101   Xm sx from bit 15   SXTH
   3085         110   Xm sx from bit 31   SXTW
   3086         111   Xm                  SXTX
   3087 
   3088       In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
   3089       operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
   3090       are the identity operation on Wm.
   3091 
   3092       After extension, the value is shifted left by imm3 bits, which
   3093       may only be in the range 0 .. 4 inclusive.
   3094    */
   3095    if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
   3096       Bool is64  = INSN(31,31) == 1;
   3097       Bool isSub = INSN(30,30) == 1;
   3098       Bool setCC = INSN(29,29) == 1;
   3099       UInt mm    = INSN(20,16);
   3100       UInt opt   = INSN(15,13);
   3101       UInt imm3  = INSN(12,10);
   3102       UInt nn    = INSN(9,5);
   3103       UInt dd    = INSN(4,0);
   3104       const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
   3105                                   "sxtb", "sxth", "sxtw", "sxtx" };
   3106       /* Do almost the same thing in the 32- and 64-bit cases. */
   3107       IRTemp xN = newTemp(Ity_I64);
   3108       IRTemp xM = newTemp(Ity_I64);
   3109       assign(xN, getIReg64orSP(nn));
   3110       assign(xM, getIReg64orZR(mm));
   3111       IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
   3112       Int     shSX = 0;
   3113       /* widen Xm .. */
   3114       switch (opt) {
   3115          case BITS3(0,0,0): // UXTB
   3116             xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
   3117          case BITS3(0,0,1): // UXTH
   3118             xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
   3119          case BITS3(0,1,0): // UXTW -- noop for the 32bit case
   3120             if (is64) {
   3121                xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
   3122             }
   3123             break;
   3124          case BITS3(0,1,1): // UXTX -- always a noop
   3125             break;
   3126          case BITS3(1,0,0): // SXTB
   3127             shSX = 56; goto sxTo64;
   3128          case BITS3(1,0,1): // SXTH
   3129             shSX = 48; goto sxTo64;
   3130          case BITS3(1,1,0): // SXTW -- noop for the 32bit case
   3131             if (is64) {
   3132                shSX = 32; goto sxTo64;
   3133             }
   3134             break;
   3135          case BITS3(1,1,1): // SXTX -- always a noop
   3136             break;
   3137          sxTo64:
   3138             vassert(shSX >= 32);
   3139             xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
   3140                         mkU8(shSX));
   3141             break;
   3142          default:
   3143             vassert(0);
   3144       }
   3145       /* and now shift */
   3146       IRTemp argL = xN;
   3147       IRTemp argR = newTemp(Ity_I64);
   3148       assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
   3149       IRTemp res = newTemp(Ity_I64);
   3150       assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
   3151                         mkexpr(argL), mkexpr(argR)));
   3152       if (is64) {
   3153          if (setCC) {
   3154             putIReg64orZR(dd, mkexpr(res));
   3155             setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
   3156          } else {
   3157             putIReg64orSP(dd, mkexpr(res));
   3158          }
   3159       } else {
   3160          if (setCC) {
   3161             IRTemp argL32 = newTemp(Ity_I32);
   3162             IRTemp argR32 = newTemp(Ity_I32);
   3163             putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
   3164             assign(argL32, unop(Iop_64to32, mkexpr(argL)));
   3165             assign(argR32, unop(Iop_64to32, mkexpr(argR)));
   3166             setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
   3167          } else {
   3168             putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
   3169          }
   3170       }
   3171       DIP("%s%s %s, %s, %s %s lsl %u\n",
   3172           isSub ? "sub" : "add", setCC ? "s" : "",
   3173           setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
   3174           nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
   3175           nameExt[opt], imm3);
   3176       return True;
   3177    }
   3178 
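   /* Worked example (illustrative): "add x0, sp, w1, uxtw #2" has
      opt == 010 and imm3 == 2, so the IR is, schematically,
         xMw  = 32Uto64(64to32(x1))   /* zero-extend W1 */
         argR = Shl64(xMw, 2)
         x0   = Add64(SP, argR)
      Note that, unlike the shifted-register form, rN == 31 here
      reads SP rather than XZR. */
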
   3179    /* ---------------- CCMP/CCMN(imm) ---------------- */
   3180    /* Bizarrely, these appear in the "data processing register"
   3181       category, even though they are operations against an
   3182       immediate. */
   3183    /* 31   29        20   15   11 9    3
   3184       sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
   3185       sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond
   3186 
   3187       Operation is:
   3188          (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
   3189          (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
   3190    */
   3191    if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
   3192        && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
   3193       Bool is64  = INSN(31,31) == 1;
   3194       Bool isSUB = INSN(30,30) == 1;
   3195       UInt imm5  = INSN(20,16);
   3196       UInt cond  = INSN(15,12);
   3197       UInt nn    = INSN(9,5);
   3198       UInt nzcv  = INSN(3,0);
   3199 
   3200       IRTemp condT = newTemp(Ity_I1);
   3201       assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
   3202 
   3203       IRType ty   = is64 ? Ity_I64 : Ity_I32;
   3204       IRTemp argL = newTemp(ty);
   3205       IRTemp argR = newTemp(ty);
   3206 
   3207       if (is64) {
   3208          assign(argL, getIReg64orZR(nn));
   3209          assign(argR, mkU64(imm5));
   3210       } else {
   3211          assign(argL, getIReg32orZR(nn));
   3212          assign(argR, mkU32(imm5));
   3213       }
   3214       setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
   3215 
   3216       DIP("ccm%c %s, #%u, #%u, %s\n",
   3217           isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
   3218           imm5, nzcv, nameCC(cond));
   3219       return True;
   3220    }
   3221 
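   /* Example semantics (illustrative): "ccmp x0, #4, #2, eq" sets
      the flags from x0 - 4 if Z is currently set, and otherwise
      forces NZCV = 0010, i.e. only C set.  No register is written
      in either case. */
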
   3222    /* ---------------- CCMP/CCMN(reg) ---------------- */
   3223    /* 31   29        20 15   11 9    3
   3224       sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
   3225       sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond
   3226       Operation is:
   3227          (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
   3228          (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
   3229    */
   3230    if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
   3231        && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
   3232       Bool is64  = INSN(31,31) == 1;
   3233       Bool isSUB = INSN(30,30) == 1;
   3234       UInt mm    = INSN(20,16);
   3235       UInt cond  = INSN(15,12);
   3236       UInt nn    = INSN(9,5);
   3237       UInt nzcv  = INSN(3,0);
   3238 
   3239       IRTemp condT = newTemp(Ity_I1);
   3240       assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
   3241 
   3242       IRType ty   = is64 ? Ity_I64 : Ity_I32;
   3243       IRTemp argL = newTemp(ty);
   3244       IRTemp argR = newTemp(ty);
   3245 
   3246       if (is64) {
   3247          assign(argL, getIReg64orZR(nn));
   3248          assign(argR, getIReg64orZR(mm));
   3249       } else {
   3250          assign(argL, getIReg32orZR(nn));
   3251          assign(argR, getIReg32orZR(mm));
   3252       }
   3253       setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
   3254 
   3255       DIP("ccm%c %s, %s, #%u, %s\n",
   3256           isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
   3257           nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
   3258       return True;
   3259    }
   3260 
   3261 
   3262    /* -------------- REV/REV16/REV32/RBIT -------------- */
   3263    /* 31 30 28       20    15   11 9 4
   3264 
   3265       1  10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
   3266       0  10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn
   3267 
   3268       1  10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
   3269       0  10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn
   3270 
   3271       1  10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
   3272       0  10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn
   3273 
   3274       1  10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
   3275    */
   3276    if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
   3277        && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
   3278       UInt b31 = INSN(31,31);
   3279       UInt opc = INSN(11,10);
   3280 
   3281       UInt ix = 0;
   3282       /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
   3283       else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
   3284       else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
   3285       else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
   3286       else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
   3287       else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
   3288       else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
   3289       if (ix >= 1 && ix <= 7) {
   3290          Bool   is64  = ix == 1 || ix == 3 || ix == 5 || ix == 7;
   3291          UInt   nn    = INSN(9,5);
   3292          UInt   dd    = INSN(4,0);
   3293          IRTemp src   = newTemp(Ity_I64);
   3294          IRTemp dst   = IRTemp_INVALID;
   3295          IRTemp (*math)(IRTemp) = NULL;
   3296          switch (ix) {
   3297             case 1: case 2: math = math_BYTESWAP64;   break;
   3298             case 3: case 4: math = math_BITSWAP64;    break;
   3299             case 5: case 6: math = math_USHORTSWAP64; break;
   3300             case 7:         math = math_UINTSWAP64;   break;
   3301             default: vassert(0);
   3302          }
   3303          const HChar* names[7]
   3304            = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
   3305          const HChar* nm = names[ix-1];
   3306          vassert(math);
   3307          if (ix == 6) {
   3308             /* This has to be special cased, since the logic below doesn't
   3309                handle it correctly. */
   3310             assign(src, getIReg64orZR(nn));
   3311             dst = math(src);
   3312             putIReg64orZR(dd,
   3313                           unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
   3314          } else if (is64) {
   3315             assign(src, getIReg64orZR(nn));
   3316             dst = math(src);
   3317             putIReg64orZR(dd, mkexpr(dst));
   3318          } else {
   3319             assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
   3320             dst = math(src);
   3321             putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
   3322          }
   3323          DIP("%s %s, %s\n", nm,
   3324              nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
   3325          return True;
   3326       }
   3327       /* else fall through */
   3328    }
   3329 
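   /* Example: REV with Xn = 0x0102030405060708 produces
      0x0807060504030201, whereas REV16 byte-swaps within each 16-bit
      lane, producing 0x0201040306050807. */
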
   3330    /* -------------------- CLZ/CLS -------------------- */
   3331    /*    30 28   24   20    15      9 4
   3332       sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
   3333       sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
   3334    */
   3335    if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
   3336        && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
   3337       Bool   is64  = INSN(31,31) == 1;
   3338       Bool   isCLS = INSN(10,10) == 1;
   3339       UInt   nn    = INSN(9,5);
   3340       UInt   dd    = INSN(4,0);
   3341       IRTemp src   = newTemp(Ity_I64);
   3342       IRTemp srcZ  = newTemp(Ity_I64);
   3343       IRTemp dst   = newTemp(Ity_I64);
   3344       /* Get the argument, widened out to 64 bit */
   3345       if (is64) {
   3346          assign(src, getIReg64orZR(nn));
   3347       } else {
   3348          assign(src, binop(Iop_Shl64,
   3349                            unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
   3350       }
   3351       /* If this is CLS, mash the arg around accordingly */
   3352       if (isCLS) {
   3353          IRExpr* one = mkU8(1);
   3354          assign(srcZ,
   3355          binop(Iop_Xor64,
   3356                binop(Iop_Shl64, mkexpr(src), one),
   3357                binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one)));
   3358       } else {
   3359          assign(srcZ, mkexpr(src));
   3360       }
   3361       /* And compute CLZ. */
   3362       if (is64) {
   3363          assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
   3364                                 mkU64(isCLS ? 63 : 64),
   3365                                 unop(Iop_Clz64, mkexpr(srcZ))));
   3366          putIReg64orZR(dd, mkexpr(dst));
   3367       } else {
   3368          assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
   3369                                 mkU64(isCLS ? 31 : 32),
   3370                                 unop(Iop_Clz64, mkexpr(srcZ))));
   3371          putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
   3372       }
   3373       DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
   3374           nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
   3375       return True;
   3376    }
   3377 
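   /* The CLS case XORs src shifted left by one with src itself (with
      bit 0 cleared), so Clz64 of the result counts the bits below the
      sign bit that are copies of it.  E.g. for
      src = 0xFFFF000000000000, srcZ is 0x0001000000000000 and Clz64
      gives 15, the correct CLS value. */
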
   3378    /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
   3379    /*    30 28        20 15   11 9 4
   3380       sf 00 1101 0110 m  0010 00 n d   LSLV Rd,Rn,Rm
   3381       sf 00 1101 0110 m  0010 01 n d   LSRV Rd,Rn,Rm
   3382       sf 00 1101 0110 m  0010 10 n d   ASRV Rd,Rn,Rm
   3383       sf 00 1101 0110 m  0010 11 n d   RORV Rd,Rn,Rm
   3384    */
   3385    if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
   3386        && INSN(15,12) == BITS4(0,0,1,0)) {
   3387       Bool   is64 = INSN(31,31) == 1;
   3388       UInt   mm   = INSN(20,16);
   3389       UInt   op   = INSN(11,10);
   3390       UInt   nn   = INSN(9,5);
   3391       UInt   dd   = INSN(4,0);
   3392       IRType ty   = is64 ? Ity_I64 : Ity_I32;
   3393       IRTemp srcL = newTemp(ty);
   3394       IRTemp srcR = newTemp(Ity_I64);
   3395       IRTemp res  = newTemp(ty);
   3396       IROp   iop  = Iop_INVALID;
   3397       assign(srcL, getIRegOrZR(is64, nn));
   3398       assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
   3399                                     mkU64(is64 ? 63 : 31)));
   3400       if (op < 3) {
   3401          // LSLV, LSRV, ASRV
   3402          switch (op) {
   3403             case BITS2(0,0): iop = mkSHL(ty); break;
   3404             case BITS2(0,1): iop = mkSHR(ty); break;
   3405             case BITS2(1,0): iop = mkSAR(ty); break;
   3406             default: vassert(0);
   3407          }
   3408          assign(res, binop(iop, mkexpr(srcL),
   3409                                 unop(Iop_64to8, mkexpr(srcR))));
   3410       } else {
   3411          // RORV
   3412          IROp opSHL = mkSHL(ty);
   3413          IROp opSHR = mkSHR(ty);
   3414          IROp opOR  = mkOR(ty);
   3415          IRExpr* width = mkU64(is64 ? 64 : 32);
   3416          assign(
   3417             res,
   3418             IRExpr_ITE(
   3419                binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
   3420                mkexpr(srcL),
   3421                binop(opOR,
   3422                      binop(opSHL,
   3423                            mkexpr(srcL),
   3424                            unop(Iop_64to8, binop(Iop_Sub64, width,
   3425                                                             mkexpr(srcR)))),
   3426                      binop(opSHR,
   3427                            mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
   3428          ));
   3429       }
   3430       putIRegOrZR(is64, dd, mkexpr(res));
   3431       vassert(op < 4);
   3432       const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
   3433       DIP("%s %s, %s, %s\n",
   3434           names[op], nameIRegOrZR(is64,dd),
   3435                      nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
   3436       return True;
   3437    }
   3438 
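   /* The RORV case uses the two-shift rotate identity
      ror(x, n) == (x >> n) | (x << (width - n)), guarded by an ITE
      for n == 0, since an IR shift by the full lane width would be
      undefined. */
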
   3439    /* -------------------- SDIV/UDIV -------------------- */
   3440    /*    30 28        20 15    10 9 4
   3441       sf 00 1101 0110 m  00001  1 n d  SDIV Rd,Rn,Rm
   3442       sf 00 1101 0110 m  00001  0 n d  UDIV Rd,Rn,Rm
   3443    */
   3444    if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
   3445        && INSN(15,11) == BITS5(0,0,0,0,1)) {
   3446       Bool is64 = INSN(31,31) == 1;
   3447       UInt mm   = INSN(20,16);
   3448       Bool isS  = INSN(10,10) == 1;
   3449       UInt nn   = INSN(9,5);
   3450       UInt dd   = INSN(4,0);
   3451       if (isS) {
   3452          putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
   3453                                      getIRegOrZR(is64, nn),
   3454                                      getIRegOrZR(is64, mm)));
   3455       } else {
   3456          putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
   3457                                      getIRegOrZR(is64, nn),
   3458                                      getIRegOrZR(is64, mm)));
   3459       }
   3460       DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
   3461           nameIRegOrZR(is64, dd),
   3462           nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
   3463       return True;
   3464    }
   3465 
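   /* Note: Iop_DivS64/Iop_DivS32 round towards zero, matching the
      AArch64 SDIV definition; for example -7 sdiv 2 gives -3, not
      -4. */
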
   3466    /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
   3467    /* 31        23  20 15 14 9 4
   3468       1001 1011 101 m  0  a  n d   UMADDL Xd,Wn,Wm,Xa
   3469       1001 1011 001 m  0  a  n d   SMADDL Xd,Wn,Wm,Xa
   3470       1001 1011 101 m  1  a  n d   UMSUBL Xd,Wn,Wm,Xa
   3471       1001 1011 001 m  1  a  n d   SMSUBL Xd,Wn,Wm,Xa
   3472       with operation
   3473          Xd = Xa +/- (Wn *u/s Wm)
   3474    */
   3475    if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
   3476       Bool   isU   = INSN(23,23) == 1;
   3477       UInt   mm    = INSN(20,16);
   3478       Bool   isAdd = INSN(15,15) == 0;
   3479       UInt   aa    = INSN(14,10);
   3480       UInt   nn    = INSN(9,5);
   3481       UInt   dd    = INSN(4,0);
   3482       IRTemp wN    = newTemp(Ity_I32);
   3483       IRTemp wM    = newTemp(Ity_I32);
   3484       IRTemp xA    = newTemp(Ity_I64);
   3485       IRTemp muld  = newTemp(Ity_I64);
   3486       IRTemp res   = newTemp(Ity_I64);
   3487       assign(wN, getIReg32orZR(nn));
   3488       assign(wM, getIReg32orZR(mm));
   3489       assign(xA, getIReg64orZR(aa));
   3490       assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
   3491                          mkexpr(wN), mkexpr(wM)));
   3492       assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
   3493                         mkexpr(xA), mkexpr(muld)));
   3494       putIReg64orZR(dd, mkexpr(res));
   3495       DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
   3496           nameIReg64orZR(dd), nameIReg32orZR(nn),
   3497           nameIReg32orZR(mm), nameIReg64orZR(aa));
   3498       return True;
   3499    }
   3500    vex_printf("ARM64 front end: data_processing_register\n");
   3501    return False;
   3502 #  undef INSN
   3503 }
   3504 
   3505 
   3506 /*------------------------------------------------------------*/
   3507 /*--- Math helpers for vector interleave/deinterleave      ---*/
   3508 /*------------------------------------------------------------*/
   3509 
   3510 #define EX(_tmp) \
   3511            mkexpr(_tmp)
   3512 #define SL(_hi128,_lo128,_nbytes) \
   3513            ( (_nbytes) == 0 \
   3514                 ? (_lo128) \
   3515                 : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
   3516 #define ROR(_v128,_nbytes) \
   3517            SL((_v128),(_v128),(_nbytes))
   3518 #define ROL(_v128,_nbytes) \
   3519            SL((_v128),(_v128),16-(_nbytes))
   3520 #define SHR(_v128,_nbytes) \
   3521            binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
   3522 #define SHL(_v128,_nbytes) \
   3523            binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
   3524 #define ILO64x2(_argL,_argR) \
   3525            binop(Iop_InterleaveLO64x2,(_argL),(_argR))
   3526 #define IHI64x2(_argL,_argR) \
   3527            binop(Iop_InterleaveHI64x2,(_argL),(_argR))
   3528 #define ILO32x4(_argL,_argR) \
   3529            binop(Iop_InterleaveLO32x4,(_argL),(_argR))
   3530 #define IHI32x4(_argL,_argR) \
   3531            binop(Iop_InterleaveHI32x4,(_argL),(_argR))
   3532 #define ILO16x8(_argL,_argR) \
   3533            binop(Iop_InterleaveLO16x8,(_argL),(_argR))
   3534 #define IHI16x8(_argL,_argR) \
   3535            binop(Iop_InterleaveHI16x8,(_argL),(_argR))
   3536 #define ILO8x16(_argL,_argR) \
   3537            binop(Iop_InterleaveLO8x16,(_argL),(_argR))
   3538 #define IHI8x16(_argL,_argR) \
   3539            binop(Iop_InterleaveHI8x16,(_argL),(_argR))
   3540 #define CEV32x4(_argL,_argR) \
   3541            binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
   3542 #define COD32x4(_argL,_argR) \
   3543            binop(Iop_CatOddLanes32x4,(_argL),(_argR))
   3544 #define COD16x8(_argL,_argR) \
   3545            binop(Iop_CatOddLanes16x8,(_argL),(_argR))
   3546 #define COD8x16(_argL,_argR) \
   3547            binop(Iop_CatOddLanes8x16,(_argL),(_argR))
   3548 #define CEV8x16(_argL,_argR) \
   3549            binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
   3550 #define AND(_arg1,_arg2) \
   3551            binop(Iop_AndV128,(_arg1),(_arg2))
   3552 #define OR2(_arg1,_arg2) \
   3553            binop(Iop_OrV128,(_arg1),(_arg2))
   3554 #define OR3(_arg1,_arg2,_arg3) \
   3555            binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
   3556 #define OR4(_arg1,_arg2,_arg3,_arg4) \
   3557            binop(Iop_OrV128, \
   3558                  binop(Iop_OrV128,(_arg1),(_arg2)), \
   3559                  binop(Iop_OrV128,(_arg3),(_arg4)))
   3560 
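/* These helpers work at byte granularity: e.g. ROR(v, 8) rotates a
   128-bit value right by 8 bytes, i.e. swaps its two 64-bit halves,
   since SL(v, v, 8) slices the middle 16 bytes out of v:v. */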
   3561 
   3562 /* Do interleaving for 1 128 bit vector, for ST1 insns. */
   3563 static
   3564 void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
   3565                            UInt laneSzBlg2, IRTemp u0 )
   3566 {
   3567    assign(*i0, mkexpr(u0));
   3568 }
   3569 
   3570 
   3571 /* Do interleaving for 2 128 bit vectors, for ST2 insns. */
   3572 static
   3573 void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
   3574                            UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
   3575 {
   3576    /* This is pretty easy, since we have primitives directly to
   3577       hand. */
   3578    if (laneSzBlg2 == 3) {
   3579       // 64x2
   3580       // u1 == B1 B0, u0 == A1 A0
   3581       // i1 == B1 A1, i0 == B0 A0
   3582       assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
   3583       assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
   3584       return;
   3585    }
   3586    if (laneSzBlg2 == 2) {
   3587       // 32x4
   3588       // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
   3589       // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
   3590       assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
   3591       assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
   3592       return;
   3593    }
   3594    if (laneSzBlg2 == 1) {
   3595       // 16x8
   3596       // u1 == B{7..0}, u0 == A{7..0}
   3597       // i0 == B3 A3 B2 A2 B1 A1 B0 A0
   3598       // i1 == B7 A7 B6 A6 B5 A5 B4 A4
   3599       assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
   3600       assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
   3601       return;
   3602    }
   3603    if (laneSzBlg2 == 0) {
   3604       // 8x16
   3605       // u1 == B{f..0}, u0 == A{f..0}
   3606       // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
   3607       // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
   3608       assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
   3609       assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
   3610       return;
   3611    }
   3612    /*NOTREACHED*/
   3613    vassert(0);
   3614 }
   3615 
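/* Reading lanes from least significant upwards, the 32x4 case hands
   back i0 = A0 B0 A1 B1 and i1 = A2 B2 A3 B3, which when stored
   i0-first gives the memory image A0 B0 A1 B1 A2 B2 A3 B3 that ST2
   requires. */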
   3616 
   3617 /* Do interleaving for 3 128 bit vectors, for ST3 insns. */
   3618 static
   3619 void math_INTERLEAVE3_128(
   3620         /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
   3621         UInt laneSzBlg2,
   3622         IRTemp u0, IRTemp u1, IRTemp u2 )
   3623 {
   3624    if (laneSzBlg2 == 3) {
   3625       // 64x2
   3626       // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
   3627       // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
   3628       assign(*i2, IHI64x2( EX(u2), EX(u1) ));
   3629       assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
   3630       assign(*i0, ILO64x2( EX(u1), EX(u0) ));
   3631       return;
   3632    }
   3633 
   3634    if (laneSzBlg2 == 2) {
   3635       // 32x4
   3636       // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
   3637       // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
   3638       // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
   3639       IRTemp p0    = newTempV128();
   3640       IRTemp p1    = newTempV128();
   3641       IRTemp p2    = newTempV128();
   3642       IRTemp c1100 = newTempV128();
   3643       IRTemp c0011 = newTempV128();
   3644       IRTemp c0110 = newTempV128();
   3645       assign(c1100, mkV128(0xFF00));
   3646       assign(c0011, mkV128(0x00FF));
   3647       assign(c0110, mkV128(0x0FF0));
   3648       // First interleave them at 64x2 granularity,
   3649       // generating partial ("p") values.
   3650       math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
   3651       // And more shuffling around for the final answer
   3652       assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
   3653                        AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
   3654       assign(*i1, OR3( SHL(EX(p2),12),
   3655                        AND(EX(p1),EX(c0110)),
   3656                        SHR(EX(p0),12) ));
   3657       assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
   3658                        AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
   3659       return;
   3660    }
   3661 
   3662    if (laneSzBlg2 == 1) {
   3663       // 16x8
   3664       // u2 == C7 C6 C5 C4 C3 C2 C1 C0
   3665       // u1 == B7 B6 B5 B4 B3 B2 B1 B0
   3666       // u0 == A7 A6 A5 A4 A3 A2 A1 A0
   3667       //
   3668       // p2 == C7 C6 B7 B6 A7 A6 C5 C4
   3669       // p1 == B5 B4 A5 A4 C3 C2 B3 B2
   3670       // p0 == A3 A2 C1 C0 B1 B0 A1 A0
   3671       //
   3672       // i2 == C7 B7 A7 C6 B6 A6 C5 B5
   3673       // i1 == A5 C4 B4 A4 C3 B3 A3 C2
   3674       // i0 == B2 A2 C1 B1 A1 C0 B0 A0
   3675       IRTemp p0    = newTempV128();
   3676       IRTemp p1    = newTempV128();
   3677       IRTemp p2    = newTempV128();
   3678       IRTemp c1000 = newTempV128();
   3679       IRTemp c0100 = newTempV128();
   3680       IRTemp c0010 = newTempV128();
   3681       IRTemp c0001 = newTempV128();
   3682       assign(c1000, mkV128(0xF000));
   3683       assign(c0100, mkV128(0x0F00));
   3684       assign(c0010, mkV128(0x00F0));
   3685       assign(c0001, mkV128(0x000F));
   3686       // First interleave them at 32x4 granularity,
   3687       // generating partial ("p") values.
   3688       math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
   3689       // And more shuffling around for the final answer
   3690       assign(*i2,
   3691              OR4( AND( IHI16x8( EX(p2),        ROL(EX(p2),4) ), EX(c1000) ),
   3692                   AND( IHI16x8( ROL(EX(p2),6), EX(p2)        ), EX(c0100) ),
   3693                   AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
   3694                   AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
   3695       ));
   3696       assign(*i1,
   3697              OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
   3698                   AND( IHI16x8( EX(p1),        ROL(EX(p1),4) ), EX(c0100) ),
   3699                   AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
   3700                   AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
   3701       ));
   3702       assign(*i0,
   3703              OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
   3704                   AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
   3705                   AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
   3706                   AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
   3707       ));
   3708       return;
   3709    }
   3710 
   3711    if (laneSzBlg2 == 0) {
   3712       // 8x16.  It doesn't seem worth the hassle of first doing a
   3713       // 16x8 interleave, so just generate all 24 partial results
   3714       // directly :-(
   3715       // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
   3716       // i2 == Cf Bf Af Ce .. Bb Ab Ca
   3717       // i1 == Ba Aa C9 B9 .. A6 C5 B5
   3718       // i0 == A5 C4 B4 A4 .. C0 B0 A0
   3719 
   3720       IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
   3721       IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
   3722       IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
   3723       IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
   3724       IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
   3725       IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
   3726       IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
   3727       IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
   3728       IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();
   3729 
   3730       // eg XXXX(qqq, CC, 0xF, BB, 0xA) sets qqq to be a vector
   3731       // of the form 14 bytes junk : CC[0xF] : BB[0xA]
   3732       //
   3733 #     define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
   3734          IRTemp t_##_tempName = newTempV128(); \
   3735          assign(t_##_tempName, \
   3736                 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
   3737                          ROR(EX(_srcVec2),(_srcShift2)) ) )
   3738 
   3739       // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
   3740       IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;
   3741 
   3742       // The slicing and reassembly are done as interleavedly as possible,
   3743       // so as to minimise the demand for registers in the back end, which
   3744       // was observed to be a problem in testing.
   3745 
   3746       XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
   3747       XXXX(AfCe, AA, 0xf, CC, 0xe);
   3748       assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));
   3749 
   3750       XXXX(BeAe, BB, 0xe, AA, 0xe);
   3751       XXXX(CdBd, CC, 0xd, BB, 0xd);
   3752       assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
   3753       assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));
   3754 
   3755       XXXX(AdCc, AA, 0xd, CC, 0xc);
   3756       XXXX(BcAc, BB, 0xc, AA, 0xc);
   3757       assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));
   3758 
   3759       XXXX(CbBb, CC, 0xb, BB, 0xb);
   3760       XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
   3761       assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
   3762       assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
   3763       assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));
   3764 
   3765       XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
   3766       XXXX(C9B9, CC, 0x9, BB, 0x9);
   3767       assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));
   3768 
   3769       XXXX(A9C8, AA, 0x9, CC, 0x8);
   3770       XXXX(B8A8, BB, 0x8, AA, 0x8);
   3771       assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
   3772       assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));
   3773 
   3774       XXXX(C7B7, CC, 0x7, BB, 0x7);
   3775       XXXX(A7C6, AA, 0x7, CC, 0x6);
   3776       assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));
   3777 
   3778       XXXX(B6A6, BB, 0x6, AA, 0x6);
   3779       XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
   3780       assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
   3781       assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
   3782       assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));
   3783 
   3784       XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
   3785       XXXX(B4A4, BB, 0x4, AA, 0x4);
   3786       assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));
   3787 
   3788       XXXX(C3B3, CC, 0x3, BB, 0x3);
   3789       XXXX(A3C2, AA, 0x3, CC, 0x2);
   3790       assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
   3791       assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));
   3792 
   3793       XXXX(B2A2, BB, 0x2, AA, 0x2);
   3794       XXXX(C1B1, CC, 0x1, BB, 0x1);
   3795       assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));
   3796 
   3797       XXXX(A1C0, AA, 0x1, CC, 0x0);
   3798       XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
   3799       assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
   3800       assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
   3801       assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));
   3802 
   3803 #     undef XXXX
   3804       return;
   3805    }
   3806 
   3807    /*NOTREACHED*/
   3808    vassert(0);
   3809 }
   3810 
   3811 
   3812 /* Do interleaving for 4 128 bit vectors, for ST4 insns. */
   3813 static
   3814 void math_INTERLEAVE4_128(
   3815         /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
   3816         UInt laneSzBlg2,
   3817         IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
   3818 {
   3819    if (laneSzBlg2 == 3) {
   3820       // 64x2
   3821       assign(*i0, ILO64x2(EX(u1), EX(u0)));
   3822       assign(*i1, ILO64x2(EX(u3), EX(u2)));
   3823       assign(*i2, IHI64x2(EX(u1), EX(u0)));
   3824       assign(*i3, IHI64x2(EX(u3), EX(u2)));
   3825       return;
   3826    }
   3827    if (laneSzBlg2 == 2) {
   3828       // 32x4
   3829       // First, interleave at the 64-bit lane size.
   3830       IRTemp p0 = newTempV128();
   3831       IRTemp p1 = newTempV128();
   3832       IRTemp p2 = newTempV128();
   3833       IRTemp p3 = newTempV128();
   3834       math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
   3835       // And interleave (cat) at the 32 bit size.
   3836       assign(*i0, CEV32x4(EX(p1), EX(p0)));
   3837       assign(*i1, COD32x4(EX(p1), EX(p0)));
   3838       assign(*i2, CEV32x4(EX(p3), EX(p2)));
   3839       assign(*i3, COD32x4(EX(p3), EX(p2)));
   3840       return;
   3841    }
   3842    if (laneSzBlg2 == 1) {
   3843       // 16x8
   3844       // First, interleave at the 32-bit lane size.
   3845       IRTemp p0 = newTempV128();
   3846       IRTemp p1 = newTempV128();
   3847       IRTemp p2 = newTempV128();
   3848       IRTemp p3 = newTempV128();
   3849       math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
   3850       // And rearrange within each vector, to get the right 16 bit lanes.
   3851       assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
   3852       assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
   3853       assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
   3854       assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
   3855       return;
   3856    }
   3857    if (laneSzBlg2 == 0) {
   3858       // 8x16
   3859       // First, interleave at the 16-bit lane size.
   3860       IRTemp p0 = newTempV128();
   3861       IRTemp p1 = newTempV128();
   3862       IRTemp p2 = newTempV128();
   3863       IRTemp p3 = newTempV128();
   3864       math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
   3865       // And rearrange within each vector, to get the right 8 bit lanes.
   3866       assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
   3867       assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
   3868       assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
   3869       assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
   3870       return;
   3871    }
   3872    /*NOTREACHED*/
   3873    vassert(0);
   3874 }
   3875 
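/* Design note: apart from the 64x2 base case, each lane size is
   handled by first interleaving at the next larger size and then
   rearranging within each result vector, which keeps the set of
   shuffle primitives needed small. */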
   3876 
   3877 /* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
   3878 static
   3879 void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
   3880                              UInt laneSzBlg2, IRTemp i0 )
   3881 {
   3882    assign(*u0, mkexpr(i0));
   3883 }
   3884 
   3885 
   3886 /* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
   3887 static
   3888 void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
   3889                              UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
   3890 {
   3891    /* This is pretty easy, since we have primitives directly to
   3892       hand. */
   3893    if (laneSzBlg2 == 3) {
   3894       // 64x2
   3895       // i1 == B1 A1, i0 == B0 A0
   3896       // u1 == B1 B0, u0 == A1 A0
   3897       assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
   3898       assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
   3899       return;
   3900    }
   3901    if (laneSzBlg2 == 2) {
   3902       // 32x4
   3903       // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
   3904       // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
   3905       assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
   3906       assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
   3907       return;
   3908    }
   3909    if (laneSzBlg2 == 1) {
   3910       // 16x8
   3911       // i0 == B3 A3 B2 A2 B1 A1 B0 A0
   3912       // i1 == B7 A7 B6 A6 B5 A5 B4 A4
   3913       // u1 == B{7..0}, u0 == A{7..0}
   3914       assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
   3915       assign(*u1, binop(Iop_CatOddLanes16x8,  mkexpr(i1), mkexpr(i0)));
   3916       return;
   3917    }
   3918    if (laneSzBlg2 == 0) {
   3919       // 8x16
   3920       // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
   3921       // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
   3922       // u1 == B{f..0}, u0 == A{f..0}
   3923       assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
   3924       assign(*u1, binop(Iop_CatOddLanes8x16,  mkexpr(i1), mkexpr(i0)));
   3925       return;
   3926    }
   3927    /*NOTREACHED*/
   3928    vassert(0);
   3929 }
   3930 
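/* For the 32x4 and narrower cases this is the exact inverse of
   math_INTERLEAVE2_128: the even lanes of i1:i0 are the A elements
   and the odd lanes are the B elements. */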
   3931 
   3932 /* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
   3933 static
   3934 void math_DEINTERLEAVE3_128(
   3935         /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
   3936         UInt laneSzBlg2,
   3937         IRTemp i0, IRTemp i1, IRTemp i2 )
   3938 {
   3939    if (laneSzBlg2 == 3) {
   3940       // 64x2
   3941       // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
   3942       // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
   3943       assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1)        ));
   3944       assign(*u1, ILO64x2( EX(i2),        ROL(EX(i0),8) ));
   3945       assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0)        ));
   3946       return;
   3947    }
   3948 
   3949    if (laneSzBlg2 == 2) {
   3950       // 32x4
   3951       // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
   3952       // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
   3953       // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
   3954       IRTemp t_a1c0b0a0 = newTempV128();
   3955       IRTemp t_a2c1b1a1 = newTempV128();
   3956       IRTemp t_a3c2b2a2 = newTempV128();
   3957       IRTemp t_a0c3b3a3 = newTempV128();
   3958       IRTemp p0 = newTempV128();
   3959       IRTemp p1 = newTempV128();
   3960       IRTemp p2 = newTempV128();
   3961       // Compute some intermediate values.
   3962       assign(t_a1c0b0a0, EX(i0));
   3963       assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
   3964       assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
   3965       assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
   3966       // First deinterleave into lane-pairs
   3967       assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
   3968       assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
   3969                          IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
   3970       assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
   3971       // Then deinterleave at 64x2 granularity.
   3972       math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
   3973       return;
   3974    }
   3975 
   3976    if (laneSzBlg2 == 1) {
   3977       // 16x8
   3978       // u2 == C7 C6 C5 C4 C3 C2 C1 C0
   3979       // u1 == B7 B6 B5 B4 B3 B2 B1 B0
   3980       // u0 == A7 A6 A5 A4 A3 A2 A1 A0
   3981       //
   3982       // i2 == C7 B7 A7 C6 B6 A6 C5 B5
   3983       // i1 == A5 C4 B4 A4 C3 B3 A3 C2
   3984       // i0 == B2 A2 C1 B1 A1 C0 B0 A0
   3985       //
   3986       // p2 == C7 C6 B7 B6 A7 A6 C5 C4
   3987       // p1 == B5 B4 A5 A4 C3 C2 B3 B2
   3988       // p0 == A3 A2 C1 C0 B1 B0 A1 A0
   3989 
   3990       IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
   3991       s0 = s1 = s2 = s3
   3992          = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
   3993       newTempsV128_4(&s0, &s1, &s2, &s3);
   3994       newTempsV128_4(&t0, &t1, &t2, &t3);
   3995       newTempsV128_4(&p0, &p1, &p2, &c00111111);
   3996 
   3997       // s0 == b2a2 c1b1a1 c0b0a0
   3998       // s1 == b4a4 c3b3a3 c2b2a2
   3999       // s2 == b6a6 c5b5a5 c4b4a4
   4000       // s3 == b0a0 c7b7a7 c6b6a6
   4001       assign(s0, EX(i0));
   4002       assign(s1, SL(EX(i1),EX(i0),6*2));
   4003       assign(s2, SL(EX(i2),EX(i1),4*2));
   4004       assign(s3, SL(EX(i0),EX(i2),2*2));
   4005 
   4006       // t0 == 0 0 c1c0 b1b0 a1a0
   4007       // t1 == 0 0 c3c2 b3b2 a3a2
   4008       // t2 == 0 0 c5c4 b5b4 a5a4
   4009       // t3 == 0 0 c7c6 b7b6 a7a6
   4010       assign(c00111111, mkV128(0x0FFF));
   4011       assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
   4012       assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
   4013       assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
   4014       assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));
   4015 
   4016       assign(p0, OR2(EX(t0),          SHL(EX(t1),6*2)));
   4017       assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
   4018       assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));
   4019 
   4020       // Then deinterleave at 32x4 granularity.
   4021       math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
   4022       return;
   4023    }
   4024 
   4025    if (laneSzBlg2 == 0) {
   4026       // 8x16.  This is the same scheme as for 16x8, with twice the
   4027       // number of intermediate values.
   4028       //
   4029       // u2 == C{f..0}
   4030       // u1 == B{f..0}
   4031       // u0 == A{f..0}
   4032       //
   4033       // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
   4034       // i1 ==  BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
      // i0 ==   A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      //
      // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
      // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
      // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
      //
      IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
             t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
      s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
         = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
         = IRTemp_INVALID;
      newTempsV128_4(&s0, &s1, &s2, &s3);
      newTempsV128_4(&s4, &s5, &s6, &s7);
      newTempsV128_4(&t0, &t1, &t2, &t3);
      newTempsV128_4(&t4, &t5, &t6, &t7);
      newTempsV128_4(&p0, &p1, &p2, &cMASK);

      // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
      // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
      // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
      // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
      // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
      // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
      // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
      assign(s0, SL(EX(i1),EX(i0), 0));
      assign(s1, SL(EX(i1),EX(i0), 6));
      assign(s2, SL(EX(i1),EX(i0),12));
      assign(s3, SL(EX(i2),EX(i1), 2));
      assign(s4, SL(EX(i2),EX(i1), 8));
      assign(s5, SL(EX(i2),EX(i1),14));
      assign(s6, SL(EX(i0),EX(i2), 4));
      assign(s7, SL(EX(i0),EX(i2),10));

      // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
      // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
      // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
      // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
      // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
      // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
      // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
      // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
      assign(cMASK, mkV128(0x003F));
      assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
      assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
      assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
      assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
      assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
      assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
      assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
      assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));

      assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
      assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
                 SHL(EX(t3),2), SHR(EX(t2),4) ));
      assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));

      // Then deinterleave at 16x8 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
      return;
   }

   /*NOTREACHED*/
   vassert(0);
}


/* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_128(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*u0, ILO64x2(EX(i2), EX(i0)));
      assign(*u1, IHI64x2(EX(i2), EX(i0)));
      assign(*u2, ILO64x2(EX(i3), EX(i1)));
      assign(*u3, IHI64x2(EX(i3), EX(i1)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      IRTemp p0 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, ILO32x4(EX(i1), EX(i0)));
      assign(p1, IHI32x4(EX(i1), EX(i0)));
      assign(p2, ILO32x4(EX(i3), EX(i2)));
      assign(p3, IHI32x4(EX(i3), EX(i2)));
      // And now do what we did for the 64-bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // Deinterleave into 32-bit chunks, then do as the 32-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
      assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
      assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
      assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
      // From here on is like the 32 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // Deinterleave into 16-bit chunks, then do as the 16-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
                          ILO8x16(EX(i0),ROL(EX(i0),4)) ));
      assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
                          ILO8x16(EX(i1),ROL(EX(i1),4)) ));
      assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
                          ILO8x16(EX(i2),ROL(EX(i2),4)) ));
      assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
                          ILO8x16(EX(i3),ROL(EX(i3),4)) ));
      // From here on is like the 16 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Wrappers that use the full-width (de)interleavers to do half-width
   (de)interleaving.  The scheme is to clone each input lane in the
   lower half of each incoming value, do a full width (de)interleave
   at the next lane size up, and remove every other lane of the
   result.  The returned values may have any old junk in the upper
   64 bits -- the caller must ignore that. */
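
/* As a concrete illustration of that scheme, here is a hand-worked
   sketch in plain scalars (not part of the decoder; the function and
   array names are illustrative only).  It interleaves two 2-lane
   vectors at half width by doing the interleave at full width and
   discarding the odd-numbered lanes of the result: */
#if 0
static void example_halfwidth_interleave2 ( void )
{
   /* Inputs: two "64-bit vectors" of two 32-bit lanes each. */
   unsigned a[2] = { 0xA0, 0xA1 };
   unsigned b[2] = { 0xB0, 0xB1 };
   /* Step 1, the "doubler": clone each lane in place.
      da == A0 A0 A1 A1, db == B0 B0 B1 B1. */
   unsigned da[4] = { a[0], a[0], a[1], a[1] };
   unsigned db[4] = { b[0], b[0], b[1], b[1] };
   /* Step 2: full-width interleave at the next lane size up, treating
      each adjacent (cloned) pair as a single wide lane.
      di0 == A0 A0 B0 B0, di1 == A1 A1 B1 B1. */
   unsigned di0[4] = { da[0], da[1], db[0], db[1] };
   unsigned di1[4] = { da[2], da[3], db[2], db[3] };
   /* Step 3, the "halver": keep only the even-numbered lanes.
      i0 == A0 B0 and i1 == A1 B1, which is exactly the 2-way
      interleave of a and b at the original lane size. */
   unsigned i0[2] = { di0[0], di0[2] };
   unsigned i1[2] = { di1[0], di1[2] };
   (void)i0; (void)i1;
}
#endif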

/* Helper function -- get doubling and narrowing operations. */
static
void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
                                   /*OUT*/IROp* halver,
                                   UInt laneSzBlg2 )
{
   switch (laneSzBlg2) {
      case 2:
         *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
         break;
      case 1:
         *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
         break;
      case 0:
         *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
         break;
      default:
         vassert(0);
   }
}

/* Do interleaving for 1 64 bit vector, for ST1 insns. */
static
void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
                          UInt laneSzBlg2, IRTemp u0 )
{
   assign(*i0, mkexpr(u0));
}


/* Do interleaving for 2 64 bit vectors, for ST2 insns. */
static
void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
                          UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      return;
   }

   vassert(laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
}


/* Do interleaving for 3 64 bit vectors, for ST3 insns. */
static
void math_INTERLEAVE3_64(
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      return;
   }

   vassert(laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
}


/* Do interleaving for 4 64 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_64(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      assign(*i3, EX(u3));
      return;
   }

   vassert(laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   assign(du3, binop(doubler, EX(u3), EX(u3)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
                        laneSzBlg2 + 1, du0, du1, du2, du3);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
   assign(*i3, binop(halver, EX(di3), EX(di3)));
}


/* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
                            UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}


/* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                            UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      return;
   }

   vassert(laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
}


/* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_64(
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      return;
   }

   vassert(laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
}


/* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_64(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      assign(*u3, EX(i3));
      return;
   }

   vassert(laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   assign(di3, binop(doubler, EX(i3), EX(i3)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
                          laneSzBlg2 + 1, di0, di1, di2, di3);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
   assign(*u3, binop(halver, EX(du3), EX(du3)));
}


#undef EX
#undef SL
#undef ROR
#undef ROL
#undef SHR
#undef SHL
#undef ILO64x2
#undef IHI64x2
#undef ILO32x4
#undef IHI32x4
#undef ILO16x8
#undef IHI16x8
#undef ILO8x16
#undef IHI8x16
#undef CEV32x4
#undef COD32x4
#undef COD16x8
#undef COD8x16
#undef CEV8x16
#undef AND
#undef OR2
#undef OR3
#undef OR4


/*------------------------------------------------------------*/
/*--- Load and Store instructions                          ---*/
/*------------------------------------------------------------*/

/* Generate the EA for a "reg + reg" style amode.  This is done from
   parts of the insn, but for sanity checking's sake it takes the
   whole insn.  This appears to depend on insn[15:12], with
   opt=insn[15:13] and S=insn[12]:

   The possible forms, along with their opt:S values, are:
      011:0   Xn|SP + Xm
      111:0   Xn|SP + Xm
      011:1   Xn|SP + Xm * transfer_szB
      111:1   Xn|SP + Xm * transfer_szB
      010:0   Xn|SP + 32Uto64(Wm)
      010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
      110:0   Xn|SP + 32Sto64(Wm)
      110:1   Xn|SP + 32Sto64(Wm) * transfer_szB

   Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
   the transfer size is insn[23,31,30].  For integer loads/stores,
   insn[23] is zero, hence szLg2 can be at most 3 in such cases.

   If the decoding fails, it returns IRTemp_INVALID.

   isInt is True iff this decoding is for transfers to/from integer
   registers.  If False it is for transfers to/from vector registers.
*/
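/* Worked example (hand-decoded, so treat it as illustrative rather
   than authoritative): for "ldr x0, [x1, x2, lsl #3]" we have
   opt:S == 011:1, so rhs is X2 << 3 (szLg2 == 3 supplies the shift
   amount), the EA is X1 + (X2 << 3), and buf gets text along the
   lines of "[x1, x2 lsl 3]". */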
static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
{
   UInt    optS  = SLICE_UInt(insn, 15, 12);
   UInt    mm    = SLICE_UInt(insn, 20, 16);
   UInt    nn    = SLICE_UInt(insn, 9, 5);
   UInt    szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
                   | SLICE_UInt(insn, 31, 30); // Log2 of the size

   buf[0] = 0;

   /* Sanity checks, that this really is a load/store insn. */
   if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
      goto fail;

   if (isInt
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
      goto fail;

   if (!isInt
       && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
      goto fail;

   /* Throw out non-verified but possibly valid cases. */
   switch (szLg2) {
      case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
      case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
      case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
      case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
      case BITS3(1,0,0): // can only ever be valid for the vector case
                         if (isInt) goto fail; else break;
      case BITS3(1,0,1): // these sizes are never valid
      case BITS3(1,1,0):
      case BITS3(1,1,1): goto fail;

      default: vassert(0);
   }

   IRExpr* rhs  = NULL;
   switch (optS) {
      case BITS4(1,1,1,0): goto fail; //ATC
      case BITS4(0,1,1,0):
         rhs = getIReg64orZR(mm);
         vex_sprintf(buf, "[%s, %s]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm));
         break;
      case BITS4(1,1,1,1): goto fail; //ATC
      case BITS4(0,1,1,1):
         rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s lsl %u]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
         break;
      case BITS4(0,1,0,0):
         rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s uxtw]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(0,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s uxtw, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      case BITS4(1,1,0,0):
         rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s sxtw]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(1,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s sxtw, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      default:
         /* The rest appear to be genuinely invalid */
         goto fail;
   }

   vassert(rhs);
   IRTemp res = newTemp(Ity_I64);
   assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
   return res;

  fail:
   vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
   return IRTemp_INVALID;
}


/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
   bits of DATAE :: Ity_I64. */
static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
{
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         storeLE(addrE, dataE);
         break;
      case 4:
         storeLE(addrE, unop(Iop_64to32, dataE));
         break;
      case 2:
         storeLE(addrE, unop(Iop_64to16, dataE));
         break;
      case 1:
         storeLE(addrE, unop(Iop_64to8, dataE));
         break;
      default:
         vassert(0);
   }
}


/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
   placing the result in an Ity_I64 temporary. */
static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
{
   IRTemp  res   = newTemp(Ity_I64);
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         assign(res, loadLE(Ity_I64,addrE));
         break;
      case 4:
         assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
         break;
      case 2:
         assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
         break;
      case 1:
         assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
         break;
      default:
         vassert(0);
   }
   return res;
}
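
/* These two are inverses for the values that survive the narrowing.
   For example, with szB == 2: gen_narrowing_store writes only the low
   16 bits of DATAE, and gen_zwidening_load reads them back zero
   extended, so 0x1122334455667788 stores as 0x7788 and reloads as
   0x0000000000007788. */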


/* Generate a "standard 7" name, from bitQ and size.  But also
   allow ".1d" since that's occasionally useful. */
static
const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
{
   vassert(bitQ <= 1 && size <= 3);
   const HChar* nms[8]
      = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
   UInt ix = (bitQ << 2) | size;
   vassert(ix < 8);
   return nms[ix];
}
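
/* For example, bitQ == 1 and size == 2 give ix == 6 and hence "4s",
   while bitQ == 0 and size == 3 give the otherwise-unusual "1d". */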


static
Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------ LDR,STR (immediate, uimm12) ----------- */
   /* uimm12 is scaled by the transfer size

      31 29  26    21    9  4
      |  |   |     |     |  |
      11 111 00100 imm12 nn tt    STR  Xt, [Xn|SP, #imm12 * 8]
      11 111 00101 imm12 nn tt    LDR  Xt, [Xn|SP, #imm12 * 8]

      10 111 00100 imm12 nn tt    STR  Wt, [Xn|SP, #imm12 * 4]
      10 111 00101 imm12 nn tt    LDR  Wt, [Xn|SP, #imm12 * 4]

      01 111 00100 imm12 nn tt    STRH Wt, [Xn|SP, #imm12 * 2]
      01 111 00101 imm12 nn tt    LDRH Wt, [Xn|SP, #imm12 * 2]

      00 111 00100 imm12 nn tt    STRB Wt, [Xn|SP, #imm12 * 1]
      00 111 00101 imm12 nn tt    LDRB Wt, [Xn|SP, #imm12 * 1]
   */
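   /* Worked example (hand-assembled; treat as illustrative):
      0xF9400841 decodes here as "ldr x1, [x2, #16]" -- bits 31:30 are
      11 so szLg2 == 3, bit 22 is 1 so it is a load, imm12 == 2 scales
      to offs == 16, nn == 2 and tt == 1. */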
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
      UInt   szLg2 = INSN(31,30);
      UInt   szB   = 1 << szLg2;
      Bool   isLD  = INSN(22,22) == 1;
      UInt   offs  = INSN(21,10) * szB;
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      IRTemp ta    = newTemp(Ity_I64);
      assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
      if (nn == 31) { /* FIXME generate stack alignment check */ }
      vassert(szLg2 < 4);
      if (isLD) {
         putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
      } else {
         gen_narrowing_store(szB, ta, getIReg64orZR(tt));
      }
      const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
      const HChar* st_name[4] = { "strb", "strh", "str", "str" };
      DIP("%s %s, [%s, #%u]\n",
          (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
          nameIReg64orSP(nn), offs);
      return True;
   }

   /* ------------ LDUR,STUR (immediate, simm9) ----------- */
   /*
      31 29  26      20   11 9  4
      |  |   |       |    |  |  |
      (at-Rn-then-Rn=EA)  |  |  |
      sz 111 00000 0 imm9 01 Rn Rt   STR Rt, [Xn|SP], #simm9
      sz 111 00001 0 imm9 01 Rn Rt   LDR Rt, [Xn|SP], #simm9

      (at-EA-then-Rn=EA)
      sz 111 00000 0 imm9 11 Rn Rt   STR Rt, [Xn|SP, #simm9]!
      sz 111 00001 0 imm9 11 Rn Rt   LDR Rt, [Xn|SP, #simm9]!

      (at-EA)
      sz 111 00000 0 imm9 00 Rn Rt   STR Rt, [Xn|SP, #simm9]
      sz 111 00001 0 imm9 00 Rn Rt   LDR Rt, [Xn|SP, #simm9]

      simm9 is unscaled.

      The case 'wback && Rn == Rt && Rt != 31' is disallowed.  In the
      load case this is because it would create two competing values
      for Rt.  In the store case the reason is unclear, but the spec
      disallows it anyway.

      Stores are narrowing, loads are unsigned widening.  sz encodes
      the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
   */
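   /* For example, imm9 == 0x1F0 sign-extends to simm9 == -16, so
      "str x30, [sp, #-16]!" (the how == 11 form) computes
      EA == SP - 16, transfers there, and writes SP - 16 back to SP. */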
   if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
       == BITS9(1,1,1, 0,0,0,0,0, 0)) {
      UInt szLg2  = INSN(31,30);
      UInt szB    = 1 << szLg2;
      Bool isLoad = INSN(22,22) == 1;
      UInt imm9   = INSN(20,12);
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);
      Bool wBack  = INSN(10,10) == 1;
      UInt how    = INSN(11,10);
      if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
         /* undecodable; fall through */
      } else {
         if (nn == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(nn));
         IRTemp tEA = newTemp(Ity_I64);
         Long simm9 = (Long)sx_to_64(imm9, 9);
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (how) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(0,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special case typified by
               str x30, [sp,#-16]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -16 offset takes the actual access
            address to the next page.
         */
         Bool earlyWBack
           = wBack && simm9 < 0 && szB == 8
             && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;

         if (wBack && earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         if (isLoad) {
            putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
         } else {
            gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
         }

         if (wBack && !earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
         const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
         const HChar* fmt_str = NULL;
         switch (how) {
            case BITS2(0,1):
               fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(0,0):
               fmt_str = "%s %s, [%s, #%lld] (at-EA)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
                      nameIRegOrZR(szB == 8, tt),
                      nameIReg64orSP(nn), simm9);
         return True;
      }
   }

   /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
   /* L==1 => mm==LD
      L==0 => mm==ST
      x==0 => 32 bit transfers, and zero extended loads
      x==1 => 64 bit transfers
      simm7 is scaled by the (single-register) transfer size

      (at-Rn-then-Rn=EA)
      x0 101 0001 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP], #imm

      (at-EA-then-Rn=EA)
      x0 101 0011 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]!

      (at-EA)
      x0 101 0010 L imm7 Rt2 Rn Rt1  mmP Rt1,Rt2, [Xn|SP, #imm]
   */

   UInt insn_30_23 = INSN(30,23);
   if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
       || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
       || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
      UInt bL     = INSN(22,22);
      UInt bX     = INSN(31,31);
      UInt bWBack = INSN(23,23);
      UInt rT1    = INSN(4,0);
      UInt rN     = INSN(9,5);
      UInt rT2    = INSN(14,10);
      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
      if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
          || (bL && rT1 == rT2)) {
         /* undecodable; fall through */
      } else {
         if (rN == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(rN));
         IRTemp tEA = newTemp(Ity_I64);
         simm7 = (bX ? 8 : 4) * simm7;
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (INSN(24,23)) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special case typified by
               stp x29, x30, [sp,#-112]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -112 offset takes the actual access
            address to the next page.
         */
         Bool earlyWBack
           = bWBack && simm7 < 0
             && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;

         if (bWBack && earlyWBack)
            putIReg64orSP(rN, mkexpr(tEA));

         /**/ if (bL == 1 && bX == 1) {
            // 64 bit load
            putIReg64orZR(rT1, loadLE(Ity_I64,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
            putIReg64orZR(rT2, loadLE(Ity_I64,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
         } else if (bL == 1 && bX == 0) {
            // 32 bit load
            putIReg32orZR(rT1, loadLE(Ity_I32,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
            putIReg32orZR(rT2, loadLE(Ity_I32,
                                      binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
         } else if (bL == 0 && bX == 1) {
            // 64 bit store
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
                    getIReg64orZR(rT1));
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
                    getIReg64orZR(rT2));
         } else {
            vassert(bL == 0 && bX == 0);
            // 32 bit store
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
                    getIReg32orZR(rT1));
            storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
                    getIReg32orZR(rT2));
         }

         if (bWBack && !earlyWBack)
            putIReg64orSP(rN, mkexpr(tEA));

         const HChar* fmt_str = NULL;
         switch (INSN(24,23)) {
            case BITS2(0,1):
               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(1,0):
               fmt_str = "%sp %s, %s, [%s, #%lld] (at-EA)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, bL == 0 ? "st" : "ld",
                      nameIRegOrZR(bX == 1, rT1),
                      nameIRegOrZR(bX == 1, rT2),
                      nameIReg64orSP(rN), simm7);
         return True;
      }
   }

   /* ---------------- LDR (literal, int reg) ---------------- */
   /* 31 29      23    4
      00 011 000 imm19 Rt   LDR   Wt, [PC + sxTo64(imm19 << 2)]
      01 011 000 imm19 Rt   LDR   Xt, [PC + sxTo64(imm19 << 2)]
      10 011 000 imm19 Rt   LDRSW Xt, [PC + sxTo64(imm19 << 2)]
      11 011 000 imm19 Rt   prefetch  [PC + sxTo64(imm19 << 2)]
      Just handles the first two cases for now.
   */
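   /* For instance, with this insn at guest address 0x4000 and
      imm19 == 4, the offset is sxTo64(4 << 2) == 16, so the load is
      done from ea == 0x4010. */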
   if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
      UInt  imm19 = INSN(23,5);
      UInt  rT    = INSN(4,0);
      UInt  bX    = INSN(30,30);
      ULong ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
      if (bX) {
         putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
      } else {
         putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
      }
      DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
      return True;
   }

   /* -------------- {LD,ST}R (integer register) --------------- */
   /* 31 29        20 15     12 11 9  4
      |  |         |  |      |  |  |  |
      11 111000011 Rm option S  10 Rn Rt  LDR  Xt, [Xn|SP, R<m>{ext/sh}]
      10 111000011 Rm option S  10 Rn Rt  LDR  Wt, [Xn|SP, R<m>{ext/sh}]
      01 111000011 Rm option S  10 Rn Rt  LDRH Wt, [Xn|SP, R<m>{ext/sh}]
      00 111000011 Rm option S  10 Rn Rt  LDRB Wt, [Xn|SP, R<m>{ext/sh}]

      11 111000001 Rm option S  10 Rn Rt  STR  Xt, [Xn|SP, R<m>{ext/sh}]
      10 111000001 Rm option S  10 Rn Rt  STR  Wt, [Xn|SP, R<m>{ext/sh}]
      01 111000001 Rm option S  10 Rn Rt  STRH Wt, [Xn|SP, R<m>{ext/sh}]
      00 111000001 Rm option S  10 Rn Rt  STRB Wt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2 = INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   tt    = INSN(4,0);
      IRTemp ea    = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea != IRTemp_INVALID) {
         switch (szLg2) {
            case 3: /* 64 bit */
               if (isLD) {
                  putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
                  DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), getIReg64orZR(tt));
                  DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
               }
               break;
            case 2: /* 32 bit */
               if (isLD) {
                  putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
                  DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), getIReg32orZR(tt));
                  DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            case 1: /* 16 bit */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_16Uto64,
                                         loadLE(Ity_I16, mkexpr(ea))));
                  DIP("ldrh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
                  DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            case 0: /* 8 bit */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_8Uto64,
                                         loadLE(Ity_I8, mkexpr(ea))));
                  DIP("ldrb %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
                  DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

   /* -------------- LDRS{B,H,W} (uimm12) -------------- */
   /* 31 29  26  23 21    9 4
      10 111 001 10 imm12 n t   LDRSW Xt, [Xn|SP, #pimm12 * 4]
      01 111 001 1x imm12 n t   LDRSH Rt, [Xn|SP, #pimm12 * 2]
      00 111 001 1x imm12 n t   LDRSB Rt, [Xn|SP, #pimm12 * 1]
      where
         Rt is Wt when x==1, Xt when x==0
   */
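   /* For example, bits 31:30 == 01 with x == 0 and imm12 == 1 is
      "ldrsh x<t>, [x<n>, #2]": a 16-bit load, sign-extended to 64
      bits, the offset having been scaled by szB == 2. */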
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):
         case BITS3(0,1,0): case BITS3(0,1,1):
         case BITS3(0,0,0): case BITS3(0,0,1):
            valid = True;
            break;
      }
      if (valid) {
         UInt    szLg2 = INSN(31,30);
         UInt    bitX  = INSN(22,22);
         UInt    imm12 = INSN(21,10);
         UInt    nn    = INSN(9,5);
         UInt    tt    = INSN(4,0);
         UInt    szB   = 1 << szLg2;
         IRExpr* ea    = binop(Iop_Add64,
                               getIReg64orSP(nn), mkU64(imm12 * szB));
         switch (szB) {
            case 4:
               vassert(bitX == 0);
               putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
               DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 2:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
               }
               DIP("ldrsh %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 1:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
               }
               DIP("ldrsb %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* else fall through */
   }

   /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
   /* (at-Rn-then-Rn=EA)
      31 29      23 21 20   11 9 4
      00 111 000 1x 0  imm9 01 n t  LDRSB Rt, [Xn|SP], #simm9
      01 111 000 1x 0  imm9 01 n t  LDRSH Rt, [Xn|SP], #simm9
      10 111 000 10 0  imm9 01 n t  LDRSW Xt, [Xn|SP], #simm9

      (at-EA-then-Rn=EA)
      00 111 000 1x 0  imm9 11 n t  LDRSB Rt, [Xn|SP, #simm9]!
      01 111 000 1x 0  imm9 11 n t  LDRSH Rt, [Xn|SP, #simm9]!
      10 111 000 10 0  imm9 11 n t  LDRSW Xt, [Xn|SP, #simm9]!
      where
         Rt is Wt when x==1, Xt when x==0
         transfer-at-Rn when [11]==0, at EA when [11]==1
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDRSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         Bool   atRN  = INSN(11,11) == 0;
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         IRTemp tTA   = IRTemp_INVALID;
         Long   simm9 = (Long)sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         tTA = atRN ? tRN : tEA;
         HChar ch = '?';
         /* There are 5 cases:
               byte     load,           SX to 64
               byte     load, SX to 32, ZX to 64
               halfword load,           SX to 64
               halfword load, SX to 32, ZX to 64
               word     load,           SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tTA))));
         }
         else {
            vassert(0);
         }
         putIReg64orSP(nn, mkexpr(tEA));
         DIP(atRN ? "ldrs%c %s, [%s], #%lld\n"
                  : "ldrs%c %s, [%s, #%lld]!\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
         return True;
      }
      /* else fall through */
   }

   /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
   /* 31 29      23 21 20   11 9 4
      00 111 000 1x 0  imm9 00 n t  LDURSB Rt, [Xn|SP, #simm9]
      01 111 000 1x 0  imm9 00 n t  LDURSH Rt, [Xn|SP, #simm9]
      10 111 000 10 0  imm9 00 n t  LDURSW Xt, [Xn|SP, #simm9]
      where
         Rt is Wt when x==1, Xt when x==0
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDURSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         Long   simm9 = (Long)sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         HChar ch = '?';
         /* There are 5 cases:
               byte     load,           SX to 64
               byte     load, SX to 32, ZX to 64
               halfword load,           SX to 64
               halfword load, SX to 32, ZX to 64
               word     load,           SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tEA))));
         }
         else {
            vassert(0);
         }
         DIP("ldurs%c %s, [%s, #%lld]\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
         return True;
      }
      /* else fall through */
   }

   /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
   /* L==1    => mm==LD
      L==0    => mm==ST
      sz==00  => 32 bit (S) transfers
      sz==01  => 64 bit (D) transfers
      sz==10  => 128 bit (Q) transfers
      sz==11  isn't allowed
      simm7 is scaled by the (single-register) transfer size

      31 29  26   22 21   14 9 4

      sz 101 1000 L  imm7 t2 n t1   mmNP SDQt1, SDQt2, [Xn|SP, #imm]
                                    (at-EA, with nontemporal hint)

      sz 101 1001 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP], #imm
                                    (at-Rn-then-Rn=EA)

      sz 101 1010 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]
                                    (at-EA)

      sz 101 1011 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]!
                                    (at-EA-then-Rn=EA)
   */
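   /* For example, "ldp q0, q1, [sp], #32" is the 1001
      (at-Rn-then-Rn=EA) form with sz == 10, so szB == 16 and
      imm7 == 2; q0 and q1 are loaded from [sp] and [sp+16], and
      sp+32 is then written back. */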
   5248    if (INSN(29,25) == BITS5(1,0,1,1,0)) {
   5249       UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
   5250       Bool isLD   = INSN(22,22) == 1;
   5251       Bool wBack  = INSN(23,23) == 1;
   5252       Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
   5253       UInt tt2    = INSN(14,10);
   5254       UInt nn     = INSN(9,5);
   5255       UInt tt1    = INSN(4,0);
   5256       if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
   5257          /* undecodable; fall through */
   5258       } else {
   5259          if (nn == 31) { /* FIXME generate stack alignment check */ }
   5260 
   5261          // Compute the transfer address TA and the writeback address WA.
   5262          UInt   szB = 4 << szSlg2; /* szB is the per-register size */
   5263          IRTemp tRN = newTemp(Ity_I64);
   5264          assign(tRN, getIReg64orSP(nn));
   5265          IRTemp tEA = newTemp(Ity_I64);
   5266          simm7 = szB * simm7;
   5267          assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
   5268 
   5269          IRTemp tTA = newTemp(Ity_I64);
   5270          IRTemp tWA = newTemp(Ity_I64);
   5271          switch (INSN(24,23)) {
   5272             case BITS2(0,1):
   5273                assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
   5274             case BITS2(1,1):
   5275                assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
   5276             case BITS2(1,0):
   5277             case BITS2(0,0):
   5278                assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
   5279             default:
   5280                vassert(0); /* NOTREACHED */
   5281          }
   5282 
   5283          IRType ty = Ity_INVALID;
   5284          switch (szB) {
   5285             case 4:  ty = Ity_F32;  break;
   5286             case 8:  ty = Ity_F64;  break;
   5287             case 16: ty = Ity_V128; break;
   5288             default: vassert(0);
   5289          }
   5290 
   5291          /* Normally rN would be updated after the transfer.  However, in
   5292             the special cases typifed by
   5293                stp q0, q1, [sp,#-512]!
   5294                stp d0, d1, [sp,#-512]!
   5295                stp s0, s1, [sp,#-512]!
   5296             it is necessary to update SP before the transfer, (1)
   5297             because Memcheck will otherwise complain about a write
   5298             below the stack pointer, and (2) because the segfault
   5299             stack extension mechanism will otherwise extend the stack
   5300             only down to SP before the instruction, which might not be
   5301             far enough, if the -512 bit takes the actual access
   5302             address to the next page.
   5303          */
   5304          Bool earlyWBack
   5305            = wBack && simm7 < 0
   5306              && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
   5307 
   5308          if (wBack && earlyWBack)
   5309             putIReg64orSP(nn, mkexpr(tEA));
   5310 
   5311          if (isLD) {
   5312             if (szB < 16) {
   5313                putQReg128(tt1, mkV128(0x0000));
   5314             }
   5315             putQRegLO(tt1,
   5316                       loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
   5317             if (szB < 16) {
   5318                putQReg128(tt2, mkV128(0x0000));
   5319             }
   5320             putQRegLO(tt2,
   5321                       loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
   5322          } else {
   5323             storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
   5324                     getQRegLO(tt1, ty));
   5325             storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
   5326                     getQRegLO(tt2, ty));
   5327          }
   5328 
   5329          if (wBack && !earlyWBack)
   5330             putIReg64orSP(nn, mkexpr(tEA));
   5331 
   5332          const HChar* fmt_str = NULL;
   5333          switch (INSN(24,23)) {
   5334             case BITS2(0,1):
   5335                fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
   5336                break;
   5337             case BITS2(1,1):
   5338                fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
   5339                break;
   5340             case BITS2(1,0):
   5341                fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
   5342                break;
   5343             case BITS2(0,0):
   5344                fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
   5345                break;
   5346             default:
   5347                vassert(0);
   5348          }
   5349          DIP(fmt_str, isLD ? "ld" : "st",
   5350                       nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
   5351                       nameIReg64orSP(nn), simm7);
   5352          return True;
   5353       }
   5354    }
   5355 
   5356    /* -------------- {LD,ST}R (vector register) --------------- */
   5357    /* 31 29     23  20 15     12 11 9  4
   5358       |  |      |   |  |      |  |  |  |
   5359       00 111100 011 Rm option S  10 Rn Rt  LDR Bt, [Xn|SP, R<m>{ext/sh}]
   5360       01 111100 011 Rm option S  10 Rn Rt  LDR Ht, [Xn|SP, R<m>{ext/sh}]
   5361       10 111100 011 Rm option S  10 Rn Rt  LDR St, [Xn|SP, R<m>{ext/sh}]
   5362       11 111100 011 Rm option S  10 Rn Rt  LDR Dt, [Xn|SP, R<m>{ext/sh}]
   5363       00 111100 111 Rm option S  10 Rn Rt  LDR Qt, [Xn|SP, R<m>{ext/sh}]
   5364 
   5365       00 111100 001 Rm option S  10 Rn Rt  STR Bt, [Xn|SP, R<m>{ext/sh}]
   5366       01 111100 001 Rm option S  10 Rn Rt  STR Ht, [Xn|SP, R<m>{ext/sh}]
   5367       10 111100 001 Rm option S  10 Rn Rt  STR St, [Xn|SP, R<m>{ext/sh}]
   5368       11 111100 001 Rm option S  10 Rn Rt  STR Dt, [Xn|SP, R<m>{ext/sh}]
   5369       00 111100 101 Rm option S  10 Rn Rt  STR Qt, [Xn|SP, R<m>{ext/sh}]
   5370    */
   5371    if (INSN(29,24) == BITS6(1,1,1,1,0,0)
   5372        && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
   5373       HChar  dis_buf[64];
   5374       UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
   5375       Bool   isLD  = INSN(22,22) == 1;
   5376       UInt   tt    = INSN(4,0);
   5377       if (szLg2 > 4) goto after_LDR_STR_vector_register;
   5378       IRTemp ea    = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
   5379       if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
   5380       switch (szLg2) {
   5381          case 0: /* 8 bit */
   5382             if (isLD) {
   5383                putQReg128(tt, mkV128(0x0000));
   5384                putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
   5385                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
   5386             } else {
   5387                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
   5388                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
   5389             }
   5390             break;
   5391          case 1:
   5392             if (isLD) {
   5393                putQReg128(tt, mkV128(0x0000));
   5394                putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
   5395                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
   5396             } else {
   5397                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
   5398                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
   5399             }
   5400             break;
   5401          case 2: /* 32 bit */
   5402             if (isLD) {
   5403                putQReg128(tt, mkV128(0x0000));
   5404                putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
   5405                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
   5406             } else {
   5407                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
   5408                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
   5409             }
   5410             break;
   5411          case 3: /* 64 bit */
   5412             if (isLD) {
   5413                putQReg128(tt, mkV128(0x0000));
   5414                putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
   5415                DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
   5416             } else {
   5417                storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
   5418                DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
   5419             }
   5420             break;
   5421          case 4:
   5422             if (isLD) {
   5423                putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
   5424                DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
   5425             } else {
   5426                storeLE(mkexpr(ea), getQReg128(tt));
   5427                DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
   5428             }
   5429             break;
   5430          default:
   5431             vassert(0);
   5432       }
   5433       return True;
   5434    }
   5435   after_LDR_STR_vector_register:
   5436 
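           /* Decode sketch (illustrative): the transfer size is reassembled
              from two fields, szLg2 = (insn[23] << 2) | insn[31:30], giving
              1 << szLg2 bytes.  E.g. "ldr q5, [x1, x2]" has insn[31:30] == 00
              and insn[23] == 1, so szLg2 == 4 and 16 bytes move;
              "ldr d5, [x1, x2]" has insn[31:30] == 11 and insn[23] == 0, so
              szLg2 == 3 and 8 bytes move. */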
   5437    /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
   5438    /* 31 29      22 20 15  12 11 9  4
   5439       |  |       |  |  |   |  |  |  |
   5440       10 1110001 01 Rm opt S 10 Rn Rt    LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
   5441 
   5442       01 1110001 01 Rm opt S 10 Rn Rt    LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
   5443       01 1110001 11 Rm opt S 10 Rn Rt    LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
   5444 
   5445       00 1110001 01 Rm opt S 10 Rn Rt    LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
   5446       00 1110001 11 Rm opt S 10 Rn Rt    LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
   5447    */
   5448    if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
   5449        && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
   5450       HChar  dis_buf[64];
   5451       UInt   szLg2  = INSN(31,30);
   5452       Bool   sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
   5453       UInt   tt     = INSN(4,0);
   5454       if (szLg2 == 3) goto after_LDRS_integer_register;
   5455       IRTemp ea     = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
   5456       if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
   5457       /* Enumerate the 5 variants explicitly. */
   5458       if (szLg2 == 2/*32 bit*/ && sxTo64) {
   5459          putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
   5460          DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
   5461          return True;
   5462       }
   5463       else
   5464       if (szLg2 == 1/*16 bit*/) {
   5465          if (sxTo64) {
   5466             putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
   5467             DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
   5468          } else {
   5469             putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
   5470             DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
   5471          }
   5472          return True;
   5473       }
   5474       else
   5475       if (szLg2 == 0/*8 bit*/) {
   5476          if (sxTo64) {
   5477             putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
   5478             DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
   5479          } else {
   5480             putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
   5481             DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
   5482          }
   5483          return True;
   5484       }
   5485       /* else it's an invalid combination */
   5486    }
   5487   after_LDRS_integer_register:
   5488 
   5489    /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
   5490    /* This is the Unsigned offset variant only.  The Post-Index and
   5491       Pre-Index variants are below.
   5492 
   5493       31 29      23 21    9 4
   5494       00 111 101 01 imm12 n t   LDR Bt, [Xn|SP + imm12 * 1]
   5495       01 111 101 01 imm12 n t   LDR Ht, [Xn|SP + imm12 * 2]
   5496       10 111 101 01 imm12 n t   LDR St, [Xn|SP + imm12 * 4]
   5497       11 111 101 01 imm12 n t   LDR Dt, [Xn|SP + imm12 * 8]
   5498       00 111 101 11 imm12 n t   LDR Qt, [Xn|SP + imm12 * 16]
   5499 
   5500       00 111 101 00 imm12 n t   STR Bt, [Xn|SP + imm12 * 1]
   5501       01 111 101 00 imm12 n t   STR Ht, [Xn|SP + imm12 * 2]
   5502       10 111 101 00 imm12 n t   STR St, [Xn|SP + imm12 * 4]
   5503       11 111 101 00 imm12 n t   STR Dt, [Xn|SP + imm12 * 8]
   5504       00 111 101 10 imm12 n t   STR Qt, [Xn|SP + imm12 * 16]
   5505    */
   5506    if (INSN(29,24) == BITS6(1,1,1,1,0,1)
   5507        && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
   5508       UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
   5509       Bool   isLD   = INSN(22,22) == 1;
   5510       UInt   pimm12 = INSN(21,10) << szLg2;
   5511       UInt   nn     = INSN(9,5);
   5512       UInt   tt     = INSN(4,0);
   5513       IRTemp tEA    = newTemp(Ity_I64);
   5514       IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
   5515       assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
   5516       if (isLD) {
   5517          if (szLg2 < 4) {
   5518             putQReg128(tt, mkV128(0x0000));
   5519          }
   5520          putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
   5521       } else {
   5522          storeLE(mkexpr(tEA), getQRegLO(tt, ty));
   5523       }
   5524       DIP("%s %s, [%s, #%u]\n",
   5525           isLD ? "ldr" : "str",
   5526           nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
   5527       return True;
   5528    }
   5529 
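           /* Scaling sketch (illustrative): the 12-bit immediate is unsigned
              and pre-scaled by the transfer size,
              pimm12 = INSN(21,10) << szLg2.  E.g. "ldr d0, [x3, #24]"
              encodes imm12 == 3 with szLg2 == 3, since 3 << 3 == 24.  Byte
              offsets that are not a multiple of the transfer size cannot be
              encoded in this form and must use the unscaled LDUR/STUR form
              below. */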
   5530    /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
   5531    /* These are the Post-Index and Pre-Index variants.
   5532 
   5533       31 29      23   20   11 9 4
   5534       (at-Rn-then-Rn=EA)
   5535       00 111 100 01 0 imm9 01 n t   LDR Bt, [Xn|SP], #simm
   5536       01 111 100 01 0 imm9 01 n t   LDR Ht, [Xn|SP], #simm
   5537       10 111 100 01 0 imm9 01 n t   LDR St, [Xn|SP], #simm
   5538       11 111 100 01 0 imm9 01 n t   LDR Dt, [Xn|SP], #simm
   5539       00 111 100 11 0 imm9 01 n t   LDR Qt, [Xn|SP], #simm
   5540 
   5541       (at-EA-then-Rn=EA)
   5542       00 111 100 01 0 imm9 11 n t   LDR Bt, [Xn|SP, #simm]!
   5543       01 111 100 01 0 imm9 11 n t   LDR Ht, [Xn|SP, #simm]!
   5544       10 111 100 01 0 imm9 11 n t   LDR St, [Xn|SP, #simm]!
   5545       11 111 100 01 0 imm9 11 n t   LDR Dt, [Xn|SP, #simm]!
   5546       00 111 100 11 0 imm9 11 n t   LDR Qt, [Xn|SP, #simm]!
   5547 
   5548       Stores are the same except with bit 22 set to 0.
   5549    */
   5550    if (INSN(29,24) == BITS6(1,1,1,1,0,0)
   5551        && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
   5552        && INSN(21,21) == 0 && INSN(10,10) == 1) {
   5553       UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
   5554       Bool   isLD   = INSN(22,22) == 1;
   5555       UInt   imm9   = INSN(20,12);
   5556       Bool   atRN   = INSN(11,11) == 0;
   5557       UInt   nn     = INSN(9,5);
   5558       UInt   tt     = INSN(4,0);
   5559       IRTemp tRN    = newTemp(Ity_I64);
   5560       IRTemp tEA    = newTemp(Ity_I64);
   5561       IRTemp tTA    = IRTemp_INVALID;
   5562       IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
   5563       ULong  simm9  = sx_to_64(imm9, 9);
   5564       assign(tRN, getIReg64orSP(nn));
   5565       assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
   5566       tTA = atRN ? tRN : tEA;
   5567       if (isLD) {
   5568          if (szLg2 < 4) {
   5569             putQReg128(tt, mkV128(0x0000));
   5570          }
   5571          putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
   5572       } else {
   5573          storeLE(mkexpr(tTA), getQRegLO(tt, ty));
   5574       }
   5575       putIReg64orSP(nn, mkexpr(tEA));
   5576       DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
   5577           isLD ? "ldr" : "str",
   5578           nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
   5579       return True;
   5580    }
   5581 
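           /* Sign-extension sketch (illustrative): sx_to_64(imm9, 9) treats
              the 9-bit field as two's complement.  E.g. imm9 == 0x1F0
              (binary 1 1111 0000) yields 0xFFFFFFFFFFFFFFF0, i.e. -16, so
              "str q0, [sp, #-16]!" pre-decrements SP by 16 and stores at the
              new address. */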
   5582    /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
   5583    /* 31 29      23   20   11 9 4
   5584       00 111 100 01 0 imm9 00 n t   LDR Bt, [Xn|SP, #simm]
   5585       01 111 100 01 0 imm9 00 n t   LDR Ht, [Xn|SP, #simm]
   5586       10 111 100 01 0 imm9 00 n t   LDR St, [Xn|SP, #simm]
   5587       11 111 100 01 0 imm9 00 n t   LDR Dt, [Xn|SP, #simm]
   5588       00 111 100 11 0 imm9 00 n t   LDR Qt, [Xn|SP, #simm]
   5589 
   5590       00 111 100 00 0 imm9 00 n t   STR Bt, [Xn|SP, #simm]
   5591       01 111 100 00 0 imm9 00 n t   STR Ht, [Xn|SP, #simm]
   5592       10 111 100 00 0 imm9 00 n t   STR St, [Xn|SP, #simm]
   5593       11 111 100 00 0 imm9 00 n t   STR Dt, [Xn|SP, #simm]
   5594       00 111 100 10 0 imm9 00 n t   STR Qt, [Xn|SP, #simm]
   5595    */
   5596    if (INSN(29,24) == BITS6(1,1,1,1,0,0)
   5597        && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
   5598        && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
   5599       UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
   5600       Bool   isLD   = INSN(22,22) == 1;
   5601       UInt   imm9   = INSN(20,12);
   5602       UInt   nn     = INSN(9,5);
   5603       UInt   tt     = INSN(4,0);
   5604       ULong  simm9  = sx_to_64(imm9, 9);
   5605       IRTemp tEA    = newTemp(Ity_I64);
   5606       IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
   5607       assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
   5608       if (isLD) {
   5609          if (szLg2 < 4) {
   5610             putQReg128(tt, mkV128(0x0000));
   5611          }
   5612          putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
   5613       } else {
   5614          storeLE(mkexpr(tEA), getQRegLO(tt, ty));
   5615       }
   5616       DIP("%s %s, [%s, #%lld]\n",
   5617           isLD ? "ldur" : "stur",
   5618           nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
   5619       return True;
   5620    }
   5621 
   5622    /* ---------------- LDR (literal, SIMD&FP) ---------------- */
   5623    /* 31 29      23    4
   5624       00 011 100 imm19 t    LDR St, [PC + sxTo64(imm19 << 2)]
   5625       01 011 100 imm19 t    LDR Dt, [PC + sxTo64(imm19 << 2)]
   5626       10 011 100 imm19 t    LDR Qt, [PC + sxTo64(imm19 << 2)]
   5627    */
   5628    if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
   5629       UInt   szB   = 4 << INSN(31,30);
   5630       UInt   imm19 = INSN(23,5);
   5631       UInt   tt    = INSN(4,0);
   5632       ULong  ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
   5633       IRType ty    = preferredVectorSubTypeFromSize(szB);
   5634       putQReg128(tt, mkV128(0x0000));
   5635       putQRegLO(tt, loadLE(ty, mkU64(ea)));
   5636       DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
   5637       return True;
   5638    }
   5639 
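           /* Address sketch (illustrative): the literal load is PC-relative
              with a word offset, ea = PC + sx_to_64(imm19 << 2, 21).  E.g.
              imm19 == 2 reads from PC+8, while imm19 == 0x7FFFF (all ones,
              i.e. -1 words) reads from PC-4. */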
   5640    /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg)  ------ */
   5641    /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs) ------ */
   5642    /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs) ------ */
   5643    /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs) ------ */
   5644    /* 31 29  26   22 21 20    15   11 9 4
   5645 
   5646       0q 001 1000 L  0  00000 0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP]
   5647       0q 001 1001 L  0  m     0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP], step
   5648 
   5649       0q 001 1000 L  0  00000 0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP]
   5650       0q 001 1001 L  0  m     0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP], step
   5651 
   5652       0q 001 1000 L  0  00000 1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP]
   5653       0q 001 1001 L  0  m     1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP], step
   5654 
   5655       0q 001 1000 L  0  00000 0111 sz n t  xx1 {Vt.T},      [Xn|SP]
   5656       0q 001 1001 L  0  m     0111 sz n t  xx1 {Vt.T},      [Xn|SP], step
   5657 
   5658       T    = defined by Q and sz in the normal way
   5659       step = if m == 11111 then transfer-size else Xm
   5660       xx   = case L of 1 -> LD ; 0 -> ST
   5661    */
   5662    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
   5663        && INSN(21,21) == 0) {
   5664       Bool bitQ  = INSN(30,30);
   5665       Bool isPX  = INSN(23,23) == 1;
   5666       Bool isLD  = INSN(22,22) == 1;
   5667       UInt mm    = INSN(20,16);
   5668       UInt opc   = INSN(15,12);
   5669       UInt sz    = INSN(11,10);
   5670       UInt nn    = INSN(9,5);
   5671       UInt tt    = INSN(4,0);
   5672       Bool isQ   = bitQ == 1;
   5673       Bool is1d  = sz == BITS2(1,1) && !isQ;
   5674       UInt nRegs = 0;
   5675       switch (opc) {
   5676          case BITS4(0,0,0,0): nRegs = 4; break;
   5677          case BITS4(0,1,0,0): nRegs = 3; break;
   5678          case BITS4(1,0,0,0): nRegs = 2; break;
   5679          case BITS4(0,1,1,1): nRegs = 1; break;
   5680          default: break;
   5681       }
   5682 
   5683       /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
   5684          If we see it, set nRegs to 0 so as to cause the next conditional
   5685          to fail. */
   5686       if (!isPX && mm != 0)
   5687          nRegs = 0;
   5688 
   5689       if (nRegs == 1                             /* .1d is allowed */
   5690           || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {
   5691 
   5692          UInt xferSzB = (isQ ? 16 : 8) * nRegs;
   5693 
   5694          /* Generate the transfer address (TA) and if necessary the
   5695             writeback address (WB) */
   5696          IRTemp tTA = newTemp(Ity_I64);
   5697          assign(tTA, getIReg64orSP(nn));
   5698          if (nn == 31) { /* FIXME generate stack alignment check */ }
   5699          IRTemp tWB = IRTemp_INVALID;
   5700          if (isPX) {
   5701             tWB = newTemp(Ity_I64);
   5702             assign(tWB, binop(Iop_Add64,
   5703                               mkexpr(tTA),
   5704                               mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
   5705                                                      : getIReg64orZR(mm)));
   5706          }
   5707 
   5708          /* -- BEGIN generate the transfers -- */
   5709 
   5710          IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
   5711          u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
   5712          switch (nRegs) {
   5713             case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
   5714             case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
   5715             case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
   5716             case 1: u0 = newTempV128(); i0 = newTempV128(); break;
   5717             default: vassert(0);
   5718          }
   5719 
   5720          /* -- Multiple 128 or 64 bit stores -- */
   5721          if (!isLD) {
   5722             switch (nRegs) {
   5723                case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
   5724                case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
   5725                case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
   5726                case 1: assign(u0, getQReg128((tt+0) % 32)); break;
   5727                default: vassert(0);
   5728             }
   5729             switch (nRegs) {
   5730                case 4:  (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
   5731                            (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
   5732                         break;
   5733                case 3:  (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
   5734                            (&i0, &i1, &i2, sz, u0, u1, u2);
   5735                         break;
   5736                case 2:  (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
   5737                            (&i0, &i1, sz, u0, u1);
   5738                         break;
   5739                case 1:  (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
   5740                            (&i0, sz, u0);
   5741                         break;
   5742                default: vassert(0);
   5743             }
   5744 #           define MAYBE_NARROW_TO_64(_expr) \
   5745                       (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
   5746             UInt step = isQ ? 16 : 8;
   5747             switch (nRegs) {
   5748                case 4:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
   5749                                  MAYBE_NARROW_TO_64(mkexpr(i3)) );
   5750                         /* fallthru */
   5751                case 3:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
   5752                                  MAYBE_NARROW_TO_64(mkexpr(i2)) );
   5753                         /* fallthru */
   5754                case 2:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
   5755                                  MAYBE_NARROW_TO_64(mkexpr(i1)) );
   5756                         /* fallthru */
   5757                case 1:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
   5758                                  MAYBE_NARROW_TO_64(mkexpr(i0)) );
   5759                         break;
   5760                default: vassert(0);
   5761             }
   5762 #           undef MAYBE_NARROW_TO_64
   5763          }
   5764 
   5765          /* -- Multiple 128 or 64 bit loads -- */
   5766          else /* isLD */ {
   5767             UInt   step   = isQ ? 16 : 8;
   5768             IRType loadTy = isQ ? Ity_V128 : Ity_I64;
   5769 #           define MAYBE_WIDEN_FROM_64(_expr) \
   5770                       (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
   5771             switch (nRegs) {
   5772                case 4:
   5773                   assign(i3, MAYBE_WIDEN_FROM_64(
   5774                                 loadLE(loadTy,
   5775                                        binop(Iop_Add64, mkexpr(tTA),
   5776                                                         mkU64(3 * step)))));
   5777                   /* fallthru */
   5778                case 3:
   5779                   assign(i2, MAYBE_WIDEN_FROM_64(
   5780                                 loadLE(loadTy,
   5781                                        binop(Iop_Add64, mkexpr(tTA),
   5782                                                         mkU64(2 * step)))));
   5783                   /* fallthru */
   5784                case 2:
   5785                   assign(i1, MAYBE_WIDEN_FROM_64(
   5786                                 loadLE(loadTy,
   5787                                        binop(Iop_Add64, mkexpr(tTA),
   5788                                                         mkU64(1 * step)))));
   5789                   /* fallthru */
   5790                case 1:
   5791                   assign(i0, MAYBE_WIDEN_FROM_64(
   5792                                 loadLE(loadTy,
   5793                                        binop(Iop_Add64, mkexpr(tTA),
   5794                                                         mkU64(0 * step)))));
   5795                   break;
   5796                default:
   5797                   vassert(0);
   5798             }
   5799 #           undef MAYBE_WIDEN_FROM_64
   5800             switch (nRegs) {
   5801                case 4:  (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
   5802                            (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3);
   5803                         break;
   5804                case 3:  (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
   5805                            (&u0, &u1, &u2, sz, i0, i1, i2);
   5806                         break;
   5807                case 2:  (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
   5808                            (&u0, &u1, sz, i0, i1);
   5809                         break;
   5810                case 1:  (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
   5811                            (&u0, sz, i0);
   5812                         break;
   5813                default: vassert(0);
   5814             }
   5815             switch (nRegs) {
   5816                case 4:  putQReg128( (tt+3) % 32,
   5817                                     math_MAYBE_ZERO_HI64(bitQ, u3));
   5818                         /* fallthru */
   5819                case 3:  putQReg128( (tt+2) % 32,
   5820                                     math_MAYBE_ZERO_HI64(bitQ, u2));
   5821                         /* fallthru */
   5822                case 2:  putQReg128( (tt+1) % 32,
   5823                                     math_MAYBE_ZERO_HI64(bitQ, u1));
   5824                         /* fallthru */
   5825                case 1:  putQReg128( (tt+0) % 32,
   5826                                     math_MAYBE_ZERO_HI64(bitQ, u0));
   5827                         break;
   5828                default: vassert(0);
   5829             }
   5830          }
   5831 
   5832          /* -- END generate the transfers -- */
   5833 
   5834          /* Do the writeback, if necessary */
   5835          if (isPX) {
   5836             putIReg64orSP(nn, mkexpr(tWB));
   5837          }
   5838 
   5839          HChar pxStr[20];
   5840          pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
   5841          if (isPX) {
   5842             if (mm == BITS5(1,1,1,1,1))
   5843                vex_sprintf(pxStr, ", #%u", xferSzB);
   5844             else
   5845                vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
   5846          }
   5847          const HChar* arr = nameArr_Q_SZ(bitQ, sz);
   5848          DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
   5849              isLD ? "ld" : "st", nRegs,
   5850              (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
   5851              pxStr);
   5852 
   5853          return True;
   5854       }
   5855       /* else fall through */
   5856    }
   5857 
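           /* Interleaving sketch (illustrative): for
              "st2 {v0.8h, v1.8h}, [x0]" the memory image alternates the two
              registers' elements,
                 v0.h[0], v1.h[0], v0.h[1], v1.h[1], ...
              which math_INTERLEAVE2_128 builds into i0/i1 before the two
              stores; the load path runs math_DEINTERLEAVE2_128 in the other
              direction.  For nRegs == 1 the (de)interleave is just a copy. */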
   5858    /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs) ------ */
   5859    /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs) ------ */
   5860    /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs) ------ */
   5861    /* 31 29  26   22 21 20    15   11 9 4
   5862 
   5863       0q 001 1000 L  0  00000 0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP]
   5864       0q 001 1001 L  0  m     0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP], step
   5865 
   5866       0q 001 1000 L  0  00000 0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP]
   5867       0q 001 1001 L  0  m     0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP], step
   5868 
   5869       0q 001 1000 L  0  00000 1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP]
   5870       0q 001 1001 L  0  m     1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP], step
   5871 
   5872       T    = defined by Q and sz in the normal way
   5873       step = if m == 11111 then transfer-size else Xm
   5874       xx   = case L of 1 -> LD ; 0 -> ST
   5875    */
   5876    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
   5877        && INSN(21,21) == 0) {
   5878       Bool bitQ  = INSN(30,30);
   5879       Bool isPX  = INSN(23,23) == 1;
   5880       Bool isLD  = INSN(22,22) == 1;
   5881       UInt mm    = INSN(20,16);
   5882       UInt opc   = INSN(15,12);
   5883       UInt sz    = INSN(11,10);
   5884       UInt nn    = INSN(9,5);
   5885       UInt tt    = INSN(4,0);
   5886       Bool isQ   = bitQ == 1;
   5887       UInt nRegs = 0;
   5888       switch (opc) {
   5889          case BITS4(0,0,1,0): nRegs = 4; break;
   5890          case BITS4(0,1,1,0): nRegs = 3; break;
   5891          case BITS4(1,0,1,0): nRegs = 2; break;
   5892          default: break;
   5893       }
   5894 
   5895       /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
   5896          If we see it, set nRegs to 0 so as to cause the next conditional
   5897          to fail. */
   5898       if (!isPX && mm != 0)
   5899          nRegs = 0;
   5900 
   5901       if (nRegs >= 2 && nRegs <= 4) {
   5902 
   5903          UInt xferSzB = (isQ ? 16 : 8) * nRegs;
   5904 
   5905          /* Generate the transfer address (TA) and if necessary the
   5906             writeback address (WB) */
   5907          IRTemp tTA = newTemp(Ity_I64);
   5908          assign(tTA, getIReg64orSP(nn));
   5909          if (nn == 31) { /* FIXME generate stack alignment check */ }
   5910          IRTemp tWB = IRTemp_INVALID;
   5911          if (isPX) {
   5912             tWB = newTemp(Ity_I64);
   5913             assign(tWB, binop(Iop_Add64,
   5914                               mkexpr(tTA),
   5915                               mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
   5916                                                      : getIReg64orZR(mm)));
   5917          }
   5918 
   5919          /* -- BEGIN generate the transfers -- */
   5920 
   5921          IRTemp u0, u1, u2, u3;
   5922          u0 = u1 = u2 = u3 = IRTemp_INVALID;
   5923          switch (nRegs) {
   5924             case 4: u3 = newTempV128(); /* fallthru */
   5925             case 3: u2 = newTempV128(); /* fallthru */
   5926             case 2: u1 = newTempV128();
   5927                     u0 = newTempV128(); break;
   5928             default: vassert(0);
   5929          }
   5930 
   5931          /* -- Multiple 128 or 64 bit stores -- */
   5932          if (!isLD) {
   5933             switch (nRegs) {
   5934                case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
   5935                case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
   5936                case 2: assign(u1, getQReg128((tt+1) % 32));
   5937                        assign(u0, getQReg128((tt+0) % 32)); break;
   5938                default: vassert(0);
   5939             }
   5940 #           define MAYBE_NARROW_TO_64(_expr) \
   5941                       (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
   5942             UInt step = isQ ? 16 : 8;
   5943             switch (nRegs) {
   5944                case 4:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
   5945                                  MAYBE_NARROW_TO_64(mkexpr(u3)) );
   5946                         /* fallthru */
   5947                case 3:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
   5948                                  MAYBE_NARROW_TO_64(mkexpr(u2)) );
   5949                         /* fallthru */
   5950                case 2:  storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
   5951                                  MAYBE_NARROW_TO_64(mkexpr(u1)) );
   5952                         storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
   5953                                  MAYBE_NARROW_TO_64(mkexpr(u0)) );
   5954                         break;
   5955                default: vassert(0);
   5956             }
   5957 #           undef MAYBE_NARROW_TO_64
   5958          }
   5959 
   5960          /* -- Multiple 128 or 64 bit loads -- */
   5961          else /* isLD */ {
   5962             UInt   step   = isQ ? 16 : 8;
   5963             IRType loadTy = isQ ? Ity_V128 : Ity_I64;
   5964 #           define MAYBE_WIDEN_FROM_64(_expr) \
   5965                       (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
   5966             switch (nRegs) {
   5967                case 4:
   5968                   assign(u3, MAYBE_WIDEN_FROM_64(
   5969                                 loadLE(loadTy,
   5970                                        binop(Iop_Add64, mkexpr(tTA),
   5971                                                         mkU64(3 * step)))));
   5972                   /* fallthru */
   5973                case 3:
   5974                   assign(u2, MAYBE_WIDEN_FROM_64(
   5975                                 loadLE(loadTy,
   5976                                        binop(Iop_Add64, mkexpr(tTA),
   5977                                                         mkU64(2 * step)))));
   5978                   /* fallthru */
   5979                case 2:
   5980                   assign(u1, MAYBE_WIDEN_FROM_64(
   5981                                 loadLE(loadTy,
   5982                                        binop(Iop_Add64, mkexpr(tTA),
   5983                                                         mkU64(1 * step)))));
   5984                   assign(u0, MAYBE_WIDEN_FROM_64(
   5985                                 loadLE(loadTy,
   5986                                        binop(Iop_Add64, mkexpr(tTA),
   5987                                                         mkU64(0 * step)))));
   5988                   break;
   5989                default:
   5990                   vassert(0);
   5991             }
   5992 #           undef MAYBE_WIDEN_FROM_64
   5993             switch (nRegs) {
   5994                case 4:  putQReg128( (tt+3) % 32,
   5995                                     math_MAYBE_ZERO_HI64(bitQ, u3));
   5996                         /* fallthru */
   5997                case 3:  putQReg128( (tt+2) % 32,
   5998                                     math_MAYBE_ZERO_HI64(bitQ, u2));
   5999                         /* fallthru */
   6000                case 2:  putQReg128( (tt+1) % 32,
   6001                                     math_MAYBE_ZERO_HI64(bitQ, u1));
   6002                         putQReg128( (tt+0) % 32,
   6003                                     math_MAYBE_ZERO_HI64(bitQ, u0));
   6004                         break;
   6005                default: vassert(0);
   6006             }
   6007          }
   6008 
   6009          /* -- END generate the transfers -- */
   6010 
   6011          /* Do the writeback, if necessary */
   6012          if (isPX) {
   6013             putIReg64orSP(nn, mkexpr(tWB));
   6014          }
   6015 
   6016          HChar pxStr[20];
   6017          pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
   6018          if (isPX) {
   6019             if (mm == BITS5(1,1,1,1,1))
   6020                vex_sprintf(pxStr, ", #%u", xferSzB);
   6021             else
   6022                vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
   6023          }
   6024          const HChar* arr = nameArr_Q_SZ(bitQ, sz);
   6025          DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
   6026              isLD ? "ld" : "st",
   6027              (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
   6028              pxStr);
   6029 
   6030          return True;
   6031       }
   6032       /* else fall through */
   6033    }
   6034 
   6035    /* ---------- LD1R (single structure, replicate) ---------- */
   6036    /* ---------- LD2R (single structure, replicate) ---------- */
   6037    /* ---------- LD3R (single structure, replicate) ---------- */
   6038    /* ---------- LD4R (single structure, replicate) ---------- */
   6039    /* 31 29       22 20    15    11 9 4
   6040       0q 001 1010 10 00000 110 0 sz n t  LD1R {Vt.T}, [Xn|SP]
   6041       0q 001 1011 10 m     110 0 sz n t  LD1R {Vt.T}, [Xn|SP], step
   6042 
   6043       0q 001 1010 11 00000 110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP]
   6044       0q 001 1011 11 m     110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP], step
   6045 
   6046       0q 001 1010 10 00000 111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP]
   6047       0q 001 1011 10 m     111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP], step
   6048 
   6049       0q 001 1010 11 00000 111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP]
   6050       0q 001 1011 11 m     111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP], step
   6051 
   6052       step = if m == 11111 then transfer-size else Xm
   6053    */
   6054    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
   6055        && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
   6056        && INSN(12,12) == 0) {
   6057       UInt   bitQ  = INSN(30,30);
   6058       Bool   isPX  = INSN(23,23) == 1;
   6059       UInt   nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
   6060       UInt   mm    = INSN(20,16);
   6061       UInt   sz    = INSN(11,10);
   6062       UInt   nn    = INSN(9,5);
   6063       UInt   tt    = INSN(4,0);
   6064 
   6065       /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
   6066       if (isPX || mm == 0) {
   6067 
   6068          IRType ty    = integerIRTypeOfSize(1 << sz);
   6069 
   6070          UInt laneSzB = 1 << sz;
   6071          UInt xferSzB = laneSzB * nRegs;
   6072 
   6073          /* Generate the transfer address (TA) and if necessary the
   6074             writeback address (WB) */
   6075          IRTemp tTA = newTemp(Ity_I64);
   6076          assign(tTA, getIReg64orSP(nn));
   6077          if (nn == 31) { /* FIXME generate stack alignment check */ }
   6078          IRTemp tWB = IRTemp_INVALID;
   6079          if (isPX) {
   6080             tWB = newTemp(Ity_I64);
   6081             assign(tWB, binop(Iop_Add64,
   6082                               mkexpr(tTA),
   6083                               mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
   6084                                                      : getIReg64orZR(mm)));
   6085          }
   6086 
   6087          /* Do the writeback, if necessary */
   6088          if (isPX) {
   6089             putIReg64orSP(nn, mkexpr(tWB));
   6090          }
   6091 
   6092          IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
   6093          e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
   6094          switch (nRegs) {
   6095             case 4:
   6096                e3 = newTemp(ty);
   6097                assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
   6098                                                       mkU64(3 * laneSzB))));
   6099                v3 = math_DUP_TO_V128(e3, ty);
   6100                putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
   6101                /* fallthrough */
   6102             case 3:
   6103                e2 = newTemp(ty);
   6104                assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
   6105                                                       mkU64(2 * laneSzB))));
   6106                v2 = math_DUP_TO_V128(e2, ty);
   6107                putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
   6108                /* fallthrough */
   6109             case 2:
   6110                e1 = newTemp(ty);
   6111                assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
   6112                                                       mkU64(1 * laneSzB))));
   6113                v1 = math_DUP_TO_V128(e1, ty);
   6114                putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
   6115                /* fallthrough */
   6116             case 1:
   6117                e0 = newTemp(ty);
   6118                assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
   6119                                                       mkU64(0 * laneSzB))));
   6120                v0 = math_DUP_TO_V128(e0, ty);
   6121                putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
   6122                break;
   6123             default:
   6124                vassert(0);
   6125          }
   6126 
   6127          HChar pxStr[20];
   6128          pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
   6129          if (isPX) {
   6130             if (mm == BITS5(1,1,1,1,1))
   6131                vex_sprintf(pxStr, ", #%u", xferSzB);
   6132             else
   6133                vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
   6134          }
   6135          const HChar* arr = nameArr_Q_SZ(bitQ, sz);
   6136          DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
   6137              nRegs,
   6138              (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
   6139              pxStr);
   6140 
   6141          return True;
   6142       }
   6143       /* else fall through */
   6144    }
   6145 
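           /* Replication sketch (illustrative): "ld1r {v0.4s}, [x1]" loads a
              single 32-bit element and math_DUP_TO_V128 broadcasts it into
              every lane of v0.  With bitQ == 0, e.g. "ld1r {v0.2s}, [x1]",
              the duplicate still fills a V128 but math_MAYBE_ZERO_HI64 then
              clears the upper 64 bits. */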
   6146    /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
   6147    /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
   6148    /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
   6149    /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
   6150    /* 31 29       22 21 20    15    11 9 4
   6151       0q 001 1010 L  0  00000 xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP]
   6152       0q 001 1011 L  0  m     xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP], step
   6153 
   6154       0q 001 1010 L  1  00000 xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP]
   6155       0q 001 1011 L  1  m     xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP], step
   6156 
   6157       0q 001 1010 L  0  00000 xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP]
   6158       0q 001 1011 L  0  m     xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP], step
   6159 
   6160       0q 001 1010 L  1  00000 xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP]
   6161       0q 001 1011 L  1  m     xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP], step
   6162 
   6163       step = if m == 11111 then transfer-size else Xm
   6164       op   = case L of 1 -> LD ; 0 -> ST
   6165 
   6166       laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
   6167                                      01:b:b:b0 -> 2, bbb
   6168                                      10:b:b:00 -> 4, bb
   6169                                      10:b:0:01 -> 8, b
   6170    */
   6171    if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
   6172       UInt   bitQ  = INSN(30,30);
   6173       Bool   isPX  = INSN(23,23) == 1;
   6174       Bool   isLD  = INSN(22,22) == 1;
   6175       UInt   nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
   6176       UInt   mm    = INSN(20,16);
   6177       UInt   xx    = INSN(15,14);
   6178       UInt   bitS  = INSN(12,12);
   6179       UInt   sz    = INSN(11,10);
   6180       UInt   nn    = INSN(9,5);
   6181       UInt   tt    = INSN(4,0);
   6182 
   6183       Bool valid = True;
   6184 
   6185       /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
   6186       if (!isPX && mm != 0)
   6187          valid = False;
   6188 
   6189       UInt laneSzB = 0;  /* invalid */
   6190       UInt ix      = 16; /* invalid */
   6191 
   6192       UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
   6193       switch (xx_q_S_sz) {
   6194          case 0x00: case 0x01: case 0x02: case 0x03:
   6195          case 0x04: case 0x05: case 0x06: case 0x07:
   6196          case 0x08: case 0x09: case 0x0A: case 0x0B:
   6197          case 0x0C: case 0x0D: case 0x0E: case 0x0F:
   6198             laneSzB = 1; ix = xx_q_S_sz & 0xF;
   6199             break;
   6200          case 0x10: case 0x12: case 0x14: case 0x16:
   6201          case 0x18: case 0x1A: case 0x1C: case 0x1E:
   6202             laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
   6203             break;
   6204          case 0x20: case 0x24: case 0x28: case 0x2C:
   6205             laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
   6206             break;
   6207          case 0x21: case 0x29:
   6208             laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
   6209             break;
   6210          default:
   6211             break;
   6212       }
   6213 
   6214       if (valid && laneSzB != 0) {
   6215 
   6216          IRType ty      = integerIRTypeOfSize(laneSzB);
   6217          UInt   xferSzB = laneSzB * nRegs;
   6218 
   6219          /* Generate the transfer address (TA) and if necessary the
   6220             writeback address (WB) */
   6221          IRTemp tTA = newTemp(Ity_I64);
   6222          assign(tTA, getIReg64orSP(nn));
   6223          if (nn == 31) { /* FIXME generate stack alignment check */ }
   6224          IRTemp tWB = IRTemp_INVALID;
   6225          if (isPX) {
   6226             tWB = newTemp(Ity_I64);
   6227             assign(tWB, binop(Iop_Add64,
   6228                               mkexpr(tTA),
   6229                               mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
   6230                                                      : getIReg64orZR(mm)));
   6231          }
   6232 
   6233          /* Do the writeback, if necessary */
   6234          if (isPX) {
   6235             putIReg64orSP(nn, mkexpr(tWB));
   6236          }
   6237 
   6238          switch (nRegs) {
   6239             case 4: {
   6240                IRExpr* addr
   6241                   = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
   6242                if (isLD) {
   6243                   putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
   6244                } else {
   6245                   storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
   6246                }
   6247                /* fallthrough */
   6248             }
   6249             case 3: {
   6250                IRExpr* addr
   6251                   = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
   6252                if (isLD) {
   6253                   putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
   6254                } else {
   6255                   storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
   6256                }
   6257                /* fallthrough */
   6258             }
   6259             case 2: {
   6260                IRExpr* addr
   6261                   = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
   6262                if (isLD) {
   6263                   putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
   6264                } else {
   6265                   storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
   6266                }
   6267                /* fallthrough */
   6268             }
   6269             case 1: {
   6270                IRExpr* addr
   6271                   = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
   6272                if (isLD) {
   6273                   putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
   6274                } else {
   6275                   storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
   6276                }
   6277                break;
   6278             }
   6279             default:
   6280                vassert(0);
   6281          }
   6282 
   6283          HChar pxStr[20];
   6284          pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
   6285          if (isPX) {
   6286             if (mm == BITS5(1,1,1,1,1))
   6287                vex_sprintf(pxStr, ", #%u", xferSzB);
   6288             else
   6289                vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
   6290          }
   6291          const HChar* arr = nameArr_Q_SZ(bitQ, sz);
   6292          DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
   6293              isLD ? "ld" : "st", nRegs,
   6294              (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
   6295              ix, nameIReg64orSP(nn), pxStr);
   6296 
   6297          return True;
   6298       }
   6299       /* else fall through */
   6300    }
   6301 
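           /* Index-decode sketch (illustrative): the lane number is
              scattered across xx:q:S:sz.  E.g. "ld1 {v0.s}[3], [x0]" has
              xx == 10, q == 1, S == 1, sz == 00, so xx_q_S_sz == 0x2C; that
              selects the laneSzB == 4 row above, with
              ix == (0x2C >> 2) & 3 == 3. */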
   6302    /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
   6303    /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
   6304    /* 31 29     23  20      14    9 4
   6305       sz 001000 010 11111 0 11111 n t   LDX{R,RH,RB}  Rt, [Xn|SP]
   6306       sz 001000 010 11111 1 11111 n t   LDAX{R,RH,RB} Rt, [Xn|SP]
   6307       sz 001000 000 s     0 11111 n t   STX{R,RH,RB}  Ws, Rt, [Xn|SP]
   6308       sz 001000 000 s     1 11111 n t   STLX{R,RH,RB} Ws, Rt, [Xn|SP]
   6309    */
   6310    if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
   6311        && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
   6312        && INSN(14,10) == BITS5(1,1,1,1,1)) {
   6313       UInt szBlg2     = INSN(31,30);
   6314       Bool isLD       = INSN(22,22) == 1;
   6315       Bool isAcqOrRel = INSN(15,15) == 1;
   6316       UInt ss         = INSN(20,16);
   6317       UInt nn         = INSN(9,5);
   6318       UInt tt         = INSN(4,0);
   6319 
   6320       vassert(szBlg2 < 4);
   6321       UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
   6322       IRType ty  = integerIRTypeOfSize(szB);
   6323       const HChar* suffix[4] = { "rb", "rh", "r", "r" };
   6324 
   6325       IRTemp ea = newTemp(Ity_I64);
   6326       assign(ea, getIReg64orSP(nn));
   6327       /* FIXME generate check that ea is szB-aligned */
   6328 
   6329       if (isLD && ss == BITS5(1,1,1,1,1)) {
   6330          IRTemp res = newTemp(ty);
   6331          stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
   6332          putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
   6333          if (isAcqOrRel) {
   6334             stmt(IRStmt_MBE(Imbe_Fence));
   6335          }
   6336          DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
   6337              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
   6338          return True;
   6339       }
   6340       if (!isLD) {
   6341          if (isAcqOrRel) {
   6342             stmt(IRStmt_MBE(Imbe_Fence));
   6343          }
   6344          IRTemp  res  = newTemp(Ity_I1);
   6345          IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
   6346          stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
   6347          /* IR semantics: res is 1 if store succeeds, 0 if it fails.
   6348             Need to set rS to 1 on failure, 0 on success. */
   6349          putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
   6350                                             mkU64(1)));
   6351          DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
   6352              nameIRegOrZR(False, ss),
   6353              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
   6354          return True;
   6355       }
   6356       /* else fall through */
   6357    }
   6358 
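           /* Status-flag sketch (illustrative): the result temp of
              IRStmt_LLSC is 1 for a successful store-conditional and 0 for
              a failed one, whereas the architected Ws result of STXR is 0
              on success and 1 on failure.  Hence the inversion above:
                 Ws = 1Uto64(res) ^ 1. */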
   6359    /* ------------------ LDA{R,RH,RB} ------------------ */
   6360    /* ------------------ STL{R,RH,RB} ------------------ */
   6361    /* 31 29     23  20      14    9 4
   6362       sz 001000 110 11111 1 11111 n t   LDAR<sz> Rt, [Xn|SP]
   6363       sz 001000 100 11111 1 11111 n t   STLR<sz> Rt, [Xn|SP]
   6364    */
   6365    if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
   6366        && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
   6367       UInt szBlg2 = INSN(31,30);
   6368       Bool isLD   = INSN(22,22) == 1;
   6369       UInt nn     = INSN(9,5);
   6370       UInt tt     = INSN(4,0);
   6371 
   6372       vassert(szBlg2 < 4);
   6373       UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
   6374       IRType ty  = integerIRTypeOfSize(szB);
   6375       const HChar* suffix[4] = { "rb", "rh", "r", "r" };
   6376 
   6377       IRTemp ea = newTemp(Ity_I64);
   6378       assign(ea, getIReg64orSP(nn));
   6379       /* FIXME generate check that ea is szB-aligned */
   6380 
   6381       if (isLD) {
   6382          IRTemp res = newTemp(ty);
   6383          assign(res, loadLE(ty, mkexpr(ea)));
   6384          putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
   6385          stmt(IRStmt_MBE(Imbe_Fence));
   6386          DIP("lda%s %s, [%s]\n", suffix[szBlg2],
   6387              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
   6388       } else {
   6389          stmt(IRStmt_MBE(Imbe_Fence));
   6390          IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
   6391          storeLE(mkexpr(ea), data);
   6392          DIP("stl%s %s, [%s]\n", suffix[szBlg2],
   6393              nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
   6394       }
   6395       return True;
   6396    }
   6397 
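           /* Ordering sketch (illustrative): for acquire (LDAR) the
              Imbe_Fence is placed after the load, so later accesses cannot
              be reordered above it; for release (STLR) the fence is placed
              before the store, so earlier accesses cannot be reordered
              below it.  The LDAX/STLX cases above use the same placement. */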
   6398    /* ------------------ PRFM (immediate) ------------------ */
   6399    /* 31           21    9 4
   6400       11 111 00110 imm12 n t   PRFM prfop=Rt, [Xn|SP, #pimm]
   6401    */
   6402    if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
   6403       UInt imm12 = INSN(21,10);
   6404       UInt nn    = INSN(9,5);
   6405       UInt tt    = INSN(4,0);
   6406       /* Generating any IR here is pointless, except for documentation
   6407          purposes, as it will get optimised away later. */
   6408       IRTemp ea = newTemp(Ity_I64);
   6409       assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
   6410       DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
   6411       return True;
   6412    }
   6413 
   6414    vex_printf("ARM64 front end: load_store\n");
   6415    return False;
   6416 #  undef INSN
   6417 }
   6418 
   6419 
   6420 /*------------------------------------------------------------*/
   6421 /*--- Control flow and misc instructions                   ---*/
   6422 /*------------------------------------------------------------*/
   6423 
   6424 static
   6425 Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
   6426                           const VexArchInfo* archinfo)
   6427 {
   6428 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   6429 
   6430    /* ---------------------- B cond ----------------------- */
   6431    /* 31        24    4 3
   6432       0101010 0 imm19 0 cond */
   6433    if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
   6434       UInt  cond   = INSN(3,0);
   6435       ULong uimm64 = INSN(23,5) << 2;
   6436       Long  simm64 = (Long)sx_to_64(uimm64, 21);
   6437       vassert(dres->whatNext    == Dis_Continue);
   6438       vassert(dres->len         == 4);
   6439       vassert(dres->continueAt  == 0);
   6440       vassert(dres->jk_StopHere == Ijk_INVALID);
   6441       stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
   6442                         Ijk_Boring,
   6443                         IRConst_U64(guest_PC_curr_instr + simm64),
   6444                         OFFB_PC) );
   6445       putPC(mkU64(guest_PC_curr_instr + 4));
   6446       dres->whatNext    = Dis_StopHere;
   6447       dres->jk_StopHere = Ijk_Boring;
   6448       DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
   6449       return True;
   6450    }
   6451 
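           /* Control-flow sketch (illustrative): a conditional branch
              becomes a side exit plus an unconditional fall-through.  E.g.
              for "b.eq .+0x100" the block gets
              IRStmt_Exit(<EQ holds>, PC+0x100) followed by putPC(PC+4) and
              Dis_StopHere, so the taken edge is the exit and the not-taken
              edge is the block's final PC. */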
   6452    /* -------------------- B{L} uncond -------------------- */
   6453    if (INSN(30,26) == BITS5(0,0,1,0,1)) {
   6454       /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
   6455          100101 imm26  BL (PC + sxTo64(imm26 << 2))
   6456       */
   6457       UInt  bLink  = INSN(31,31);
   6458       ULong uimm64 = INSN(25,0) << 2;
   6459       Long  simm64 = (Long)sx_to_64(uimm64, 28);
   6460       if (bLink) {
   6461          putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
   6462       }
   6463       putPC(mkU64(guest_PC_curr_instr + simm64));
   6464       dres->whatNext = Dis_StopHere;
   6465       dres->jk_StopHere = Ijk_Call;
   6466       DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
   6467                           guest_PC_curr_instr + simm64);
   6468       return True;
   6469    }
   6470 
   6471    /* --------------------- B{L} reg --------------------- */
   6472    /* 31      24 22 20    15     9  4
   6473       1101011 00 10 11111 000000 nn 00000  RET  Rn
   6474       1101011 00 01 11111 000000 nn 00000  CALL Rn
   6475       1101011 00 00 11111 000000 nn 00000  JMP  Rn
   6476    */
   6477    if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
   6478        && INSN(20,16) == BITS5(1,1,1,1,1)
   6479        && INSN(15,10) == BITS6(0,0,0,0,0,0)
   6480        && INSN(4,0) == BITS5(0,0,0,0,0)) {
   6481       UInt branch_type = INSN(22,21);
   6482       UInt nn          = INSN(9,5);
   6483       if (branch_type == BITS2(1,0) /* RET */) {
   6484          putPC(getIReg64orZR(nn));
   6485          dres->whatNext = Dis_StopHere;
   6486          dres->jk_StopHere = Ijk_Ret;
   6487          DIP("ret %s\n", nameIReg64orZR(nn));
   6488          return True;
   6489       }
   6490       if (branch_type == BITS2(0,1) /* CALL */) {
   6491          IRTemp dst = newTemp(Ity_I64);
   6492          assign(dst, getIReg64orZR(nn));
   6493          putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
   6494          putPC(mkexpr(dst));
   6495          dres->whatNext = Dis_StopHere;
   6496          dres->jk_StopHere = Ijk_Call;
   6497          DIP("blr %s\n", nameIReg64orZR(nn));
   6498          return True;
   6499       }
   6500       if (branch_type == BITS2(0,0) /* JMP */) {
   6501          putPC(getIReg64orZR(nn));
   6502          dres->whatNext = Dis_StopHere;
   6503          dres->jk_StopHere = Ijk_Boring;
   6504          DIP("jmp %s\n", nameIReg64orZR(nn));
   6505          return True;
   6506       }
   6507    }
   6508 
   6509    /* -------------------- CB{N}Z -------------------- */
   6510    /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
   6511       sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
   6512    */
   6513    if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
   6514       Bool    is64   = INSN(31,31) == 1;
   6515       Bool    bIfZ   = INSN(24,24) == 0;
   6516       ULong   uimm64 = INSN(23,5) << 2;
   6517       UInt    rT     = INSN(4,0);
   6518       Long    simm64 = (Long)sx_to_64(uimm64, 21);
   6519       IRExpr* cond   = NULL;
   6520       if (is64) {
   6521          cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
   6522                       getIReg64orZR(rT), mkU64(0));
   6523       } else {
   6524          cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
   6525                       getIReg32orZR(rT), mkU32(0));
   6526       }
   6527       stmt( IRStmt_Exit(cond,
   6528                         Ijk_Boring,
   6529                         IRConst_U64(guest_PC_curr_instr + simm64),
   6530                         OFFB_PC) );
   6531       putPC(mkU64(guest_PC_curr_instr + 4));
   6532       dres->whatNext    = Dis_StopHere;
   6533       dres->jk_StopHere = Ijk_Boring;
   6534       DIP("cb%sz %s, 0x%llx\n",
   6535           bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
   6536           guest_PC_curr_instr + simm64);
   6537       return True;
   6538    }
   6539 
   6540    /* -------------------- TB{N}Z -------------------- */
   6541    /* 31 30      24 23  18  5 4
   6542       b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
   6543       b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
   6544    */
   6545    if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
   6546       UInt    b5     = INSN(31,31);
   6547       Bool    bIfZ   = INSN(24,24) == 0;
   6548       UInt    b40    = INSN(23,19);
   6549       UInt    imm14  = INSN(18,5);
   6550       UInt    tt     = INSN(4,0);
   6551       UInt    bitNo  = (b5 << 5) | b40;
   6552       ULong   uimm64 = imm14 << 2;
   6553       Long    simm64 = sx_to_64(uimm64, 16);
   6554       IRExpr* cond
   6555          = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
   6556                  binop(Iop_And64,
   6557                        binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
   6558                        mkU64(1)),
   6559                  mkU64(0));
   6560       stmt( IRStmt_Exit(cond,
   6561                         Ijk_Boring,
   6562                         IRConst_U64(guest_PC_curr_instr + simm64),
   6563                         OFFB_PC) );
   6564       putPC(mkU64(guest_PC_curr_instr + 4));
   6565       dres->whatNext    = Dis_StopHere;
   6566       dres->jk_StopHere = Ijk_Boring;
   6567       DIP("tb%sz %s, #%u, 0x%llx\n",
   6568           bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
   6569           guest_PC_curr_instr + simm64);
   6570       return True;
   6571    }
   6572 
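           /* Bit-number sketch (illustrative): the tested bit index is
              split across the encoding as bitNo = (b5 << 5) | b40, so
              testing bit 33 needs b5 == 1 and b40 == 00001.  The IR then
              extracts (Xt >> bitNo) & 1 and exits if it is (non)zero. */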
   6573    /* -------------------- SVC -------------------- */
   6574    /* 11010100 000 imm16 000 01
   6575       Don't bother with anything except the imm16==0 case.
   6576    */
   6577    if (INSN(31,0) == 0xD4000001) {
   6578       putPC(mkU64(guest_PC_curr_instr + 4));
   6579       dres->whatNext    = Dis_StopHere;
   6580       dres->jk_StopHere = Ijk_Sys_syscall;
   6581       DIP("svc #0\n");
   6582       return True;
   6583    }
   6584 
   6585    /* ------------------ M{SR,RS} ------------------ */
   6586    /* ---- Cases for TPIDR_EL0 ----
   6587       0xD51BD0 010 Rt   MSR tpidr_el0, rT
   6588       0xD53BD0 010 Rt   MRS rT, tpidr_el0
   6589    */
   6590    if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
   6591        || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
   6592       Bool toSys = INSN(21,21) == 0;
   6593       UInt tt    = INSN(4,0);
   6594       if (toSys) {
   6595          stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
   6596          DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
   6597       } else {
   6598          putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
   6599          DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
   6600       }
   6601       return True;
   6602    }
   6603    /* ---- Cases for FPCR ----
   6604       0xD51B44 000 Rt  MSR fpcr, rT
   6605       0xD53B44 000 Rt  MRS rT, fpcr
   6606    */
   6607    if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
   6608        || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
   6609       Bool toSys = INSN(21,21) == 0;
   6610       UInt tt    = INSN(4,0);
   6611       if (toSys) {
   6612          stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
   6613          DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
   6614       } else {
   6615          putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
   6616          DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
   6617       }
   6618       return True;
   6619    }
   6620    /* ---- Cases for FPSR ----
   6621       0xD51B44 001 Rt  MSR fpsr, rT
   6622       0xD53B44 001 Rt  MRS rT, fpsr
   6623       The only part of this we model is FPSR.QC.  All other bits
   6624       are ignored when writing to it and RAZ when reading from it.
   6625    */
   6626    if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
   6627        || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
   6628       Bool toSys = INSN(21,21) == 0;
   6629       UInt tt    = INSN(4,0);
   6630       if (toSys) {
   6631          /* Just deal with FPSR.QC.  Make up a V128 value which is
   6632             zero if Xt[27] is zero and any other value if Xt[27] is
   6633             nonzero. */
   6634          IRTemp qc64 = newTemp(Ity_I64);
   6635          assign(qc64, binop(Iop_And64,
   6636                             binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
   6637                             mkU64(1)));
   6638          IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
   6639          stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
   6640          DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
   6641       } else {
   6642          /* Generate a value which is all zeroes except for bit 27,
   6643             which must be zero if QCFLAG is all zeroes and one otherwise. */
   6644          IRTemp qcV128 = newTempV128();
   6645          assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
   6646          IRTemp qc64 = newTemp(Ity_I64);
   6647          assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
   6648                                       unop(Iop_V128to64,   mkexpr(qcV128))));
   6649          IRExpr* res = binop(Iop_Shl64,
   6650                              unop(Iop_1Uto64,
   6651                                   binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
   6652                              mkU8(27));
   6653          putIReg64orZR(tt, res);
   6654          DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
   6655       }
   6656       return True;
   6657    }
   6658    /* ---- Cases for NZCV ----
   6659       D51B42 000 Rt  MSR nzcv, rT
   6660       D53B42 000 Rt  MRS rT, nzcv
   6661       The only parts of NZCV that actually exist are bits 31:28, which
   6662       are the N Z C and V bits themselves.  Hence the flags thunk provides
   6663       all the state we need.
   6664    */
   6665    if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
   6666        || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
   6667       Bool  toSys = INSN(21,21) == 0;
   6668       UInt  tt    = INSN(4,0);
   6669       if (toSys) {
   6670          IRTemp t = newTemp(Ity_I64);
   6671          assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
   6672          setFlags_COPY(t);
   6673          DIP("msr nzcv, %s\n", nameIReg64orZR(tt));
   6674       } else {
   6675          IRTemp res = newTemp(Ity_I64);
   6676          assign(res, mk_arm64g_calculate_flags_nzcv());
   6677          putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
   6678          DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
   6679       }
   6680       return True;
   6681    }
   6682    /* ---- Cases for DCZID_EL0 ----
   6683       Don't support arbitrary reads and writes to this register.  Just
   6684       return the value 16, which indicates that the DC ZVA instruction
   6685       is not permitted, so we don't have to emulate it.
   6686       D5 3B 00 111 Rt  MRS rT, dczid_el0
   6687    */
   6688    if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
   6689       UInt tt = INSN(4,0);
   6690       putIReg64orZR(tt, mkU64(1<<4));
   6691       DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
   6692       return True;
   6693    }
   6694    /* ---- Cases for CTR_EL0 ----
   6695       We just handle reads, and make up a value from the D and I line
   6696       sizes in the VexArchInfo we are given, and patch in the following
   6697       fields that the Foundation model gives ("natively"):
   6698       CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
   6699       D5 3B 00 001 Rt  MRS rT, ctr_el0
   6700    */
   6701    if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
   6702       UInt tt = INSN(4,0);
   6703       /* Need to generate a value from dMinLine_lg2_szB and
   6704          iMinLine_lg2_szB.  The value in the register is in 32-bit
   6705          units, so need to subtract 2 from the values in the
   6706          VexArchInfo.  We can assume that the values here are valid --
   6707          disInstr_ARM64 checks them -- so there's no need to deal with
   6708          out-of-range cases. */
   6709       vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
   6710               && archinfo->arm64_dMinLine_lg2_szB <= 17
   6711               && archinfo->arm64_iMinLine_lg2_szB >= 2
   6712               && archinfo->arm64_iMinLine_lg2_szB <= 17);
   6713       UInt val
   6714          = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
   6715                       | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
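              /* For example, with 64-byte D and I lines (lg2 = 6), both
                 fields are 6-2 = 4, giving val = 0x8444C004. */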
   6716       putIReg64orZR(tt, mkU64(val));
   6717       DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
   6718       return True;
   6719    }
   6720    /* ---- Cases for CNTVCT_EL0 ----
   6721       This is a timestamp counter of some sort.  Support reads of it only
   6722       by passing through to the host.
   6723       D5 3B E0 010 Rt  MRS Xt, cntvct_el0
   6724    */
   6725    if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
   6726       UInt     tt   = INSN(4,0);
   6727       IRTemp   val  = newTemp(Ity_I64);
   6728       IRExpr** args = mkIRExprVec_0();
   6729       IRDirty* d    = unsafeIRDirty_1_N (
   6730                          val,
   6731                          0/*regparms*/,
   6732                          "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
   6733                          &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
   6734                          args
   6735                       );
   6736       /* execute the dirty call, dumping the result in val. */
   6737       stmt( IRStmt_Dirty(d) );
   6738       putIReg64orZR(tt, mkexpr(val));
   6739       DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
   6740       return True;
   6741    }
   6742 
   6743    /* ------------------ IC_IVAU ------------------ */
   6744    /* D5 0B 75 001 Rt  ic ivau, rT
   6745    */
   6746    if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
   6747       /* We will always be provided with a valid iMinLine value. */
   6748       vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
   6749               && archinfo->arm64_iMinLine_lg2_szB <= 17);
   6750       /* Round the requested address, in rT, down to the start of the
   6751          containing block. */
   6752       UInt   tt      = INSN(4,0);
   6753       ULong  lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
   6754       IRTemp addr    = newTemp(Ity_I64);
   6755       assign( addr, binop( Iop_And64,
   6756                            getIReg64orZR(tt),
   6757                            mkU64(~(lineszB - 1))) );
   6758       /* Set the invalidation range, request exit-and-invalidate, with
   6759          continuation at the next instruction. */
   6760       stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
   6761       stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
   6762       /* be paranoid ... */
   6763       stmt( IRStmt_MBE(Imbe_Fence) );
   6764       putPC(mkU64( guest_PC_curr_instr + 4 ));
   6765       dres->whatNext    = Dis_StopHere;
   6766       dres->jk_StopHere = Ijk_InvalICache;
   6767       DIP("ic ivau, %s\n", nameIReg64orZR(tt));
   6768       return True;
   6769    }
   6770 
   6771    /* ------------------ DC_CVAU ------------------ */
   6772    /* D5 0B 7B 001 Rt  dc cvau, rT
   6773    */
   6774    if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
   6775       /* Exactly the same scheme as for IC IVAU, except we observe the
   6776          dMinLine size, and request an Ijk_FlushDCache instead of
   6777          Ijk_InvalICache. */
   6778       /* We will always be provided with a valid dMinLine value. */
   6779       vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
   6780               && archinfo->arm64_dMinLine_lg2_szB <= 17);
   6781       /* Round the requested address, in rT, down to the start of the
   6782          containing block. */
   6783       UInt   tt      = INSN(4,0);
   6784       ULong  lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
   6785       IRTemp addr    = newTemp(Ity_I64);
   6786       assign( addr, binop( Iop_And64,
   6787                            getIReg64orZR(tt),
   6788                            mkU64(~(lineszB - 1))) );
   6789       /* Set the flush range, request exit-and-flush, with
   6790          continuation at the next instruction. */
   6791       stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
   6792       stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
   6793       /* be paranoid ... */
   6794       stmt( IRStmt_MBE(Imbe_Fence) );
   6795       putPC(mkU64( guest_PC_curr_instr + 4 ));
   6796       dres->whatNext    = Dis_StopHere;
   6797       dres->jk_StopHere = Ijk_FlushDCache;
   6798       DIP("dc cvau, %s\n", nameIReg64orZR(tt));
   6799       return True;
   6800    }
   6801 
   6802    /* ------------------ ISB, DMB, DSB ------------------ */
   6803    /* 31          21            11  7 6  4
   6804       11010 10100 0 00 011 0011 CRm 1 01 11111  DMB opt
   6805       11010 10100 0 00 011 0011 CRm 1 00 11111  DSB opt
   6806       11010 10100 0 00 011 0011 CRm 1 10 11111  ISB opt
   6807    */
   6808    if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
   6809        && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
   6810        && INSN(7,7) == 1
   6811        && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
   6812       UInt opc = INSN(6,5);
   6813       UInt CRm = INSN(11,8);
   6814       vassert(opc <= 2 && CRm <= 15);
   6815       stmt(IRStmt_MBE(Imbe_Fence));
   6816       const HChar* opNames[3]
   6817          = { "dsb", "dmb", "isb" };
   6818       const HChar* howNames[16]
   6819          = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
   6820              "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
   6821       DIP("%s %s\n", opNames[opc], howNames[CRm]);
   6822       return True;
   6823    }
   6824 
   6825    /* -------------------- NOP -------------------- */
   6826    if (INSN(31,0) == 0xD503201F) {
   6827       DIP("nop\n");
   6828       return True;
   6829    }
   6830 
   6831    /* -------------------- BRK -------------------- */
   6832    /* 31        23  20    4
   6833       1101 0100 001 imm16 00000  BRK #imm16
   6834    */
   6835    if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
   6836        && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
   6837       UInt imm16 = INSN(20,5);
   6838       /* Request SIGTRAP and then restart of this insn. */
   6839       putPC(mkU64(guest_PC_curr_instr + 0));
   6840       dres->whatNext    = Dis_StopHere;
   6841       dres->jk_StopHere = Ijk_SigTRAP;
   6842       DIP("brk #%u\n", imm16);
   6843       return True;
   6844    }
   6845 
   6846   //fail:
   6847    vex_printf("ARM64 front end: branch_etc\n");
   6848    return False;
   6849 #  undef INSN
   6850 }
   6851 
   6852 
   6853 /*------------------------------------------------------------*/
   6854 /*--- SIMD and FP instructions: helper functions           ---*/
   6855 /*------------------------------------------------------------*/
   6856 
   6857 /* Some constructors for interleave/deinterleave expressions. */
   6858 
   6859 static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   6860    // returns a0 b0
   6861    return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
   6862 }
   6863 
   6864 static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   6865    // returns a1 b1
   6866    return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
   6867 }
   6868 
   6869 static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   6870    // returns a2 a0 b2 b0
   6871    return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
   6872 }
   6873 
   6874 static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   6875    // returns a3 a1 b3 b1
   6876    return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
   6877 }
   6878 
   6879 static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
   6880    // returns a1 b1 a0 b0
   6881    return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
   6882 }
   6883 
   6884 static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
   6885    // returns a3 b3 a2 b2
   6886    return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
   6887 }
   6888 
   6889 static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   6890    // returns a6 a4 a2 a0 b6 b4 b2 b0
   6891    return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
   6892 }
   6893 
   6894 static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   6895    // returns a7 a5 a3 a1 b7 b5 b3 b1
   6896    return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
   6897 }
   6898 
   6899 static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   6900    // returns a3 b3 a2 b2 a1 b1 a0 b0
   6901    return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
   6902 }
   6903 
   6904 static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   6905    // returns a7 b7 a6 b6 a5 b5 a4 b4
   6906    return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
   6907 }
   6908 
   6909 static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
   6910                                      IRTemp bFEDCBA9876543210 ) {
   6911    // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
   6912    return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
   6913                                       mkexpr(bFEDCBA9876543210));
   6914 }
   6915 
   6916 static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
   6917                                     IRTemp bFEDCBA9876543210 ) {
   6918    // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
   6919    return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
   6920                                      mkexpr(bFEDCBA9876543210));
   6921 }
   6922 
   6923 static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
   6924                                      IRTemp bFEDCBA9876543210 ) {
   6925    // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
   6926    return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
   6927                                       mkexpr(bFEDCBA9876543210));
   6928 }
   6929 
   6930 static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
   6931                                      IRTemp bFEDCBA9876543210 ) {
   6932    // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
   6933    return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
   6934                                       mkexpr(bFEDCBA9876543210));
   6935 }
   6936 
   6937 /* Generate N copies of |bit| in the bottom of a ULong. */
   6938 static ULong Replicate ( ULong bit, Int N )
   6939 {
   6940    vassert(bit <= 1 && N >= 1 && N < 64);
   6941    if (bit == 0) {
   6942       return 0;
   6943    } else {
   6944       /* Careful.  This won't work for N == 64. */
   6945       return (1ULL << N) - 1;
   6946    }
   6947 }
   6948 
   6949 static ULong Replicate32x2 ( ULong bits32 )
   6950 {
   6951    vassert(0 == (bits32 & ~0xFFFFFFFFULL));
   6952    return (bits32 << 32) | bits32;
   6953 }
   6954 
   6955 static ULong Replicate16x4 ( ULong bits16 )
   6956 {
   6957    vassert(0 == (bits16 & ~0xFFFFULL));
   6958    return Replicate32x2((bits16 << 16) | bits16);
   6959 }
   6960 
   6961 static ULong Replicate8x8 ( ULong bits8 )
   6962 {
   6963    vassert(0 == (bits8 & ~0xFFULL));
   6964    return Replicate16x4((bits8 << 8) | bits8);
   6965 }
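
        /* Examples: Replicate(1,3) == 0b111, and the byte-splat chain
           gives Replicate8x8(0x5A) == 0x5A5A5A5A5A5A5A5A. */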
   6966 
   6967 /* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
   6968    |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
   6969    is 64.  In the former case, the upper 32 bits of the returned value
   6970    are guaranteed to be zero. */
   6971 static ULong VFPExpandImm ( ULong imm8, Int N )
   6972 {
   6973    vassert(imm8 <= 0xFF);
   6974    vassert(N == 32 || N == 64);
   6975    Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
   6976    Int F = N - E - 1;
   6977    ULong imm8_6 = (imm8 >> 6) & 1;
   6978    /* sign: 1 bit */
   6979    /* exp:  E bits */
   6980    /* frac: F bits */
   6981    ULong sign = (imm8 >> 7) & 1;
   6982    ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
   6983    ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
   6984    vassert(sign < (1ULL << 1));
   6985    vassert(exp  < (1ULL << E));
   6986    vassert(frac < (1ULL << F));
   6987    vassert(1 + E + F == N);
   6988    ULong res = (sign << (E+F)) | (exp << F) | frac;
   6989    return res;
   6990 }
   6991 
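
        /* Example: VFPExpandImm(0x70, 32).  Here sign = 0 and imm8<6> = 1,
           so exp = NOT(1):Replicate(1,5) = 0b011111 and frac = 0b110000
           followed by 19 zeroes, giving 0x3F800000, which is 1.0 in single
           precision -- the encoding used by "fmov s0, #1.0". */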
   6992 /* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   6993    This might fail, as indicated by the returned Bool.  Page 2530 of
   6994    the manual. */
   6995 static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
   6996                                UInt op, UInt cmode, UInt imm8 )
   6997 {
   6998    vassert(op <= 1);
   6999    vassert(cmode <= 15);
   7000    vassert(imm8 <= 255);
   7001 
   7002    *res = 0; /* will overwrite iff returning True */
   7003 
   7004    ULong imm64    = 0;
   7005    Bool  testimm8 = False;
   7006 
   7007    switch (cmode >> 1) {
   7008       case 0:
   7009          testimm8 = False; imm64 = Replicate32x2(imm8); break;
   7010       case 1:
   7011          testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
   7012       case 2:
   7013          testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
   7014       case 3:
   7015          testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
   7016       case 4:
   7017          testimm8 = False; imm64 = Replicate16x4(imm8); break;
   7018       case 5:
   7019          testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
   7020       case 6:
   7021          testimm8 = True;
   7022          if ((cmode & 1) == 0)
   7023             imm64 = Replicate32x2((imm8 << 8) | 0xFF);
   7024          else
   7025             imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
   7026          break;
   7027       case 7:
   7028          testimm8 = False;
   7029          if ((cmode & 1) == 0 && op == 0)
   7030             imm64 = Replicate8x8(imm8);
   7031          if ((cmode & 1) == 0 && op == 1) {
   7032             imm64 = 0;   imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
   7033             imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
   7034             imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
   7035             imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
   7036             imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
   7037             imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
   7038             imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
   7039             imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
   7040          }
   7041          if ((cmode & 1) == 1 && op == 0) {
   7042             ULong imm8_7  = (imm8 >> 7) & 1;
   7043             ULong imm8_6  = (imm8 >> 6) & 1;
   7044             ULong imm8_50 = imm8 & 63;
   7045             ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
   7046                           | ((imm8_6 ^ 1)         << (5 + 6 + 19))
   7047                           | (Replicate(imm8_6, 5) << (6 + 19))
   7048                           | (imm8_50              << 19);
   7049             imm64 = Replicate32x2(imm32);
   7050          }
   7051          if ((cmode & 1) == 1 && op == 1) {
   7052             // imm64 = imm8<7>:NOT(imm8<6>)
   7053             //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
   7054             ULong imm8_7  = (imm8 >> 7) & 1;
   7055             ULong imm8_6  = (imm8 >> 6) & 1;
   7056             ULong imm8_50 = imm8 & 63;
   7057             imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
   7058                     | (Replicate(imm8_6, 8) << 54)
   7059                     | (imm8_50 << 48);
   7060          }
   7061          break;
   7062       default:
   7063          vassert(0);
   7064    }
   7065 
   7066    if (testimm8 && imm8 == 0)
   7067       return False;
   7068 
   7069    *res = imm64;
   7070    return True;
   7071 }
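
        /* Examples: op=0, cmode=0b0100, imm8=0xAB falls into case 2 above
           and gives Replicate32x2(0xAB << 16) = 0x00AB000000AB0000.
           op=1, cmode=0b1110, imm8=0x81 expands each imm8 bit into a byte
           mask, giving 0xFF000000000000FF. */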
   7072 
   7073 /* Helper for decoding laneage for vector operations that can be
   7074    of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by
   7075    the Q and SZ bits, typically for vector floating point. */
   7076 static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI,  /*OUT*/IRType* tyF,
   7077                                /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
   7078                                /*OUT*/const HChar** arrSpec,
   7079                                Bool bitQ, Bool bitSZ )
   7080 {
   7081    vassert(bitQ == True || bitQ == False);
   7082    vassert(bitSZ == True || bitSZ == False);
   7083    if (bitQ && bitSZ) { // 2x64
   7084       if (tyI)       *tyI       = Ity_I64;
   7085       if (tyF)       *tyF       = Ity_F64;
   7086       if (nLanes)    *nLanes    = 2;
   7087       if (zeroUpper) *zeroUpper = False;
   7088       if (arrSpec)   *arrSpec   = "2d";
   7089       return True;
   7090    }
   7091    if (bitQ && !bitSZ) { // 4x32
   7092       if (tyI)       *tyI       = Ity_I32;
   7093       if (tyF)       *tyF       = Ity_F32;
   7094       if (nLanes)    *nLanes    = 4;
   7095       if (zeroUpper) *zeroUpper = False;
   7096       if (arrSpec)   *arrSpec   = "4s";
   7097       return True;
   7098    }
   7099    if (!bitQ && !bitSZ) { // 2x32
   7100       if (tyI)       *tyI       = Ity_I32;
   7101       if (tyF)       *tyF       = Ity_F32;
   7102       if (nLanes)    *nLanes    = 2;
   7103       if (zeroUpper) *zeroUpper = True;
   7104       if (arrSpec)   *arrSpec   = "2s";
   7105       return True;
   7106    }
   7107    // Else implicitly 1x64, which isn't allowed.
   7108    return False;
   7109 }
   7110 
   7111 /* Helper for decoding laneage for shift-style vector operations
   7112    that involve an immediate shift amount. */
   7113 static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
   7114                                     UInt immh, UInt immb )
   7115 {
   7116    vassert(immh < (1<<4));
   7117    vassert(immb < (1<<3));
   7118    UInt immhb = (immh << 3) | immb;
   7119    if (immh & 8) {
   7120       if (shift)  *shift  = 128 - immhb;
   7121       if (szBlg2) *szBlg2 = 3;
   7122       return True;
   7123    }
   7124    if (immh & 4) {
   7125       if (shift)  *shift  = 64 - immhb;
   7126       if (szBlg2) *szBlg2 = 2;
   7127       return True;
   7128    }
   7129    if (immh & 2) {
   7130       if (shift)  *shift  = 32 - immhb;
   7131       if (szBlg2) *szBlg2 = 1;
   7132       return True;
   7133    }
   7134    if (immh & 1) {
   7135       if (shift)  *shift  = 16 - immhb;
   7136       if (szBlg2) *szBlg2 = 0;
   7137       return True;
   7138    }
   7139    return False;
   7140 }
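
        /* Example: immh=0b0010, immb=0b101 gives immhb = 21, so the lanes
           are 16 bits wide (szBlg2 = 1) and the shift amount is
           32-21 = 11. */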
   7141 
   7142 /* Generate IR to fold all lanes of the V128 value in 'src' as
   7143    characterised by the operator 'op', and return the result in the
   7144    bottom bits of a V128, with all other bits set to zero. */
   7145 static IRTemp math_FOLDV ( IRTemp src, IROp op )
   7146 {
   7147    /* The basic idea is to use repeated applications of Iop_CatEven*
   7148       and Iop_CatOdd* operators to 'src' so as to clone each lane into
   7149       a complete vector.  Then fold all those vectors with 'op' and
   7150       zero out all but the least significant lane. */
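           /* For instance, a 4x32 ADD fold of lanes [d c b a] first clones
              each lane into a full vector ([d d d d], [c c c c], [b b b b],
              [a a a a]), combines those pairwise with 'op' until every lane
              holds d+c+b+a, and finally zeroes lanes 3..1. */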
   7151    switch (op) {
   7152       case Iop_Min8Sx16: case Iop_Min8Ux16:
   7153       case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
   7154          /* NB: temp naming here is misleading -- the naming is for 8
   7155             lanes of 16 bit, whereas what is being operated on is 16
   7156             lanes of 8 bits. */
   7157          IRTemp x76543210 = src;
   7158          IRTemp x76547654 = newTempV128();
   7159          IRTemp x32103210 = newTempV128();
   7160          assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
   7161          assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
   7162          IRTemp x76767676 = newTempV128();
   7163          IRTemp x54545454 = newTempV128();
   7164          IRTemp x32323232 = newTempV128();
   7165          IRTemp x10101010 = newTempV128();
   7166          assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
   7167          assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
   7168          assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
   7169          assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
   7170          IRTemp x77777777 = newTempV128();
   7171          IRTemp x66666666 = newTempV128();
   7172          IRTemp x55555555 = newTempV128();
   7173          IRTemp x44444444 = newTempV128();
   7174          IRTemp x33333333 = newTempV128();
   7175          IRTemp x22222222 = newTempV128();
   7176          IRTemp x11111111 = newTempV128();
   7177          IRTemp x00000000 = newTempV128();
   7178          assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
   7179          assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
   7180          assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
   7181          assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
   7182          assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
   7183          assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
   7184          assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
   7185          assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
   7186          /* Naming not misleading after here. */
   7187          IRTemp xAllF = newTempV128();
   7188          IRTemp xAllE = newTempV128();
   7189          IRTemp xAllD = newTempV128();
   7190          IRTemp xAllC = newTempV128();
   7191          IRTemp xAllB = newTempV128();
   7192          IRTemp xAllA = newTempV128();
   7193          IRTemp xAll9 = newTempV128();
   7194          IRTemp xAll8 = newTempV128();
   7195          IRTemp xAll7 = newTempV128();
   7196          IRTemp xAll6 = newTempV128();
   7197          IRTemp xAll5 = newTempV128();
   7198          IRTemp xAll4 = newTempV128();
   7199          IRTemp xAll3 = newTempV128();
   7200          IRTemp xAll2 = newTempV128();
   7201          IRTemp xAll1 = newTempV128();
   7202          IRTemp xAll0 = newTempV128();
   7203          assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
   7204          assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
   7205          assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
   7206          assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
   7207          assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
   7208          assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
   7209          assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
   7210          assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
   7211          assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
   7212          assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
   7213          assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
   7214          assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
   7215          assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
   7216          assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
   7217          assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
   7218          assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
   7219          IRTemp maxFE = newTempV128();
   7220          IRTemp maxDC = newTempV128();
   7221          IRTemp maxBA = newTempV128();
   7222          IRTemp max98 = newTempV128();
   7223          IRTemp max76 = newTempV128();
   7224          IRTemp max54 = newTempV128();
   7225          IRTemp max32 = newTempV128();
   7226          IRTemp max10 = newTempV128();
   7227          assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
   7228          assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
   7229          assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
   7230          assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
   7231          assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
   7232          assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
   7233          assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
   7234          assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
   7235          IRTemp maxFEDC = newTempV128();
   7236          IRTemp maxBA98 = newTempV128();
   7237          IRTemp max7654 = newTempV128();
   7238          IRTemp max3210 = newTempV128();
   7239          assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
   7240          assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
   7241          assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
   7242          assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
   7243          IRTemp maxFEDCBA98 = newTempV128();
   7244          IRTemp max76543210 = newTempV128();
   7245          assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
   7246          assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
   7247          IRTemp maxAllLanes = newTempV128();
   7248          assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
   7249                                        mkexpr(max76543210)));
   7250          IRTemp res = newTempV128();
   7251          assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
   7252          return res;
   7253       }
   7254       case Iop_Min16Sx8: case Iop_Min16Ux8:
   7255       case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
   7256          IRTemp x76543210 = src;
   7257          IRTemp x76547654 = newTempV128();
   7258          IRTemp x32103210 = newTempV128();
   7259          assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
   7260          assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
   7261          IRTemp x76767676 = newTempV128();
   7262          IRTemp x54545454 = newTempV128();
   7263          IRTemp x32323232 = newTempV128();
   7264          IRTemp x10101010 = newTempV128();
   7265          assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
   7266          assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
   7267          assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
   7268          assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
   7269          IRTemp x77777777 = newTempV128();
   7270          IRTemp x66666666 = newTempV128();
   7271          IRTemp x55555555 = newTempV128();
   7272          IRTemp x44444444 = newTempV128();
   7273          IRTemp x33333333 = newTempV128();
   7274          IRTemp x22222222 = newTempV128();
   7275          IRTemp x11111111 = newTempV128();
   7276          IRTemp x00000000 = newTempV128();
   7277          assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
   7278          assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
   7279          assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
   7280          assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
   7281          assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
   7282          assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
   7283          assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
   7284          assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
   7285          IRTemp max76 = newTempV128();
   7286          IRTemp max54 = newTempV128();
   7287          IRTemp max32 = newTempV128();
   7288          IRTemp max10 = newTempV128();
   7289          assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
   7290          assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
   7291          assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
   7292          assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
   7293          IRTemp max7654 = newTempV128();
   7294          IRTemp max3210 = newTempV128();
   7295          assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
   7296          assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
   7297          IRTemp max76543210 = newTempV128();
   7298          assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
   7299          IRTemp res = newTempV128();
   7300          assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
   7301          return res;
   7302       }
   7303       case Iop_Max32Fx4: case Iop_Min32Fx4:
   7304       case Iop_Min32Sx4: case Iop_Min32Ux4:
   7305       case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
   7306          IRTemp x3210 = src;
   7307          IRTemp x3232 = newTempV128();
   7308          IRTemp x1010 = newTempV128();
   7309          assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
   7310          assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
   7311          IRTemp x3333 = newTempV128();
   7312          IRTemp x2222 = newTempV128();
   7313          IRTemp x1111 = newTempV128();
   7314          IRTemp x0000 = newTempV128();
   7315          assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
   7316          assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
   7317          assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
   7318          assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
   7319          IRTemp max32 = newTempV128();
   7320          IRTemp max10 = newTempV128();
   7321          assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
   7322          assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
   7323          IRTemp max3210 = newTempV128();
   7324          assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
   7325          IRTemp res = newTempV128();
   7326          assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
   7327          return res;
   7328       }
   7329       case Iop_Add64x2: {
   7330          IRTemp x10 = src;
   7331          IRTemp x00 = newTempV128();
   7332          IRTemp x11 = newTempV128();
   7333          assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
   7334          assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
   7335          IRTemp max10 = newTempV128();
   7336          assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
   7337          IRTemp res = newTempV128();
   7338          assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
   7339          return res;
   7340       }
   7341       default:
   7342          vassert(0);
   7343    }
   7344 }
   7345 
   7346 
   7347 /* Generate IR for TBL and TBX.  This deals with the 128 bit case
   7348    only. */
   7349 static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
   7350                              IRTemp oor_values )
   7351 {
   7352    vassert(len <= 3);
   7353 
   7354    /* Generate some useful constants as concisely as possible. */
   7355    IRTemp half15 = newTemp(Ity_I64);
   7356    assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
   7357    IRTemp half16 = newTemp(Ity_I64);
   7358    assign(half16, mkU64(0x1010101010101010ULL));
   7359 
   7360    /* A zero vector */
   7361    IRTemp allZero = newTempV128();
   7362    assign(allZero, mkV128(0x0000));
   7363    /* A vector containing 15 in each 8-bit lane */
   7364    IRTemp all15 = newTempV128();
   7365    assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
   7366    /* A vector containing 16 in each 8-bit lane */
   7367    IRTemp all16 = newTempV128();
   7368    assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
   7369    /* A vector containing 32 in each 8-bit lane */
   7370    IRTemp all32 = newTempV128();
   7371    assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
   7372    /* A vector containing 48 in each 8-bit lane */
   7373    IRTemp all48 = newTempV128();
   7374    assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
   7375    /* A vector containing 64 in each 8-bit lane */
   7376    IRTemp all64 = newTempV128();
   7377    assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
   7378 
   7379    /* Group the 16/32/48/64 vectors so as to be indexable. */
   7380    IRTemp allXX[4] = { all16, all32, all48, all64 };
   7381 
   7382    /* Compute the result for each table vector, with zeroes in places
   7383       where the index values are out of range, and OR them into the
   7384       running vector. */
   7385    IRTemp running_result = newTempV128();
   7386    assign(running_result, mkV128(0));
   7387 
   7388    UInt tabent;
   7389    for (tabent = 0; tabent <= len; tabent++) {
   7390       vassert(tabent < 4);
   7391       IRTemp bias = newTempV128();
   7392       assign(bias,
   7393              mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
   7394       IRTemp biased_indices = newTempV128();
   7395       assign(biased_indices,
   7396              binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
   7397       IRTemp valid_mask = newTempV128();
   7398       assign(valid_mask,
   7399              binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
   7400       IRTemp safe_biased_indices = newTempV128();
   7401       assign(safe_biased_indices,
   7402              binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
   7403       IRTemp results_or_junk = newTempV128();
   7404       assign(results_or_junk,
   7405              binop(Iop_Perm8x16, mkexpr(tab[tabent]),
   7406                                  mkexpr(safe_biased_indices)));
   7407       IRTemp results_or_zero = newTempV128();
   7408       assign(results_or_zero,
   7409              binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
   7410       /* And OR that into the running result. */
   7411       IRTemp tmp = newTempV128();
   7412       assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
   7413                         mkexpr(running_result)));
   7414       running_result = tmp;
   7415    }
   7416 
   7417    /* So now running_result holds the overall result where the indices
   7418       are in range, and zero in out-of-range lanes.  Now we need to
   7419       compute an overall validity mask and use this to copy in the
   7420       lanes from oor_values for out-of-range indices.  This is
   7421       unnecessary for TBL but will get folded out by iropt, so we lean
   7422       on that and generate the same code for TBL and TBX here. */
   7423    IRTemp overall_valid_mask = newTempV128();
   7424    assign(overall_valid_mask,
   7425           binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
   7426    IRTemp result = newTempV128();
   7427    assign(result,
   7428           binop(Iop_OrV128,
   7429                 mkexpr(running_result),
   7430                 binop(Iop_AndV128,
   7431                       mkexpr(oor_values),
   7432                       unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
   7433    return result;
   7434 }
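
        /* Example: for a two-register table (len == 1), a src byte of 0x13
           selects byte 3 of tab[1] (0x13 - 16), whereas a src byte of 0x25
           is out of range (>= 32) and so produces 0 for TBL, or the
           corresponding oor_values byte for TBX. */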
   7435 
   7436 
   7437 /* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
   7438    an op which takes two I64s and produces a V128.  That is, a widening
   7439    operator.  Generate IR which applies |opI64x2toV128| to either the
   7440    lower (if |is2| is False) or upper (if |is2| is True) halves of
   7441    |argL| and |argR|, and return the value in a new IRTemp.
   7442 */
   7443 static
   7444 IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
   7445                                    IRExpr* argL, IRExpr* argR )
   7446 {
   7447    IRTemp res   = newTempV128();
   7448    IROp   slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
   7449    assign(res, binop(opI64x2toV128, unop(slice, argL),
   7450                                     unop(slice, argR)));
   7451    return res;
   7452 }
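
        /* For instance, with |is2| == False and Iop_Mull32Sx2, this takes
           the two 32-bit lanes in the lower halves of |argL| and |argR| and
           produces their two signed 64-bit products as a V128. */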
   7453 
   7454 
   7455 /* Generate signed/unsigned absolute difference vector IR. */
   7456 static
   7457 IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
   7458 {
   7459    vassert(size <= 3);
   7460    IRTemp argL = newTempV128();
   7461    IRTemp argR = newTempV128();
   7462    IRTemp msk  = newTempV128();
   7463    IRTemp res  = newTempV128();
   7464    assign(argL, argLE);
   7465    assign(argR, argRE);
   7466    assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
   7467                      mkexpr(argL), mkexpr(argR)));
   7468    assign(res,
   7469           binop(Iop_OrV128,
   7470                 binop(Iop_AndV128,
   7471                       binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
   7472                       mkexpr(msk)),
   7473                 binop(Iop_AndV128,
   7474                       binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
   7475                       unop(Iop_NotV128, mkexpr(msk)))));
   7476    return res;
   7477 }
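
        /* Example: for unsigned 8-bit lanes holding 5 and 9, the mask lane
           is zero (5 > 9 is false), so the result selects 9-5 = 4.  Signed
           lanes work the same way via the signed compare. */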
   7478 
   7479 
   7480 /* Generate IR that takes a V128 and sign- or zero-widens
   7481    either the lower or upper set of lanes to twice-as-wide,
   7482    resulting in a new V128 value. */
   7483 static
   7484 IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
   7485                                    UInt sizeNarrow, IRExpr* srcE )
   7486 {
   7487    IRTemp src = newTempV128();
   7488    IRTemp res = newTempV128();
   7489    assign(src, srcE);
   7490    switch (sizeNarrow) {
   7491       case X10:
   7492          assign(res,
   7493                 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
   7494                       binop(fromUpperHalf ? Iop_InterleaveHI32x4
   7495                                           : Iop_InterleaveLO32x4,
   7496                             mkexpr(src),
   7497                             mkexpr(src)),
   7498                       mkU8(32)));
   7499          break;
   7500       case X01:
   7501          assign(res,
   7502                 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
   7503                       binop(fromUpperHalf ? Iop_InterleaveHI16x8
   7504                                           : Iop_InterleaveLO16x8,
   7505                             mkexpr(src),
   7506                             mkexpr(src)),
   7507                       mkU8(16)));
   7508          break;
   7509       case X00:
   7510          assign(res,
   7511                 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
   7512                       binop(fromUpperHalf ? Iop_InterleaveHI8x16
   7513                                           : Iop_InterleaveLO8x16,
   7514                             mkexpr(src),
   7515                             mkexpr(src)),
   7516                       mkU8(8)));
   7517          break;
   7518       default:
   7519          vassert(0);
   7520    }
   7521    return res;
   7522 }
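
        /* For example, zero-widening the lower 32-bit lanes (sizeNarrow ==
           X10): InterleaveLO32x4 duplicates each of the two low lanes into
           a 64-bit slot, and the logical shift right by 32 then leaves each
           lane zero-extended to 64 bits.  The arithmetic shift gives the
           sign-extended variant. */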
   7523 
   7524 
   7525 /* Generate IR that takes a V128 and sign- or zero-widens
   7526    either the even or odd lanes to twice-as-wide,
   7527    resulting in a new V128 value. */
   7528 static
   7529 IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
   7530                                       UInt sizeNarrow, IRExpr* srcE )
   7531 {
   7532    IRTemp src   = newTempV128();
   7533    IRTemp res   = newTempV128();
   7534    IROp   opSAR = mkVecSARN(sizeNarrow+1);
   7535    IROp   opSHR = mkVecSHRN(sizeNarrow+1);
   7536    IROp   opSHL = mkVecSHLN(sizeNarrow+1);
   7537    IROp   opSxR = zWiden ? opSHR : opSAR;
   7538    UInt   amt   = 0;
   7539    switch (sizeNarrow) {
   7540       case X10: amt = 32; break;
   7541       case X01: amt = 16; break;
   7542       case X00: amt = 8;  break;
   7543       default: vassert(0);
   7544    }
   7545    assign(src, srcE);
   7546    if (fromOdd) {
   7547       assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
   7548    } else {
   7549       assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
   7550                                mkU8(amt)));
   7551    }
   7552    return res;
   7553 }
   7554 
   7555 
   7556 /* Generate IR that takes two V128s and narrows (takes lower half)
   7557    of each lane, producing a single V128 value. */
   7558 static
   7559 IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
   7560 {
   7561    IRTemp res = newTempV128();
   7562    assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
   7563                      mkexpr(argHi), mkexpr(argLo)));
   7564    return res;
   7565 }
   7566 
   7567 
   7568 /* Return a temp which holds the vector dup of the lane of width
   7569    (1 << size) obtained from src[laneNo]. */
   7570 static
   7571 IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
   7572 {
   7573    vassert(size <= 3);
   7574    /* Normalise |laneNo| so it is of the form
   7575       x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
   7576       This puts the bits we want to inspect at constant offsets
   7577       regardless of the value of |size|.
   7578    */
   7579    UInt ix = laneNo << size;
   7580    vassert(ix <= 15);
   7581    IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
   7582    switch (size) {
   7583       case 0: /* B */
   7584          ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
   7585          /* fallthrough */
   7586       case 1: /* H */
   7587          ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
   7588          /* fallthrough */
   7589       case 2: /* S */
   7590          ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
   7591          /* fallthrough */
   7592       case 3: /* D */
   7593          ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
   7594          break;
   7595       default:
   7596          vassert(0);
   7597    }
   7598    IRTemp res = newTempV128();
   7599    assign(res, src);
   7600    Int i;
   7601    for (i = 3; i >= 0; i--) {
   7602       if (ops[i] == Iop_INVALID)
   7603          break;
   7604       IRTemp tmp = newTempV128();
   7605       assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
   7606       res = tmp;
   7607    }
   7608    return res;
   7609 }
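
        /* Example: size=1 (16-bit lanes) and laneNo=5 give ix = 0b1010, so
           the chain applied is InterleaveHI64x2, then CatEvenLanes32x4,
           then CatOddLanes16x8, after which every lane holds src.h[5]. */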
   7610 
   7611 
   7612 /* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
   7613    selector encoded as shown below.  Return a new V128 holding the
   7614    selected lane from |srcV| dup'd out to V128, and also return the
   7615    lane number, log2 of the lane size in bytes, and width-character via
   7616    *laneNo, *laneSzLg2 and *laneCh respectively.  It may be that imm5
   7617    is an invalid selector, in which case return
   7618    IRTemp_INVALID, 0, 0 and '?' respectively.
   7619 
   7620    imm5 = xxxx1   signifies .b[xxxx]
   7621         = xxx10   .h[xxx]
   7622         = xx100   .s[xx]
   7623         = x1000   .d[x]
   7624         otherwise invalid
   7625 */
   7626 static
   7627 IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
   7628                              /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
   7629                              IRExpr* srcV, UInt imm5 )
   7630 {
   7631    *laneNo    = 0;
   7632    *laneSzLg2 = 0;
   7633    *laneCh    = '?';
   7634 
   7635    if (imm5 & 1) {
   7636       *laneNo    = (imm5 >> 1) & 15;
   7637       *laneSzLg2 = 0;
   7638       *laneCh    = 'b';
   7639    }
   7640    else if (imm5 & 2) {
   7641       *laneNo    = (imm5 >> 2) & 7;
   7642       *laneSzLg2 = 1;
   7643       *laneCh    = 'h';
   7644    }
   7645    else if (imm5 & 4) {
   7646       *laneNo    = (imm5 >> 3) & 3;
   7647       *laneSzLg2 = 2;
   7648       *laneCh    = 's';
   7649    }
   7650    else if (imm5 & 8) {
   7651       *laneNo    = (imm5 >> 4) & 1;
   7652       *laneSzLg2 = 3;
   7653       *laneCh    = 'd';
   7654    }
   7655    else {
   7656       /* invalid */
   7657       return IRTemp_INVALID;
   7658    }
   7659 
   7660    return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
   7661 }
   7662 
   7663 
   7664 /* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
   7665 static
   7666 IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
   7667 {
   7668    IRType ty  = Ity_INVALID;
   7669    IRTemp rcS = IRTemp_INVALID;
   7670    switch (size) {
   7671       case X01:
   7672          vassert(imm <= 0xFFFFULL);
   7673          ty  = Ity_I16;
   7674          rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
   7675          break;
   7676       case X10:
   7677          vassert(imm <= 0xFFFFFFFFULL);
   7678          ty  = Ity_I32;
   7679          rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
   7680          break;
   7681       case X11:
   7682          ty  = Ity_I64;
   7683          rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
   7684       default:
   7685          vassert(0);
   7686    }
   7687    IRTemp rcV = math_DUP_TO_V128(rcS, ty);
   7688    return rcV;
   7689 }
   7690 
   7691 
   7692 /* Let |new64| be a V128 in which only the lower 64 bits are interesting,
   7693    and the upper can contain any value -- it is ignored.  If |is2| is False,
   7694    generate IR to put |new64| in the lower half of vector reg |dd| and zero
   7695    the upper half.  If |is2| is True, generate IR to put |new64| in the upper
   7696    half of vector reg |dd| and leave the lower half unchanged.  This
   7697    simulates the behaviour of the "foo/foo2" instructions in which the
   7698    destination is half the width of sources, for example addhn/addhn2.
   7699 */
   7700 static
   7701 void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
   7702 {
   7703    if (is2) {
   7704       /* Take the old contents of Vdd with its upper half zeroed,
   7705          and OR in |new64| positioned in the upper half. */
   7706       IRTemp t_zero_oldLO = newTempV128();
   7707       assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
   7708       IRTemp t_newHI_zero = newTempV128();
   7709       assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
   7710                                                        mkV128(0x0000)));
   7711       IRTemp res = newTempV128();
   7712       assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
   7713                                     mkexpr(t_newHI_zero)));
   7714       putQReg128(dd, mkexpr(res));
   7715    } else {
   7716       /* This is simple. */
   7717       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
   7718    }
   7719 }
   7720 
   7721 
   7722 /* Compute vector SQABS at lane size |size| for |srcE|, returning
   7723    the q result in |*qabs| and the normal result in |*nabs|. */
   7724 static
   7725 void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
   7726                   IRExpr* srcE, UInt size )
   7727 {
   7728    IRTemp src, mask, maskn, nsub, qsub;
   7729    src = mask = maskn = nsub = qsub = IRTemp_INVALID;
   7730    newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
   7731    assign(src,   srcE);
   7732    assign(mask,  binop(mkVecCMPGTS(size),  mkV128(0x0000), mkexpr(src)));
   7733    assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
   7734    assign(nsub,  binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   7735    assign(qsub,  binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
   7736    assign(*nabs, binop(Iop_OrV128,
   7737                        binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
   7738                        binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
   7739    assign(*qabs, binop(Iop_OrV128,
   7740                        binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
   7741                        binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
   7742 }
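
        /* Example: for 8-bit lanes, SQABS of -128 (0x80) wraps to -128 in
           the normal result but saturates to 127 (0x7F) in the q result;
           callers compare the two to decide whether to set FPSR.QC. */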
   7743 
   7744 
   7745 /* Compute vector SQNEG at lane size |size| for |srcE|, returning
   7746    the q result in |*qneg| and the normal result in |*nneg|. */
   7747 static
   7748 void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
   7749                   IRExpr* srcE, UInt size )
   7750 {
   7751    IRTemp src = IRTemp_INVALID;
   7752    newTempsV128_3(&src, nneg, qneg);
   7753    assign(src,   srcE);
   7754    assign(*nneg, binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   7755    assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
   7756 }
   7757 
   7758 
   7759 /* Zero all except the least significant lane of |srcE|, where |size|
   7760    indicates the lane size in the usual way. */
   7761 static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
   7762 {
   7763    vassert(size < 4);
   7764    IRTemp t = newTempV128();
   7765    assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
   7766    return t;
   7767 }
   7768 
   7769 
   7770 /* Generate IR to compute vector widening MULL from either the lower
   7771    (is2==False) or upper (is2==True) halves of vecN and vecM.  The
   7772    widening multiplies are unsigned when isU==True and signed when
   7773    isU==False.  |size| is the narrow lane size indication.  Optionally,
   7774    the product may be added to or subtracted from vecD, at the wide lane
   7775    size.  This happens when |mas| is 'a' (add) or 's' (sub).  When |mas|
   7776    is 'm' (only multiply) then the accumulate part does not happen, and
   7777    |vecD| is expected to == IRTemp_INVALID.
   7778 
   7779    Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
   7780    are allowed.  The result is returned in a new IRTemp
   7781    via *res. */
   7782 static
   7783 void math_MULL_ACC ( /*OUT*/IRTemp* res,
   7784                      Bool is2, Bool isU, UInt size, HChar mas,
   7785                      IRTemp vecN, IRTemp vecM, IRTemp vecD )
   7786 {
   7787    vassert(res && *res == IRTemp_INVALID);
   7788    vassert(size <= 2);
   7789    vassert(mas == 'm' || mas == 'a' || mas == 's');
   7790    if (mas == 'm') vassert(vecD == IRTemp_INVALID);
   7791    IROp   mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
   7792    IROp   accOp = (mas == 'a') ? mkVecADD(size+1)
   7793                   : (mas == 's' ? mkVecSUB(size+1)
   7794                   : Iop_INVALID);
   7795    IRTemp mul   = math_BINARY_WIDENING_V128(is2, mulOp,
   7796                                             mkexpr(vecN), mkexpr(vecM));
   7797    *res = newTempV128();
   7798    assign(*res, mas == 'm' ? mkexpr(mul)
   7799                            : binop(accOp, mkexpr(vecD), mkexpr(mul)));
   7800 }
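
        /* For instance, a UMULL-style use would be isU=True, mas='m', and
           an SMLAL2-style use would be is2=True, isU=False, mas='a' with
           the accumulator in vecD. */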
   7801 
   7802 
   7803 /* Same as math_MULL_ACC, except the multiply is a signed widening
   7804    multiply and the product is doubled before being added to or
   7805    subtracted from the accumulated value; both stages saturate.
   7806    In all cases, saturation residuals are returned
   7807    via (sat1q, sat1n), and in the accumulate cases,
   7808    via (sat2q, sat2n) too.  All results are returned in new temporaries.
   7809    In the no-accumulate case, *sat2q and *sat2n are never instantiated,
   7810    so the caller can tell this has happened. */
   7811 static
   7812 void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
   7813                         /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
   7814                         /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
   7815                         Bool is2, UInt size, HChar mas,
   7816                         IRTemp vecN, IRTemp vecM, IRTemp vecD )
   7817 {
   7818    vassert(size <= 2);
   7819    vassert(mas == 'm' || mas == 'a' || mas == 's');
   7820    /* Compute
   7821          sat1q = vecN.D[is2] *sq vecM.D[is2] *q 2
   7822          sat1n = vecN.D[is2] *s  vecM.D[is2] *  2
   7823       IOW take either the low or high halves of vecN and vecM, signed widen,
   7824       multiply, double that, and signedly saturate.  Also compute the same
   7825       but without saturation.
   7826    */
   7827    vassert(sat2q && *sat2q == IRTemp_INVALID);
   7828    vassert(sat2n && *sat2n == IRTemp_INVALID);
   7829    newTempsV128_3(sat1q, sat1n, res);
   7830    IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
   7831                                          mkexpr(vecN), mkexpr(vecM));
   7832    IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
   7833                                          mkexpr(vecN), mkexpr(vecM));
   7834    assign(*sat1q, mkexpr(tq));
   7835    assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));
   7836 
   7837    /* If there is no accumulation, the final result is sat1q,
   7838       and there's no assignment to sat2q or sat2n. */
   7839    if (mas == 'm') {
   7840       assign(*res, mkexpr(*sat1q));
   7841       return;
   7842    }
   7843 
   7844    /* Compute
   7845          sat2q  = vecD +sq/-sq sat1q
   7846          sat2n  = vecD +/-     sat1n
   7847          result = sat2q
   7848    */
   7849    newTempsV128_2(sat2q, sat2n);
   7850    assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
   7851                         mkexpr(vecD), mkexpr(*sat1q)));
   7852    assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
   7853                         mkexpr(vecD), mkexpr(*sat1n)));
   7854    assign(*res, mkexpr(*sat2q));
   7855 }
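
/* Worked example (illustrative): SQDMULL with 16-bit lanes and
   lane values n = m = 0x8000 (-32768):
      widened product = 0x40000000, doubled = 0x80000000,
   which overflows I32, so the sat1q lane clips to 0x7FFFFFFF while
   the sat1n lane wraps to 0x80000000.  The q/n pairs differ exactly
   when the corresponding saturating step clipped, which is what the
   QCFLAG update helpers below test for. */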
   7856 
   7857 
   7858 /* Generate IR for widening signed vector multiplies.  The operands
   7859    have their lane width signedly widened, and they are then multiplied
   7860    at the wider width, returning results in two new IRTemps. */
   7861 static
   7862 void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
   7863                   UInt sizeNarrow, IRTemp argL, IRTemp argR )
   7864 {
   7865    vassert(sizeNarrow <= 2);
   7866    newTempsV128_2(resHI, resLO);
   7867    IRTemp argLhi = newTemp(Ity_I64);
   7868    IRTemp argLlo = newTemp(Ity_I64);
   7869    IRTemp argRhi = newTemp(Ity_I64);
   7870    IRTemp argRlo = newTemp(Ity_I64);
   7871    assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
   7872    assign(argLlo, unop(Iop_V128to64,   mkexpr(argL)));
   7873    assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
   7874    assign(argRlo, unop(Iop_V128to64,   mkexpr(argR)));
   7875    IROp opMulls = mkVecMULLS(sizeNarrow);
   7876    assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
   7877    assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
   7878 }
   7879 
   7880 
   7881 /* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
   7882    double that, possibly add a rounding constant (R variants), and take
   7883    the high half. */
   7884 static
   7885 void math_SQDMULH ( /*OUT*/IRTemp* res,
   7886                     /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
   7887                     Bool isR, UInt size, IRTemp vN, IRTemp vM )
   7888 {
   7889    vassert(size == X01 || size == X10); /* h or s only */
   7890 
   7891    newTempsV128_3(res, sat1q, sat1n);
   7892 
   7893    IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
   7894    math_MULLS(&mullsHI, &mullsLO, size, vN, vM);
   7895 
   7896    IROp   addWide = mkVecADD(size+1);
   7897 
   7898    if (isR) {
   7899       assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));
   7900 
   7901       Int    rcShift    = size == X01 ? 15 : 31;
   7902       IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
   7903       assign(*sat1n,
   7904              binop(mkVecCATODDLANES(size),
   7905                    binop(addWide,
   7906                          binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
   7907                          mkexpr(roundConst)),
   7908                    binop(addWide,
   7909                          binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
   7910                          mkexpr(roundConst))));
   7911    } else {
   7912       assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));
   7913 
   7914       assign(*sat1n,
   7915              binop(mkVecCATODDLANES(size),
   7916                    binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
   7917                    binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
   7918    }
   7919 
   7920    assign(*res, mkexpr(*sat1q));
   7921 }
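
/* Worked example (illustrative): SQRDMULH with 16-bit lanes and
   n = m = 0x4000 (+16384):
      2*n*m = 0x20000000, plus the rounding constant (1 << 15)
            = 0x20008000, whose top 16 bits give a lane result of
      0x2000, with no saturation.
   Saturation is only possible for n = m = 0x8000, where the doubled
   product 2^31 overflows I32; sat1q then clips but sat1n wraps. */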
   7922 
   7923 
   7924 /* Generate IR for SQSHL, UQSHL, SQSHLU by imm.  Put the result in
   7925    a new temp in *res, and the Q difference pair in new temps in
   7926    *qDiff1 and *qDiff2 respectively.  |nm| denotes which of the
   7927    three operations it is. */
   7928 static
   7929 void math_QSHL_IMM ( /*OUT*/IRTemp* res,
   7930                      /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
   7931                      IRTemp src, UInt size, UInt shift, const HChar* nm )
   7932 {
   7933    vassert(size <= 3);
   7934    UInt laneBits = 8 << size;
   7935    vassert(shift < laneBits);
   7936    newTempsV128_3(res, qDiff1, qDiff2);
   7937    IRTemp z128 = newTempV128();
   7938    assign(z128, mkV128(0x0000));
   7939 
   7940    /* UQSHL */
   7941    if (vex_streq(nm, "uqshl")) {
   7942       IROp qop = mkVecQSHLNSATUU(size);
   7943       assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
   7944       if (shift == 0) {
   7945          /* No shift means no saturation. */
   7946          assign(*qDiff1, mkexpr(z128));
   7947          assign(*qDiff2, mkexpr(z128));
   7948       } else {
   7949          /* Saturation has occurred if any of the shifted-out bits are
   7950             nonzero.  We get the shifted-out bits by right-shifting the
   7951             original value. */
   7952          UInt rshift = laneBits - shift;
   7953          vassert(rshift >= 1 && rshift < laneBits);
   7954          assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
   7955          assign(*qDiff2, mkexpr(z128));
   7956       }
   7957       return;
   7958    }
   7959 
   7960    /* SQSHL */
   7961    if (vex_streq(nm, "sqshl")) {
   7962       IROp qop = mkVecQSHLNSATSS(size);
   7963       assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
   7964       if (shift == 0) {
   7965          /* No shift means no saturation. */
   7966          assign(*qDiff1, mkexpr(z128));
   7967          assign(*qDiff2, mkexpr(z128));
   7968       } else {
   7969          /* Saturation has occurred if any of the shifted-out bits are
   7970             different from the top bit of the original value. */
   7971          UInt rshift = laneBits - 1 - shift;
   7972          vassert(rshift >= 0 && rshift < laneBits-1);
   7973          /* qDiff1 is the shifted out bits, and the top bit of the original
   7974             value, preceded by zeroes. */
   7975          assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
   7976          /* qDiff2 is the top bit of the original value, cloned the
   7977             correct number of times. */
   7978          assign(*qDiff2, binop(mkVecSHRN(size),
   7979                                binop(mkVecSARN(size), mkexpr(src),
   7980                                                       mkU8(laneBits-1)),
   7981                                mkU8(rshift)));
   7982          /* This also succeeds in comparing the top bit of the original
   7983             value to itself, which is a bit stupid, but not wrong. */
   7984       }
   7985       return;
   7986    }
   7987 
   7988    /* SQSHLU */
   7989    if (vex_streq(nm, "sqshlu")) {
   7990       IROp qop = mkVecQSHLNSATSU(size);
   7991       assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
   7992       if (shift == 0) {
   7993          /* If there's no shift, saturation depends on the top bit
   7994             of the source. */
   7995          assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
   7996          assign(*qDiff2, mkexpr(z128));
   7997       } else {
   7998          /* Saturation has occurred if any of the shifted-out bits are
   7999             nonzero.  We get the shifted-out bits by right-shifting the
   8000             original value. */
   8001          UInt rshift = laneBits - shift;
   8002          vassert(rshift >= 1 && rshift < laneBits);
   8003          assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
   8004          assign(*qDiff2, mkexpr(z128));
   8005       }
   8006       return;
   8007    }
   8008 
   8009    vassert(0);
   8010 }
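
/* Worked example (illustrative): "uqshl" on byte lanes with
   shift == 2 and a lane value of 0xC3:
      res    lane = UQSHL(0xC3, 2) = 0xFF   (clipped)
      qDiff1 lane = 0xC3 >> 6      = 0x03   (the shifted-out bits)
      qDiff2 lane = 0x00
   Since qDiff1 != qDiff2, the caller's QCFLAG update records the
   saturation; for an in-range input both would be zero. */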
   8011 
   8012 
   8013 /* Generate IR to do SRHADD and URHADD. */
   8014 static
   8015 IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
   8016 {
   8017    /* Generate this:
   8018       (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
   8019    */
   8020    vassert(size <= 3);
   8021    IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
   8022    IROp opADD = mkVecADD(size);
   8023    /* The only tricky bit is to generate the correct vector 1 constant. */
   8024    const ULong ones64[4]
   8025       = { 0x0101010101010101ULL, 0x0001000100010001ULL,
   8026           0x0000000100000001ULL, 0x0000000000000001ULL };
   8027    IRTemp imm64 = newTemp(Ity_I64);
   8028    assign(imm64, mkU64(ones64[size]));
   8029    IRTemp vecOne = newTempV128();
   8030    assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
   8031    IRTemp scaOne = newTemp(Ity_I8);
   8032    assign(scaOne, mkU8(1));
   8033    IRTemp res = newTempV128();
   8034    assign(res,
   8035           binop(opADD,
   8036                 binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
   8037                 binop(opADD,
   8038                       binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
   8039                       binop(opSHR,
   8040                             binop(opADD,
   8041                                   binop(opADD,
   8042                                         binop(Iop_AndV128, mkexpr(aa),
   8043                                                            mkexpr(vecOne)),
   8044                                         binop(Iop_AndV128, mkexpr(bb),
   8045                                                            mkexpr(vecOne))
   8046                                   ),
   8047                                   mkexpr(vecOne)
   8048                             ),
   8049                             mkexpr(scaOne)
   8050                       )
   8051                 )
   8052           )
   8053    );
   8054    return res;
   8055 }
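
/* Sanity check of that identity (illustrative): for A = 7, B = 8,
      (7 >> 1) + (8 >> 1) + (((7 & 1) + (8 & 1) + 1) >> 1)
    = 3 + 4 + ((1 + 0 + 1) >> 1)  =  3 + 4 + 1  =  8,
   matching the architectural (A + B + 1) >> 1 = 8 without ever
   needing a lane-width-plus-one-bit intermediate. */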
   8056 
   8057 
   8058 /* QCFLAG tracks the SIMD sticky saturation status.  Update the status
   8059    thusly: if, after application of |opZHI| to both |qres| and |nres|,
   8060    they have the same value, leave QCFLAG unchanged.  Otherwise, set it
   8061    (implicitly) to 1.  |opZHI| may only be one of the Iop_ZeroHIxxofV128
   8062    operators, or Iop_INVALID, in which case |qres| and |nres| are used
   8063    unmodified.  The presence of |opZHI| means this function can be used to
   8064    generate QCFLAG update code for both scalar and vector SIMD operations.
   8065 */
   8066 static
   8067 void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
   8068 {
   8069    IRTemp diff      = newTempV128();
   8070    IRTemp oldQCFLAG = newTempV128();
   8071    IRTemp newQCFLAG = newTempV128();
   8072    if (opZHI == Iop_INVALID) {
   8073       assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
   8074    } else {
   8075       vassert(opZHI == Iop_ZeroHI64ofV128
   8076               || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
   8077       assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
   8078    }
   8079    assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
   8080    assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
   8081    stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
   8082 }
   8083 
   8084 
   8085 /* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
   8086    are used unmodified, hence suitable for QCFLAG updates for whole-vector
   8087    operations. */
   8088 static
   8089 void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
   8090 {
   8091    updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
   8092 }
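
/* Typical call sequence (illustrative, mirroring the uses further
   down):
      IRTemp qres = IRTemp_INVALID, nres = IRTemp_INVALID;
      math_SQNEG(&qres, &nres, getQReg128(nn), size);
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
   Any lane where the saturating and plain results differ makes the
   OR-accumulated QCFLAG nonzero. */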
   8093 
   8094 
   8095 /* Generate IR to rearrange two vector values in a way which is useful
   8096    for doing S/D add-pair etc operations.  There are 3 cases:
   8097 
   8098    2d:  [m1 m0] [n1 n0]  -->  [m1 n1] [m0 n0]
   8099 
   8100    4s:  [m3 m2 m1 m0] [n3 n2 n1 n0]  -->  [m3 m1 n3 n1] [m2 m0 n2 n0]
   8101 
   8102    2s:  [m3 m2 m1 m0] [n3 n2 n1 n0]  -->  [0 0 m1 n1] [0 0 m0 n0]
   8103 
   8104    The cases are distinguished as follows:
   8105    isD == True,  bitQ == 1  =>  2d
   8106    isD == False, bitQ == 1  =>  4s
   8107    isD == False, bitQ == 0  =>  2s
   8108 */
   8109 static
   8110 void math_REARRANGE_FOR_FLOATING_PAIRWISE (
   8111         /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
   8112         IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
   8113      )
   8114 {
   8115    vassert(rearrL && *rearrL == IRTemp_INVALID);
   8116    vassert(rearrR && *rearrR == IRTemp_INVALID);
   8117    *rearrL = newTempV128();
   8118    *rearrR = newTempV128();
   8119    if (isD) {
   8120       // 2d case
   8121       vassert(bitQ == 1);
   8122       assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
   8123       assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
   8124    }
   8125    else if (!isD && bitQ == 1) {
   8126       // 4s case
   8127       assign(*rearrL, binop(Iop_CatOddLanes32x4,  mkexpr(vecM), mkexpr(vecN)));
   8128       assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
   8129    } else {
   8130       // 2s case
   8131       vassert(!isD && bitQ == 0);
   8132       IRTemp m1n1m0n0 = newTempV128();
   8133       IRTemp m0n0m1n1 = newTempV128();
   8134       assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
   8135                              mkexpr(vecM), mkexpr(vecN)));
   8136       assign(m0n0m1n1, triop(Iop_SliceV128,
   8137                              mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
   8138       assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
   8139       assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
   8140    }
   8141 }
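
/* For example (illustrative): a 4s FADDP can complete with one
   lane-wise add of the two rearranged values,
      [m3 m1 n3 n1] + [m2 m0 n2 n0]
    = [m3+m2, m1+m0, n3+n2, n1+n0],
   which is exactly the pairwise-sum lane layout required. */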
   8142 
   8143 
   8144 /* Returns 2.0 ^ (-n) for n in 1 .. 64 */
   8145 static Double two_to_the_minus ( Int n )
   8146 {
   8147    if (n == 1) return 0.5;
   8148    vassert(n >= 2 && n <= 64);
   8149    Int half = n / 2;
   8150    return two_to_the_minus(half) * two_to_the_minus(n - half);
   8151 }
   8152 
   8153 
   8154 /* Returns 2.0 ^ n for n in 1 .. 64 */
   8155 static Double two_to_the_plus ( Int n )
   8156 {
   8157    if (n == 1) return 2.0;
   8158    vassert(n >= 2 && n <= 64);
   8159    Int half = n / 2;
   8160    return two_to_the_plus(half) * two_to_the_plus(n - half);
   8161 }
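
/* Both helpers recurse by halving the exponent, e.g. (illustrative)
      two_to_the_plus(5) = two_to_the_plus(2) * two_to_the_plus(3)
                         = 4.0 * 8.0 = 32.0,
   and every intermediate is a power of two, hence exactly
   representable in a Double. */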
   8162 
   8163 
   8164 /*------------------------------------------------------------*/
   8165 /*--- SIMD and FP instructions                             ---*/
   8166 /*------------------------------------------------------------*/
   8167 
   8168 static
   8169 Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
   8170 {
   8171    /* 31  29     23  21 20 15 14   10 9 4
   8172       0 q 101110 op2 0  m  0  imm4 0  n d
   8173       Decode fields: op2
   8174    */
   8175 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   8176    if (INSN(31,31) != 0
   8177        || INSN(29,24) != BITS6(1,0,1,1,1,0)
   8178        || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
   8179       return False;
   8180    }
   8181    UInt bitQ = INSN(30,30);
   8182    UInt op2  = INSN(23,22);
   8183    UInt mm   = INSN(20,16);
   8184    UInt imm4 = INSN(14,11);
   8185    UInt nn   = INSN(9,5);
   8186    UInt dd   = INSN(4,0);
   8187 
   8188    if (op2 == BITS2(0,0)) {
   8189       /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
   8190       IRTemp sHi = newTempV128();
   8191       IRTemp sLo = newTempV128();
   8192       IRTemp res = newTempV128();
   8193       assign(sHi, getQReg128(mm));
   8194       assign(sLo, getQReg128(nn));
   8195       if (bitQ == 1) {
   8196          if (imm4 == 0) {
   8197             assign(res, mkexpr(sLo));
   8198          } else {
   8199             vassert(imm4 >= 1 && imm4 <= 15);
   8200             assign(res, triop(Iop_SliceV128,
   8201                               mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
   8202          }
   8203          putQReg128(dd, mkexpr(res));
   8204          DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
   8205       } else {
   8206          if (imm4 >= 8) return False;
   8207          if (imm4 == 0) {
   8208             assign(res, mkexpr(sLo));
   8209          } else {
   8210             vassert(imm4 >= 1 && imm4 <= 7);
   8211             IRTemp hi64lo64 = newTempV128();
   8212             assign(hi64lo64, binop(Iop_InterleaveLO64x2,
   8213                                    mkexpr(sHi), mkexpr(sLo)));
   8214             assign(res, triop(Iop_SliceV128,
   8215                               mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
   8216          }
   8217          putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
   8218          DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
   8219       }
   8220       return True;
   8221    }
   8222 
   8223    return False;
   8224 #  undef INSN
   8225 }
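
/* Decode example (illustrative): "ext v0.16b, v1.16b, v2.16b, #3"
   arrives with bitQ=1, dd=0, nn=1, mm=2, imm4=3 and becomes
   Iop_SliceV128(v2, v1, 3): the low 13 result bytes are bytes 3..15
   of v1 and the top 3 bytes are bytes 0..2 of v2. */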
   8226 
   8227 
   8228 static
   8229 Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
   8230 {
   8231    /* 31  29     23  21 20 15 14  12 11 9 4
   8232       0 q 001110 op2 0  m  0  len op 00 n d
   8233       Decode fields: op2,len,op
   8234    */
   8235 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   8236    if (INSN(31,31) != 0
   8237        || INSN(29,24) != BITS6(0,0,1,1,1,0)
   8238        || INSN(21,21) != 0
   8239        || INSN(15,15) != 0
   8240        || INSN(11,10) != BITS2(0,0)) {
   8241       return False;
   8242    }
   8243    UInt bitQ  = INSN(30,30);
   8244    UInt op2   = INSN(23,22);
   8245    UInt mm    = INSN(20,16);
   8246    UInt len   = INSN(14,13);
   8247    UInt bitOP = INSN(12,12);
   8248    UInt nn    = INSN(9,5);
   8249    UInt dd    = INSN(4,0);
   8250 
   8251    if (op2 == X00) {
   8252       /* -------- 00,xx,0 TBL, xx register table -------- */
   8253       /* -------- 00,xx,1 TBX, xx register table -------- */
   8254       /* 31  28        20 15 14  12  9 4
   8255          0q0 01110 000 m  0  len 000 n d  TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
   8256          0q0 01110 000 m  0  len 100 n d  TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
   8257          where Ta = 16b(q=1) or 8b(q=0)
   8258       */
   8259       Bool isTBX = bitOP == 1;
   8260       /* The out-of-range values to use. */
   8261       IRTemp oor_values = newTempV128();
   8262       assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
   8263       /* src value */
   8264       IRTemp src = newTempV128();
   8265       assign(src, getQReg128(mm));
   8266       /* The table values */
   8267       IRTemp tab[4];
   8268       UInt   i;
   8269       for (i = 0; i <= len; i++) {
   8270          vassert(i < 4);
   8271          tab[i] = newTempV128();
   8272          assign(tab[i], getQReg128((nn + i) % 32));
   8273       }
   8274       IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
   8275       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   8276       const HChar* Ta = bitQ == 1 ? "16b" : "8b";
   8277       const HChar* nm = isTBX ? "tbx" : "tbl";
   8278       DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
   8279           nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
   8280       return True;
   8281    }
   8282 
   8283 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   8284    return False;
   8285 #  undef INSN
   8286 }
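
/* Decode example (illustrative): "tbl v1.16b, {v31.16b, v0.16b},
   v7.16b" has nn=31 and len=1, so the (nn + i) % 32 wrap-around
   selects v31 and v0 as the table; v7 supplies the byte indices,
   and out-of-range indices produce 0x00 (TBX would instead keep
   the existing Vd lane). */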
   8287 
   8288 
   8289 static
   8290 Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
   8291 {
   8292    /* 31  29     23   21 20 15 14     11 9 4
   8293       0 q 001110 size 0  m  0  opcode 10 n d
   8294       Decode fields: opcode
   8295    */
   8296 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   8297    if (INSN(31,31) != 0
   8298        || INSN(29,24) != BITS6(0,0,1,1,1,0)
   8299        || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
   8300       return False;
   8301    }
   8302    UInt bitQ   = INSN(30,30);
   8303    UInt size   = INSN(23,22);
   8304    UInt mm     = INSN(20,16);
   8305    UInt opcode = INSN(14,12);
   8306    UInt nn     = INSN(9,5);
   8307    UInt dd     = INSN(4,0);
   8308 
   8309    if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
   8310       /* -------- 001 UZP1 std7_std7_std7 -------- */
   8311       /* -------- 101 UZP2 std7_std7_std7 -------- */
   8312       if (bitQ == 0 && size == X11) return False; // implied 1d case
   8313       Bool   isUZP1 = opcode == BITS3(0,0,1);
   8314       IROp   op     = isUZP1 ? mkVecCATEVENLANES(size)
   8315                              : mkVecCATODDLANES(size);
   8316       IRTemp preL = newTempV128();
   8317       IRTemp preR = newTempV128();
   8318       IRTemp res  = newTempV128();
   8319       if (bitQ == 0) {
   8320          assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
   8321                                                   getQReg128(nn)));
   8322          assign(preR, mkexpr(preL));
   8323       } else {
   8324          assign(preL, getQReg128(mm));
   8325          assign(preR, getQReg128(nn));
   8326       }
   8327       assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
   8328       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   8329       const HChar* nm  = isUZP1 ? "uzp1" : "uzp2";
   8330       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   8331       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
   8332           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   8333       return True;
   8334    }
   8335 
   8336    if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
   8337       /* -------- 010 TRN1 std7_std7_std7 -------- */
   8338       /* -------- 110 TRN2 std7_std7_std7 -------- */
   8339       if (bitQ == 0 && size == X11) return False; // implied 1d case
   8340       Bool   isTRN1 = opcode == BITS3(0,1,0);
   8341       IROp   op1    = isTRN1 ? mkVecCATEVENLANES(size)
   8342                              : mkVecCATODDLANES(size);
   8343       IROp op2 = mkVecINTERLEAVEHI(size);
   8344       IRTemp srcM = newTempV128();
   8345       IRTemp srcN = newTempV128();
   8346       IRTemp res  = newTempV128();
   8347       assign(srcM, getQReg128(mm));
   8348       assign(srcN, getQReg128(nn));
   8349       assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
   8350                              binop(op1, mkexpr(srcN), mkexpr(srcN))));
   8351       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   8352       const HChar* nm  = isTRN1 ? "trn1" : "trn2";
   8353       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   8354       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
   8355           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   8356       return True;
   8357    }
   8358 
   8359    if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
   8360       /* -------- 011 ZIP1 std7_std7_std7 -------- */
   8361       /* -------- 111 ZIP2 std7_std7_std7 -------- */
   8362       if (bitQ == 0 && size == X11) return False; // implied 1d case
   8363       Bool   isZIP1 = opcode == BITS3(0,1,1);
   8364       IROp   op     = isZIP1 ? mkVecINTERLEAVELO(size)
   8365                              : mkVecINTERLEAVEHI(size);
   8366       IRTemp preL = newTempV128();
   8367       IRTemp preR = newTempV128();
   8368       IRTemp res  = newTempV128();
   8369       if (bitQ == 0 && !isZIP1) {
   8370          IRTemp z128 = newTempV128();
   8371          assign(z128, mkV128(0x0000));
   8372          // preL = Vm shifted left 32 bits
   8373          // preR = Vn shifted left 32 bits
   8374          assign(preL, triop(Iop_SliceV128,
   8375                             getQReg128(mm), mkexpr(z128), mkU8(12)));
   8376          assign(preR, triop(Iop_SliceV128,
   8377                             getQReg128(nn), mkexpr(z128), mkU8(12)));
   8378 
   8379       } else {
   8380          assign(preL, getQReg128(mm));
   8381          assign(preR, getQReg128(nn));
   8382       }
   8383       assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
   8384       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   8385       const HChar* nm  = isZIP1 ? "zip1" : "zip2";
   8386       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   8387       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
   8388           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   8389       return True;
   8390    }
   8391 
   8392    return False;
   8393 #  undef INSN
   8394 }
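
/* Lane-layout example (illustrative): "uzp1 v0.4s, v1.4s, v2.4s"
   concatenates the even lanes as
      Vd = [m2 m0 n2 n0]    (n = v1, m = v2)
   while "uzp2" takes the odd lanes, [m3 m1 n3 n1]; TRN and ZIP are
   built from the same cat/interleave primitives above. */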
   8395 
   8396 
   8397 static
   8398 Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
   8399 {
   8400    /* 31    28    23   21    16     11 9 4
   8401       0 q u 01110 size 11000 opcode 10 n d
   8402       Decode fields: u,size,opcode
   8403    */
   8404 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   8405    if (INSN(31,31) != 0
   8406        || INSN(28,24) != BITS5(0,1,1,1,0)
   8407        || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
   8408       return False;
   8409    }
   8410    UInt bitQ   = INSN(30,30);
   8411    UInt bitU   = INSN(29,29);
   8412    UInt size   = INSN(23,22);
   8413    UInt opcode = INSN(16,12);
   8414    UInt nn     = INSN(9,5);
   8415    UInt dd     = INSN(4,0);
   8416 
   8417    if (opcode == BITS5(0,0,0,1,1)) {
   8418       /* -------- 0,xx,00011 SADDLV -------- */
   8419       /* -------- 1,xx,00011 UADDLV -------- */
   8420       /* size is the narrow size */
   8421       if (size == X11 || (size == X10 && bitQ == 0)) return False;
   8422       Bool   isU = bitU == 1;
   8423       IRTemp src = newTempV128();
   8424       assign(src, getQReg128(nn));
   8425       /* The basic plan is to widen the lower half, and if Q = 1,
   8426          the upper half too.  Add them together (if Q = 1), and in
   8427          either case fold with add at twice the lane width.
   8428       */
   8429       IRExpr* widened
   8430          = mkexpr(math_WIDEN_LO_OR_HI_LANES(
   8431                      isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
   8432       if (bitQ == 1) {
   8433          widened
   8434             = binop(mkVecADD(size+1),
   8435                     widened,
   8436                     mkexpr(math_WIDEN_LO_OR_HI_LANES(
   8437                               isU, True/*fromUpperHalf*/, size, mkexpr(src)))
   8438               );
   8439       }
   8440       /* Now fold. */
   8441       IRTemp tWi = newTempV128();
   8442       assign(tWi, widened);
   8443       IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
   8444       putQReg128(dd, mkexpr(res));
   8445       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   8446       const HChar  ch  = "bhsd"[size+1]; /* dest lane is the widened size */
   8447       DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
   8448           nameQReg128(dd), ch, nameQReg128(nn), arr);
   8449       return True;
   8450    }
   8451 
   8452    UInt ix = 0;
   8453    /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
   8454    else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
   8455    else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
   8456    /**/
   8457    if (ix != 0) {
   8458       /* -------- 0,xx,01010: SMAXV -------- (1) */
   8459       /* -------- 1,xx,01010: UMAXV -------- (2) */
   8460       /* -------- 0,xx,11010: SMINV -------- (3) */
   8461       /* -------- 1,xx,11010: UMINV -------- (4) */
   8462       /* -------- 0,xx,11011: ADDV  -------- (5) */
   8463       vassert(ix >= 1 && ix <= 5);
   8464       if (size == X11) return False; // 1d,2d cases not allowed
   8465       if (size == X10 && bitQ == 0) return False; // 2s case not allowed
   8466       const IROp opMAXS[3]
   8467          = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
   8468       const IROp opMAXU[3]
   8469          = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
   8470       const IROp opMINS[3]
   8471          = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
   8472       const IROp opMINU[3]
   8473          = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
   8474       const IROp opADD[3]
   8475          = { Iop_Add8x16,  Iop_Add16x8,  Iop_Add32x4 };
   8476       vassert(size < 3);
   8477       IROp op = Iop_INVALID;
   8478       const HChar* nm = NULL;
   8479       switch (ix) {
   8480          case 1: op = opMAXS[size]; nm = "smaxv"; break;
   8481          case 2: op = opMAXU[size]; nm = "umaxv"; break;
   8482          case 3: op = opMINS[size]; nm = "sminv"; break;
   8483          case 4: op = opMINU[size]; nm = "uminv"; break;
   8484          case 5: op = opADD[size];  nm = "addv";  break;
   8485          default: vassert(0);
   8486       }
   8487       vassert(op != Iop_INVALID && nm != NULL);
   8488       IRTemp tN1 = newTempV128();
   8489       assign(tN1, getQReg128(nn));
   8490       /* If Q == 0, we're just folding lanes in the lower half of
   8491          the value, so copy the lower half of the source into the
   8492          upper half; we can then treat it the same as the full
   8493          width case.  The exception is the addition case, where we
   8494          have to zero out the upper half instead. */
   8495       IRTemp tN2 = newTempV128();
   8496       assign(tN2, bitQ == 0
   8497                      ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
   8498                                 : mk_CatEvenLanes64x2(tN1,tN1))
   8499                      : mkexpr(tN1));
   8500       IRTemp res = math_FOLDV(tN2, op);
   8501       if (res == IRTemp_INVALID)
   8502          return False; /* means math_FOLDV
   8503                           doesn't handle this case yet */
   8504       putQReg128(dd, mkexpr(res));
   8505       const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
   8506       IRType laneTy = tys[size];
   8507       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   8508       DIP("%s %s, %s.%s\n", nm,
   8509           nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
   8510       return True;
   8511    }
   8512 
   8513    if ((size == X00 || size == X10)
   8514        && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
   8515       /* -------- 0,00,01100: FMAXNMV s_4s -------- */
   8516       /* -------- 0,10,01100: FMINNMV s_4s -------- */
   8517       /* -------- 1,00,01111: FMAXV   s_4s -------- */
   8518       /* -------- 1,10,01111: FMINV   s_4s -------- */
   8519       /* FMAXNMV, FMINNMV: FIXME -- KLUDGED */
   8520       if (bitQ == 0) return False; // Only 4s is allowed
   8521       Bool   isMIN = (size & 2) == 2;
   8522       Bool   isNM  = opcode == BITS5(0,1,1,0,0);
   8523       IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
   8524       IRTemp src = newTempV128();
   8525       assign(src, getQReg128(nn));
   8526       IRTemp res = math_FOLDV(src, opMXX);
   8527       putQReg128(dd, mkexpr(res));
   8528       DIP("%s%sv s%u, v%u.4s\n",
   8529           isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
   8530       return True;
   8531    }
   8532 
   8533 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   8534    return False;
   8535 #  undef INSN
   8536 }
   8537 
   8538 
   8539 static
   8540 Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
   8541 {
   8542    /* 31     28       20   15 14   10 9 4
   8543       0 q op 01110000 imm5 0  imm4 1  n d
   8544       Decode fields: q,op,imm4
   8545    */
   8546 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   8547    if (INSN(31,31) != 0
   8548        || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
   8549        || INSN(15,15) != 0 || INSN(10,10) != 1) {
   8550       return False;
   8551    }
   8552    UInt bitQ  = INSN(30,30);
   8553    UInt bitOP = INSN(29,29);
   8554    UInt imm5  = INSN(20,16);
   8555    UInt imm4  = INSN(14,11);
   8556    UInt nn    = INSN(9,5);
   8557    UInt dd    = INSN(4,0);
   8558 
   8559    /* -------- x,0,0000: DUP (element, vector) -------- */
   8560    /* 31  28       20   15     9 4
   8561       0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   8562    */
   8563    if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
   8564       UInt   laneNo    = 0;
   8565       UInt   laneSzLg2 = 0;
   8566       HChar  laneCh    = '?';
   8567       IRTemp res       = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
   8568                                              getQReg128(nn), imm5);
   8569       if (res == IRTemp_INVALID)
   8570          return False;
   8571       if (bitQ == 0 && laneSzLg2 == X11)
   8572          return False; /* .1d case */
   8573       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   8574       const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
   8575       DIP("dup %s.%s, %s.%c[%u]\n",
   8576            nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
   8577       return True;
   8578    }
   8579 
   8580    /* -------- x,0,0001: DUP (general, vector) -------- */
   8581    /* 31  28       20   15       9 4
   8582       0q0 01110000 imm5 0 0001 1 n d  DUP Vd.T, Rn
   8583       Q=0 writes 64, Q=1 writes 128
   8584       imm5: xxxx1  8b(q=0)      or 16b(q=1),     R=W
   8585             xxx10  4h(q=0)      or 8h(q=1),      R=W
   8586             xx100  2s(q=0)      or 4s(q=1),      R=W
   8587             x1000  Invalid(q=0) or 2d(q=1),      R=X
   8588             x0000  Invalid(q=0) or Invalid(q=1)
   8589       Require op=0, imm4=0001
   8590    */
   8591    if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
   8592       Bool   isQ = bitQ == 1;
   8593       IRTemp w0  = newTemp(Ity_I64);
   8594       const HChar* arT = "??";
   8595       IRType laneTy = Ity_INVALID;
   8596       if (imm5 & 1) {
   8597          arT    = isQ ? "16b" : "8b";
   8598          laneTy = Ity_I8;
   8599          assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
   8600       }
   8601       else if (imm5 & 2) {
   8602          arT    = isQ ? "8h" : "4h";
   8603          laneTy = Ity_I16;
   8604          assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
   8605       }
   8606       else if (imm5 & 4) {
   8607          arT    = isQ ? "4s" : "2s";
   8608          laneTy = Ity_I32;
   8609          assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
   8610       }
   8611       else if ((imm5 & 8) && isQ) {
   8612          arT    = "2d";
   8613          laneTy = Ity_I64;
   8614          assign(w0, getIReg64orZR(nn));
   8615       }
   8616       else {
   8617          /* invalid; leave laneTy unchanged. */
   8618       }
   8619       /* */
   8620       if (laneTy != Ity_INVALID) {
   8621          IRTemp w1 = math_DUP_TO_64(w0, laneTy);
   8622          putQReg128(dd, binop(Iop_64HLtoV128,
   8623                               isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
   8624          DIP("dup %s.%s, %s\n",
   8625              nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
   8626          return True;
   8627       }
   8628       /* invalid */
   8629       return False;
   8630    }
   8631 
   8632    /* -------- 1,0,0011: INS (general) -------- */
   8633    /* 31  28       20   15     9 4
   8634       010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
   8635       where Ts,ix = case imm5 of xxxx1 -> B, xxxx
   8636                                  xxx10 -> H, xxx
   8637                                  xx100 -> S, xx
   8638                                  x1000 -> D, x
   8639    */
   8640    if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
   8641       HChar   ts     = '?';
   8642       UInt    laneNo = 16;
   8643       IRExpr* src    = NULL;
   8644       if (imm5 & 1) {
   8645          src    = unop(Iop_64to8, getIReg64orZR(nn));
   8646          laneNo = (imm5 >> 1) & 15;
   8647          ts     = 'b';
   8648       }
   8649       else if (imm5 & 2) {
   8650          src    = unop(Iop_64to16, getIReg64orZR(nn));
   8651          laneNo = (imm5 >> 2) & 7;
   8652          ts     = 'h';
   8653       }
   8654       else if (imm5 & 4) {
   8655          src    = unop(Iop_64to32, getIReg64orZR(nn));
   8656          laneNo = (imm5 >> 3) & 3;
   8657          ts     = 's';
   8658       }
   8659       else if (imm5 & 8) {
   8660          src    = getIReg64orZR(nn);
   8661          laneNo = (imm5 >> 4) & 1;
   8662          ts     = 'd';
   8663       }
   8664       /* */
   8665       if (src) {
   8666          vassert(laneNo < 16);
   8667          putQRegLane(dd, laneNo, src);
   8668          DIP("ins %s.%c[%u], %s\n",
   8669              nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
   8670          return True;
   8671       }
   8672       /* invalid */
   8673       return False;
   8674    }
   8675 
   8676    /* -------- x,0,0101: SMOV -------- */
   8677    /* -------- x,0,0111: UMOV -------- */
   8678    /* 31  28        20   15     9 4
   8679       0q0 01110 000 imm5 001111 n d  UMOV Xd/Wd, Vn.Ts[index]
   8680       0q0 01110 000 imm5 001011 n d  SMOV Xd/Wd, Vn.Ts[index]
   8681       dest is Xd when q==1, Wd when q==0
   8682       UMOV:
   8683          Ts,index,ops = case q:imm5 of
   8684                           0:xxxx1 -> B, xxxx, 8Uto64
   8685                           1:xxxx1 -> invalid
   8686                           0:xxx10 -> H, xxx,  16Uto64
   8687                           1:xxx10 -> invalid
   8688                           0:xx100 -> S, xx,   32Uto64
   8689                           1:xx100 -> invalid
   8690                           1:x1000 -> D, x,    copy64
   8691                           other   -> invalid
   8692       SMOV:
   8693          Ts,index,ops = case q:imm5 of
   8694                           0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
   8695                           1:xxxx1 -> B, xxxx, 8Sto64
   8696                           0:xxx10 -> H, xxx,  (32Uto64 . 16Sto32)
   8697                           1:xxx10 -> H, xxx,  16Sto64
   8698                           0:xx100 -> invalid
   8699                           1:xx100 -> S, xx,   32Sto64
   8700                           1:x1000 -> invalid
   8701                           other   -> invalid
   8702    */
   8703    if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
   8704       Bool isU  = (imm4 & 2) == 2;
   8705       const HChar* arTs = "??";
   8706       UInt    laneNo = 16; /* invalid */
   8707       // Setting 'res' to non-NULL determines valid/invalid
   8708       IRExpr* res    = NULL;
   8709       if (!bitQ && (imm5 & 1)) { // 0:xxxx1
   8710          laneNo = (imm5 >> 1) & 15;
   8711          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
   8712          res = isU ? unop(Iop_8Uto64, lane)
   8713                    : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
   8714          arTs = "b";
   8715       }
   8716       else if (bitQ && (imm5 & 1)) { // 1:xxxx1
   8717          laneNo = (imm5 >> 1) & 15;
   8718          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
   8719          res = isU ? NULL
   8720                    : unop(Iop_8Sto64, lane);
   8721          arTs = "b";
   8722       }
   8723       else if (!bitQ && (imm5 & 2)) { // 0:xxx10
   8724          laneNo = (imm5 >> 2) & 7;
   8725          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
   8726          res = isU ? unop(Iop_16Uto64, lane)
   8727                    : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
   8728          arTs = "h";
   8729       }
   8730       else if (bitQ && (imm5 & 2)) { // 1:xxx10
   8731          laneNo = (imm5 >> 2) & 7;
   8732          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
   8733          res = isU ? NULL
   8734                    : unop(Iop_16Sto64, lane);
   8735          arTs = "h";
   8736       }
   8737       else if (!bitQ && (imm5 & 4)) { // 0:xx100
   8738          laneNo = (imm5 >> 3) & 3;
   8739          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
   8740          res = isU ? unop(Iop_32Uto64, lane)
   8741                    : NULL;
   8742          arTs = "s";
   8743       }
   8744       else if (bitQ && (imm5 & 4)) { // 1:xx100
   8745          laneNo = (imm5 >> 3) & 3;
   8746          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
   8747          res = isU ? NULL
   8748                    : unop(Iop_32Sto64, lane);
   8749          arTs = "s";
   8750       }
   8751       else if (bitQ && (imm5 & 8)) { // 1:x1000
   8752          laneNo = (imm5 >> 4) & 1;
   8753          IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
   8754          res = isU ? lane
   8755                    : NULL;
   8756          arTs = "d";
   8757       }
   8758       /* */
   8759       if (res) {
   8760          vassert(laneNo < 16);
   8761          putIReg64orZR(dd, res);
   8762          DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
   8763              nameIRegOrZR(bitQ == 1, dd),
   8764              nameQReg128(nn), arTs, laneNo);
   8765          return True;
   8766       }
   8767       /* invalid */
   8768       return False;
   8769    }
   8770 
   8771    /* -------- 1,1,xxxx: INS (element) -------- */
   8772    /* 31  28       20     14   9 4
   8773       011 01110000 imm5 0 imm4 n d  INS Vd.Ts[ix1], Vn.Ts[ix2]
   8774       where Ts,ix1,ix2
   8775                = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
   8776                               xxx10 -> H, xxx,  imm4[3:1]
   8777                               xx100 -> S, xx,   imm4[3:2]
   8778                               x1000 -> D, x,    imm4[3:3]
   8779    */
   8780    if (bitQ == 1 && bitOP == 1) {
   8781       HChar   ts  = '?';
   8782       IRType  ity = Ity_INVALID;
   8783       UInt    ix1 = 16;
   8784       UInt    ix2 = 16;
   8785       if (imm5 & 1) {
   8786          ts  = 'b';
   8787          ity = Ity_I8;
   8788          ix1 = (imm5 >> 1) & 15;
   8789          ix2 = (imm4 >> 0) & 15;
   8790       }
   8791       else if (imm5 & 2) {
   8792          ts  = 'h';
   8793          ity = Ity_I16;
   8794          ix1 = (imm5 >> 2) & 7;
   8795          ix2 = (imm4 >> 1) & 7;
   8796       }
   8797       else if (imm5 & 4) {
   8798          ts  = 's';
   8799          ity = Ity_I32;
   8800          ix1 = (imm5 >> 3) & 3;
   8801          ix2 = (imm4 >> 2) & 3;
   8802       }
   8803       else if (imm5 & 8) {
   8804          ts  = 'd';
   8805          ity = Ity_I64;
   8806          ix1 = (imm5 >> 4) & 1;
   8807          ix2 = (imm4 >> 3) & 1;
   8808       }
   8809       /* */
   8810       if (ity != Ity_INVALID) {
   8811          vassert(ix1 < 16);
   8812          vassert(ix2 < 16);
   8813          putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
   8814          DIP("ins %s.%c[%u], %s.%c[%u]\n",
   8815              nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
   8816          return True;
   8817       }
   8818       /* invalid */
   8819       return False;
   8820    }
   8821 
   8822    return False;
   8823 #  undef INSN
   8824 }
   8825 
   8826 
   8827 static
   8828 Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
   8829 {
   8830    /* 31    28          18  15    11 9     4
   8831       0q op 01111 00000 abc cmode 01 defgh d
   8832       Decode fields: q,op,cmode
   8833       Bit 11 is really "o2", but it is always zero.
   8834    */
   8835 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   8836    if (INSN(31,31) != 0
   8837        || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
   8838        || INSN(11,10) != BITS2(0,1)) {
   8839       return False;
   8840    }
   8841    UInt bitQ     = INSN(30,30);
   8842    UInt bitOP    = INSN(29,29);
   8843    UInt cmode    = INSN(15,12);
   8844    UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
   8845    UInt dd       = INSN(4,0);
   8846 
   8847    ULong imm64lo  = 0;
   8848    UInt  op_cmode = (bitOP << 4) | cmode;
   8849    Bool  ok       = False;
   8850    Bool  isORR    = False;
   8851    Bool  isBIC    = False;
   8852    Bool  isMOV    = False;
   8853    Bool  isMVN    = False;
   8854    Bool  isFMOV   = False;
   8855    switch (op_cmode) {
   8856       /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
   8857       /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
   8858       /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
   8859       /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
   8860       case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
   8861       case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
   8862          ok = True; isMOV = True; break;
   8863 
   8864       /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
   8865       /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
   8866       /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
   8867       /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
   8868       case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
   8869       case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
   8870          ok = True; isORR = True; break;
   8871 
   8872       /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
   8873       /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
   8874       case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
   8875          ok = True; isMOV = True; break;
   8876 
   8877       /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
   8878       /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
   8879       case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
   8880          ok = True; isORR = True; break;
   8881 
   8882       /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
   8883       /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
   8884       case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
   8885          ok = True; isMOV = True; break;
   8886 
   8887       /* -------- x,0,1110 MOVI 8-bit -------- */
   8888       case BITS5(0,1,1,1,0):
   8889          ok = True; isMOV = True; break;
   8890 
   8891       /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
   8892       case BITS5(0,1,1,1,1): // 0:1111
   8893          ok = True; isFMOV = True; break;
   8894 
   8895       /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
   8896       /* -------- x,1,0010 MVNI 32-bit shifted imm  -------- */
   8897       /* -------- x,1,0100 MVNI 32-bit shifted imm  -------- */
   8898       /* -------- x,1,0110 MVNI 32-bit shifted imm  -------- */
   8899       case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
   8900       case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
   8901          ok = True; isMVN = True; break;
   8902 
   8903       /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
   8904       /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
   8905       /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
   8906       /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
   8907       case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
   8908       case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
   8909          ok = True; isBIC = True; break;
   8910 
   8911       /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
   8912       /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
   8913       case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
   8914          ok = True; isMVN = True; break;
   8915 
   8916       /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
   8917       /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
   8918       case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
   8919          ok = True; isBIC = True; break;
   8920 
   8921       /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
   8922       /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
   8923       case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
   8924          ok = True; isMVN = True; break;
   8925 
   8926       /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
   8927       /* -------- 1,1,1110 MOVI 64-bit vector -------- */
   8928       case BITS5(1,1,1,1,0):
   8929          ok = True; isMOV = True; break;
   8930 
   8931       /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
   8932       case BITS5(1,1,1,1,1): // 1:1111
   8933          ok = bitQ == 1; isFMOV = True; break;
   8934 
   8935       default:
   8936         break;
   8937    }
   8938    if (ok) {
   8939       vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
   8940                    + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
   8941       ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
   8942    }
   8943    if (ok) {
   8944       if (isORR || isBIC) {
   8945          ULong inv
   8946             = isORR ? 0ULL : ~0ULL;
   8947          IRExpr* immV128
   8948             = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
   8949          IRExpr* res
   8950             = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
   8951          const HChar* nm = isORR ? "orr" : "bic";
   8952          if (bitQ == 0) {
   8953             putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
   8954             DIP("%s %s.1d, #0x%016llx\n", nm, nameQReg128(dd), imm64lo);
   8955          } else {
   8956             putQReg128(dd, res);
   8957             DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
   8958                 nameQReg128(dd), imm64lo, imm64lo);
   8959          }
   8960       }
   8961       else if (isMOV || isMVN || isFMOV) {
   8962          if (isMVN) imm64lo = ~imm64lo;
   8963          ULong   imm64hi = bitQ == 0 ? 0 : imm64lo;
   8964          IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
   8965                                                  mkU64(imm64lo));
   8966          putQReg128(dd, immV128);
   8967          DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
   8968       }
   8969       return True;
   8970    }
   8971    /* else fall through */
   8972 
   8973    return False;
   8974 #  undef INSN
   8975 }
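
/* Expansion example (illustrative): "movi v0.4s, #0x55, lsl #8" has
   op=0, cmode=0010, abcdefgh=0x55, and AdvSIMDExpandImm yields the
   64-bit pattern 0x0000550000005500, replicated into both halves
   since Q=1:
      v0 = 0x0000550000005500'0000550000005500
   MVNI applies the same expansion and then inverts it. */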
   8976 
   8977 
   8978 static
   8979 Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
   8980 {
   8981    /* 31    28       20   15 14   10 9 4
   8982       01 op 11110000 imm5 0  imm4 1  n d
   8983       Decode fields: op,imm4
   8984    */
   8985 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   8986    if (INSN(31,30) != BITS2(0,1)
   8987        || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
   8988        || INSN(15,15) != 0 || INSN(10,10) != 1) {
   8989       return False;
   8990    }
   8991    UInt bitOP = INSN(29,29);
   8992    UInt imm5  = INSN(20,16);
   8993    UInt imm4  = INSN(14,11);
   8994    UInt nn    = INSN(9,5);
   8995    UInt dd    = INSN(4,0);
   8996 
   8997    if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
   8998       /* -------- 0,0000 DUP (element, scalar) -------- */
   8999       IRTemp w0     = newTemp(Ity_I64);
   9000       const HChar* arTs = "??";
   9001       IRType laneTy = Ity_INVALID;
   9002       UInt   laneNo = 16; /* invalid */
   9003       if (imm5 & 1) {
   9004          arTs   = "b";
   9005          laneNo = (imm5 >> 1) & 15;
   9006          laneTy = Ity_I8;
   9007          assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
   9008       }
   9009       else if (imm5 & 2) {
   9010          arTs   = "h";
   9011          laneNo = (imm5 >> 2) & 7;
   9012          laneTy = Ity_I16;
   9013          assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
   9014       }
   9015       else if (imm5 & 4) {
   9016          arTs   = "s";
   9017          laneNo = (imm5 >> 3) & 3;
   9018          laneTy = Ity_I32;
   9019          assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
   9020       }
   9021       else if (imm5 & 8) {
   9022          arTs   = "d";
   9023          laneNo = (imm5 >> 4) & 1;
   9024          laneTy = Ity_I64;
   9025          assign(w0, getQRegLane(nn, laneNo, laneTy));
   9026       }
   9027       else {
   9028          /* invalid; leave laneTy unchanged. */
   9029       }
   9030       /* */
   9031       if (laneTy != Ity_INVALID) {
   9032          vassert(laneNo < 16);
   9033          putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
   9034          DIP("dup %s, %s.%s[%u]\n",
   9035              nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
   9036          return True;
   9037       }
   9038       /* else fall through */
   9039    }
   9040 
   9041    return False;
   9042 #  undef INSN
   9043 }
   9044 
   9045 
   9046 static
   9047 Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
   9048 {
   9049    /* 31   28    23 21    16     11 9 4
   9050       01 u 11110 sz 11000 opcode 10 n d
   9051       Decode fields: u,sz,opcode
   9052    */
   9053 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   9054    if (INSN(31,30) != BITS2(0,1)
   9055        || INSN(28,24) != BITS5(1,1,1,1,0)
   9056        || INSN(21,17) != BITS5(1,1,0,0,0)
   9057        || INSN(11,10) != BITS2(1,0)) {
   9058       return False;
   9059    }
   9060    UInt bitU   = INSN(29,29);
   9061    UInt sz     = INSN(23,22);
   9062    UInt opcode = INSN(16,12);
   9063    UInt nn     = INSN(9,5);
   9064    UInt dd     = INSN(4,0);
   9065 
   9066    if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
   9067       /* -------- 0,11,11011 ADDP d_2d -------- */
   9068       IRTemp xy = newTempV128();
   9069       IRTemp xx = newTempV128();
   9070       assign(xy, getQReg128(nn));
   9071       assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
   9072       putQReg128(dd, unop(Iop_ZeroHI64ofV128,
   9073                           binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
   9074       DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
   9075       return True;
   9076    }
   9077 
   9078    if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
   9079       /* -------- 1,00,01101 FADDP s_2s -------- */
   9080       /* -------- 1,01,01101 FADDP d_2d -------- */
   9081       Bool   isD   = sz == X01;
   9082       IROp   opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
   9083       IROp   opADD = mkVecADDF(isD ? 3 : 2);
   9084       IRTemp src   = newTempV128();
   9085       IRTemp argL  = newTempV128();
   9086       IRTemp argR  = newTempV128();
   9087       assign(src, getQReg128(nn));
   9088       assign(argL, unop(opZHI, mkexpr(src)));
   9089       assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
   9090                                                     mkU8(isD ? 8 : 4))));
   9091       putQReg128(dd, unop(opZHI,
   9092                           triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
   9093                                               mkexpr(argL), mkexpr(argR))));
   9094       DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
   9095       return True;
   9096    }
   9097 
   9098    if (bitU == 1
   9099        && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
   9100       /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
   9101       /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
   9102       /* -------- 1,0x,01111 FMAXP   d_2d, s_2s -------- */
   9103       /* -------- 1,1x,01111 FMINP   d_2d, s_2s -------- */
   9104       /* FMAXNM, FMINNM: FIXME -- KLUDGED */
   9105       Bool   isD   = (sz & 1) == 1;
   9106       Bool   isMIN = (sz & 2) == 2;
   9107       Bool   isNM  = opcode == BITS5(0,1,1,0,0);
   9108       IROp   opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
   9109       IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
   9110       IRTemp src   = newTempV128();
   9111       IRTemp argL  = newTempV128();
   9112       IRTemp argR  = newTempV128();
   9113       assign(src, getQReg128(nn));
   9114       assign(argL, unop(opZHI, mkexpr(src)));
   9115       assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
   9116                                                     mkU8(isD ? 8 : 4))));
   9117       putQReg128(dd, unop(opZHI,
   9118                           binop(opMXX, mkexpr(argL), mkexpr(argR))));
   9119       HChar c = isD ? 'd' : 's';
   9120       DIP("%s%sp %c%u, v%u.2%c\n",
   9121            isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
   9122       return True;
   9123    }
   9124 
   9125    return False;
   9126 #  undef INSN
   9127 }
   9128 
   9129 
   9130 static
   9131 Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
   9132 {
   9133    /* 31   28     22   18   15     10 9 4
   9134       01 u 111110 immh immb opcode 1  n d
   9135       Decode fields: u,immh,opcode
   9136    */
   9137 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   9138    if (INSN(31,30) != BITS2(0,1)
   9139        || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
   9140       return False;
   9141    }
   9142    UInt bitU   = INSN(29,29);
   9143    UInt immh   = INSN(22,19);
   9144    UInt immb   = INSN(18,16);
   9145    UInt opcode = INSN(15,11);
   9146    UInt nn     = INSN(9,5);
   9147    UInt dd     = INSN(4,0);
   9148    UInt immhb  = (immh << 3) | immb;
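   /* The position of the leading 1 in immh selects the lane size.  For
      the d-lane (immh = 1xxx) cases below, the rightwards shift amount
      is 128 - immhb; e.g. immh:immb = 1111:111 encodes #1 and
      1000:000 encodes #64. */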
   9149 
   9150    if ((immh & 8) == 8
   9151        && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
   9152       /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
   9153       /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
   9154       /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
   9155       /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
   9156       Bool isU   = bitU == 1;
   9157       Bool isAcc = opcode == BITS5(0,0,0,1,0);
   9158       UInt sh    = 128 - immhb;
   9159       vassert(sh >= 1 && sh <= 64);
   9160       IROp    op  = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
   9161       IRExpr* src = getQReg128(nn);
   9162       IRTemp  shf = newTempV128();
   9163       IRTemp  res = newTempV128();
   9164       if (sh == 64 && isU) {
   9165          assign(shf, mkV128(0x0000));
      } else {
         /* A shift by #64 is not directly expressible in IR, since
            immediate shift amounts must be in 0..63.  For the signed
            case, shifting by 63 produces the same result -- every
            output bit is a copy of the sign bit -- hence the nudge. */
         UInt nudge = 0;
         if (sh == 64) {
            vassert(!isU);
            nudge = 1;
         }
         assign(shf, binop(op, src, mkU8(sh - nudge)));
   9173       }
   9174       assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
   9175                         : mkexpr(shf));
   9176       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
   9177       const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
   9178                               : (isU ? "ushr" : "sshr");
   9179       DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
   9180       return True;
   9181    }
   9182 
   9183    if ((immh & 8) == 8
   9184        && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
   9185       /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
   9186       /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
   9187       /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
   9188       /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
   9189       Bool isU   = bitU == 1;
   9190       Bool isAcc = opcode == BITS5(0,0,1,1,0);
   9191       UInt sh    = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      IROp    op  = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
   9195       IRExpr* src  = getQReg128(nn);
   9196       IRTemp  imm8 = newTemp(Ity_I8);
   9197       assign(imm8, mkU8((UChar)(-sh)));
   9198       IRExpr* amt  = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
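      /* The Rsh ops take a vector of signed per-lane shift amounts,
         where a negative amount means a rounding shift rightwards;
         hence -sh is replicated into every lane of the amount vector. */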
   9199       IRTemp  shf  = newTempV128();
   9200       IRTemp  res  = newTempV128();
   9201       assign(shf, binop(op, src, amt));
   9202       assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
   9203                         : mkexpr(shf));
   9204       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
   9205       const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
   9206                               : (isU ? "urshr" : "srshr");
   9207       DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
   9208       return True;
   9209    }
   9210 
   9211    if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
   9212       /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
   9213       UInt sh = 128 - immhb;
   9214       vassert(sh >= 1 && sh <= 64);
   9215       if (sh == 64) {
   9216          putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
   9217       } else {
         /* sh is in range 1 .. 63.  nmask gets its top sh bits set;
            note that this relies on >> sign-extending the negative
            operand. */
         ULong   nmask  = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
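         /* e.g. sh == 8: nmask == 0xFF00000000000000, keeping the top
            8 bits of dd, with nn >> 8 supplying the rest. */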
   9220          IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
   9221          IRTemp  res    = newTempV128();
   9222          assign(res, binop(Iop_OrV128,
   9223                            binop(Iop_AndV128, getQReg128(dd), nmaskV),
   9224                            binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
   9225          putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
   9226       }
   9227       DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
   9228       return True;
   9229    }
   9230 
   9231    if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
   9232       /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
   9233       UInt sh = immhb - 64;
   9234       vassert(sh >= 0 && sh < 64);
   9235       putQReg128(dd,
   9236                  unop(Iop_ZeroHI64ofV128,
   9237                       sh == 0 ? getQReg128(nn)
   9238                               : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
   9239       DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
   9240       return True;
   9241    }
   9242 
   9243    if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
   9244       /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
   9245       UInt sh = immhb - 64;
   9246       vassert(sh >= 0 && sh < 64);
   9247       if (sh == 0) {
   9248          putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
   9249       } else {
   9250          /* sh is in range 1 .. 63 */
   9251          ULong   nmask  = (1ULL << sh) - 1;
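         /* e.g. sh == 8: nmask == 0xFF, keeping the low 8 bits of dd,
            with nn << 8 supplying the rest. */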
   9252          IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
   9253          IRTemp  res    = newTempV128();
   9254          assign(res, binop(Iop_OrV128,
   9255                            binop(Iop_AndV128, getQReg128(dd), nmaskV),
   9256                            binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
   9257          putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
   9258       }
   9259       DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
   9260       return True;
   9261    }
   9262 
   9263    if (opcode == BITS5(0,1,1,1,0)
   9264        || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
   9265       /* -------- 0,01110  SQSHL  #imm -------- */
   9266       /* -------- 1,01110  UQSHL  #imm -------- */
   9267       /* -------- 1,01100  SQSHLU #imm -------- */
   9268       UInt size  = 0;
   9269       UInt shift = 0;
   9270       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
   9271       if (!ok) return False;
   9272       vassert(size >= 0 && size <= 3);
   9273       /* The shift encoding has opposite sign for the leftwards case.
   9274          Adjust shift to compensate. */
   9275       UInt lanebits = 8 << size;
   9276       shift = lanebits - shift;
   9277       vassert(shift >= 0 && shift < lanebits);
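      /* Worked example, assuming getLaneInfo_IMMH_IMMB returns the
         rightwards reading 2*lanebits - immhb: immh:immb = 0011:010
         gives size 1 (h, lanebits 16) and shift 32 - 26 = 6, so the
         leftwards shift used here is 16 - 6 = 10, which is just
         immhb - lanebits. */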
   9278       const HChar* nm = NULL;
   9279       /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
   9280       else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
   9281       else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
   9282       else vassert(0);
   9283       IRTemp qDiff1 = IRTemp_INVALID;
   9284       IRTemp qDiff2 = IRTemp_INVALID;
   9285       IRTemp res = IRTemp_INVALID;
   9286       IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
   9287       /* This relies on the fact that the zeroed out lanes generate zeroed
   9288          result lanes and don't saturate, so there's no point in trimming
   9289          the resulting res, qDiff1 or qDiff2 values. */
   9290       math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
   9291       putQReg128(dd, mkexpr(res));
   9292       updateQCFLAGwithDifference(qDiff1, qDiff2);
   9293       const HChar arr = "bhsd"[size];
   9294       DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
   9295       return True;
   9296    }
   9297 
   9298    if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
   9299        || (bitU == 1
   9300            && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
   9301       /* -------- 0,10010   SQSHRN #imm -------- */
   9302       /* -------- 1,10010   UQSHRN #imm -------- */
   9303       /* -------- 0,10011  SQRSHRN #imm -------- */
   9304       /* -------- 1,10011  UQRSHRN #imm -------- */
   9305       /* -------- 1,10000  SQSHRUN #imm -------- */
   9306       /* -------- 1,10001 SQRSHRUN #imm -------- */
   9307       UInt size  = 0;
   9308       UInt shift = 0;
   9309       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
   9310       if (!ok || size == X11) return False;
   9311       vassert(size >= X00 && size <= X10);
   9312       vassert(shift >= 1 && shift <= (8 << size));
   9313       const HChar* nm = "??";
   9314       IROp op = Iop_INVALID;
   9315       /* Decide on the name and the operation. */
   9316       /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
   9317          nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
   9318       }
   9319       else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
   9320          nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
   9321       }
   9322       else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
   9323          nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
   9324       }
   9325       else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
   9326          nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
   9327       }
   9328       else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
   9329          nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
   9330       }
   9331       else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
   9332          nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
   9333       }
   9334       else vassert(0);
   9335       /* Compute the result (Q, shifted value) pair. */
   9336       IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
   9337       IRTemp pair   = newTempV128();
   9338       assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
   9339       /* Update the result reg */
   9340       IRTemp res64in128 = newTempV128();
   9341       assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
   9342       putQReg128(dd, mkexpr(res64in128));
   9343       /* Update the Q flag. */
   9344       IRTemp q64q64 = newTempV128();
   9345       assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
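      /* The pair's upper 64 bits hold the saturation indication, and
         the interleave copies them into both lanes; hence QC gets set
         iff any part of the shifted value saturated. */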
   9346       IRTemp z128 = newTempV128();
   9347       assign(z128, mkV128(0x0000));
   9348       updateQCFLAGwithDifference(q64q64, z128);
   9349       /* */
   9350       const HChar arrNarrow = "bhsd"[size];
   9351       const HChar arrWide   = "bhsd"[size+1];
   9352       DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
   9353       return True;
   9354    }
   9355 
   9356    if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
   9357       /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
   9358       /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
   9359       UInt size  = 0;
   9360       UInt fbits = 0;
   9361       Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
   9362       /* The following holds because immh is never zero. */
   9363       vassert(ok);
   9364       /* The following holds because immh >= 0100. */
   9365       vassert(size == X10 || size == X11);
   9366       Bool isD = size == X11;
   9367       Bool isU = bitU == 1;
   9368       vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
   9369       Double  scale  = two_to_the_minus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
   9372       IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
   9373       IROp    opCVT  = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
   9374                            : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
   9375       IRType tyF = isD ? Ity_F64 : Ity_F32;
   9376       IRType tyI = isD ? Ity_I64 : Ity_I32;
   9377       IRTemp src = newTemp(tyI);
   9378       IRTemp res = newTemp(tyF);
   9379       IRTemp rm  = mk_get_IR_rounding_mode();
   9380       assign(src, getQRegLane(nn, 0, tyI));
   9381       assign(res, triop(opMUL, mkexpr(rm),
   9382                                binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
      putQRegLane(dd, 0, mkexpr(res));
      if (!isD) {
         putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
      }
      putQRegLane(dd, 1, mkU64(0));    /* bits 127-64 */
   9388       const HChar ch = isD ? 'd' : 's';
   9389       DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
   9390           ch, dd, ch, nn, fbits);
   9391       return True;
   9392    }
   9393 
   9394    if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
   9395       /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
   9396       /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
   9397       UInt size  = 0;
   9398       UInt fbits = 0;
   9399       Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
   9400       /* The following holds because immh is never zero. */
   9401       vassert(ok);
   9402       /* The following holds because immh >= 0100. */
   9403       vassert(size == X10 || size == X11);
   9404       Bool isD = size == X11;
   9405       Bool isU = bitU == 1;
   9406       vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
   9407       Double  scale  = two_to_the_plus(fbits);
   9408       IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
   9409                            : IRExpr_Const(IRConst_F32( (Float)scale ));
   9410       IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
   9411       IROp    opCVT  = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
   9412                            : (isD ? Iop_F64toI64S : Iop_F32toI32S);
   9413       IRType tyF = isD ? Ity_F64 : Ity_F32;
   9414       IRType tyI = isD ? Ity_I64 : Ity_I32;
   9415       IRTemp src = newTemp(tyF);
   9416       IRTemp res = newTemp(tyI);
   9417       IRTemp rm  = newTemp(Ity_I32);
   9418       assign(src, getQRegLane(nn, 0, tyF));
   9419       assign(rm,  mkU32(Irrm_ZERO));
   9420       assign(res, binop(opCVT, mkexpr(rm),
   9421                                triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
      putQRegLane(dd, 0, mkexpr(res));
      if (!isD) {
         putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
      }
      putQRegLane(dd, 1, mkU64(0));    /* bits 127-64 */
   9427       const HChar ch = isD ? 'd' : 's';
   9428       DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
   9429           ch, dd, ch, nn, fbits);
   9430       return True;
   9431    }
   9432 
   9434    return False;
   9435 #  undef INSN
   9436 }
   9437 
   9438 
   9439 static
   9440 Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
   9441 {
   9442    /* 31 29 28    23   21 20 15     11 9 4
   9443       01 U  11110 size 1  m  opcode 00 n d
   9444       Decode fields: u,opcode
   9445    */
   9446 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   9447    if (INSN(31,30) != BITS2(0,1)
   9448        || INSN(28,24) != BITS5(1,1,1,1,0)
   9449        || INSN(21,21) != 1
   9450        || INSN(11,10) != BITS2(0,0)) {
   9451       return False;
   9452    }
   9453    UInt bitU   = INSN(29,29);
   9454    UInt size   = INSN(23,22);
   9455    UInt mm     = INSN(20,16);
   9456    UInt opcode = INSN(15,12);
   9457    UInt nn     = INSN(9,5);
   9458    UInt dd     = INSN(4,0);
   9459    vassert(size < 4);
   9460 
   9461    if (bitU == 0
   9462        && (opcode == BITS4(1,1,0,1)
   9463            || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
   9464       /* -------- 0,1101  SQDMULL -------- */ // 0 (ks)
   9465       /* -------- 0,1001  SQDMLAL -------- */ // 1
   9466       /* -------- 0,1011  SQDMLSL -------- */ // 2
   9467       /* Widens, and size refers to the narrowed lanes. */
   9468       UInt ks = 3;
   9469       switch (opcode) {
   9470          case BITS4(1,1,0,1): ks = 0; break;
   9471          case BITS4(1,0,0,1): ks = 1; break;
   9472          case BITS4(1,0,1,1): ks = 2; break;
   9473          default: vassert(0);
   9474       }
   9475       vassert(ks >= 0 && ks <= 2);
   9476       if (size == X00 || size == X11) return False;
   9477       vassert(size <= 2);
   9478       IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
   9479       vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
   9480       newTempsV128_3(&vecN, &vecM, &vecD);
   9481       assign(vecN, getQReg128(nn));
   9482       assign(vecM, getQReg128(mm));
   9483       assign(vecD, getQReg128(dd));
   9484       math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
   9485                        False/*!is2*/, size, "mas"[ks],
   9486                        vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
   9487       IROp opZHI = mkVecZEROHIxxOFV128(size+1);
   9488       putQReg128(dd, unop(opZHI, mkexpr(res)));
   9489       vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
   9490       updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
   9491       if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
   9492          updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
   9493       }
   9494       const HChar* nm        = ks == 0 ? "sqdmull"
   9495                                        : (ks == 1 ? "sqdmlal" : "sqdmlsl");
   9496       const HChar  arrNarrow = "bhsd"[size];
   9497       const HChar  arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, %c%u\n",
          nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
   9500       return True;
   9501    }
   9502 
   9503    return False;
   9504 #  undef INSN
   9505 }
   9506 
   9507 
   9508 static
   9509 Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
   9510 {
   9511    /* 31 29 28    23   21 20 15     10 9 4
   9512       01 U  11110 size 1  m  opcode 1  n d
   9513       Decode fields: u,size,opcode
   9514    */
   9515 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   9516    if (INSN(31,30) != BITS2(0,1)
   9517        || INSN(28,24) != BITS5(1,1,1,1,0)
   9518        || INSN(21,21) != 1
   9519        || INSN(10,10) != 1) {
   9520       return False;
   9521    }
   9522    UInt bitU   = INSN(29,29);
   9523    UInt size   = INSN(23,22);
   9524    UInt mm     = INSN(20,16);
   9525    UInt opcode = INSN(15,11);
   9526    UInt nn     = INSN(9,5);
   9527    UInt dd     = INSN(4,0);
   9528    vassert(size < 4);
   9529 
   9530    if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
   9531       /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
   9532       /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
   9533       /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
   9534       /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
   9535       Bool isADD = opcode == BITS5(0,0,0,0,1);
   9536       Bool isU   = bitU == 1;
   9537       IROp qop   = Iop_INVALID;
   9538       IROp nop   = Iop_INVALID;
   9539       if (isADD) {
   9540          qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
   9541          nop = mkVecADD(size);
   9542       } else {
   9543          qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
   9544          nop = mkVecSUB(size);
   9545       }
   9546       IRTemp argL = newTempV128();
   9547       IRTemp argR = newTempV128();
   9548       IRTemp qres = newTempV128();
   9549       IRTemp nres = newTempV128();
   9550       assign(argL, getQReg128(nn));
   9551       assign(argR, getQReg128(mm));
   9552       assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
   9553                              size, binop(qop, mkexpr(argL), mkexpr(argR)))));
   9554       assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
   9555                              size, binop(nop, mkexpr(argL), mkexpr(argR)))));
   9556       putQReg128(dd, mkexpr(qres));
   9557       updateQCFLAGwithDifference(qres, nres);
   9558       const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
   9559                                : (isU ? "uqsub" : "sqsub");
   9560       const HChar  arr = "bhsd"[size];
   9561       DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
   9562       return True;
   9563    }
   9564 
   9565    if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
   9566       /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
   9567       /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
   9568       Bool    isGT = bitU == 0;
   9569       IRExpr* argL = getQReg128(nn);
   9570       IRExpr* argR = getQReg128(mm);
   9571       IRTemp  res  = newTempV128();
   9572       assign(res,
   9573              isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
   9574                   : binop(Iop_CmpGT64Ux2, argL, argR));
   9575       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isGT ? "cmgt" : "cmhi",
   9577           nameQRegLO(dd, Ity_I64),
   9578           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
   9579       return True;
   9580    }
   9581 
   9582    if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
   9583       /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
   9584       /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
   9585       Bool    isGE = bitU == 0;
   9586       IRExpr* argL = getQReg128(nn);
   9587       IRExpr* argR = getQReg128(mm);
   9588       IRTemp  res  = newTempV128();
   9589       assign(res,
   9590              isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
   9591                   : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
   9592       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
   9593       DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
   9594           nameQRegLO(dd, Ity_I64),
   9595           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
   9596       return True;
   9597    }
   9598 
   9599    if (size == X11 && (opcode == BITS5(0,1,0,0,0)
   9600                        || opcode == BITS5(0,1,0,1,0))) {
   9601       /* -------- 0,xx,01000 SSHL  d_d_d -------- */
   9602       /* -------- 0,xx,01010 SRSHL d_d_d -------- */
   9603       /* -------- 1,xx,01000 USHL  d_d_d -------- */
   9604       /* -------- 1,xx,01010 URSHL d_d_d -------- */
   9605       Bool isU = bitU == 1;
   9606       Bool isR = opcode == BITS5(0,1,0,1,0);
   9607       IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
   9608                      : (isU ? mkVecSHU(size)  : mkVecSHS(size));
   9609       IRTemp res = newTempV128();
   9610       assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
   9611       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
   9612       const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
   9613                              : (isU ? "ushl"  : "sshl");
   9614       DIP("%s %s, %s, %s\n", nm,
   9615           nameQRegLO(dd, Ity_I64),
   9616           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
   9617       return True;
   9618    }
   9619 
   9620    if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
   9621       /* -------- 0,xx,01001 SQSHL  std4_std4_std4 -------- */
   9622       /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
   9623       /* -------- 1,xx,01001 UQSHL  std4_std4_std4 -------- */
   9624       /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
   9625       Bool isU = bitU == 1;
   9626       Bool isR = opcode == BITS5(0,1,0,1,1);
   9627       IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
   9628                      : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
   9629       /* This is a bit tricky.  Since we're only interested in the lowest
   9630          lane of the result, we zero out all the rest in the operands, so
   9631          as to ensure that other lanes don't pollute the returned Q value.
   9632          This works because it means, for the lanes we don't care about, we
   9633          are shifting zero by zero, which can never saturate. */
   9634       IRTemp res256 = newTemp(Ity_V256);
   9635       IRTemp resSH  = newTempV128();
   9636       IRTemp resQ   = newTempV128();
   9637       IRTemp zero   = newTempV128();
   9638       assign(
   9639          res256,
   9640          binop(op,
   9641                mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
   9642                mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
   9643       assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
   9644       assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
   9645       assign(zero,  mkV128(0x0000));
   9646       putQReg128(dd, mkexpr(resSH));
   9647       updateQCFLAGwithDifference(resQ, zero);
   9648       const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
   9649                              : (isU ? "uqshl"  : "sqshl");
   9650       const HChar  arr = "bhsd"[size];
   9651       DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
   9652       return True;
   9653    }
   9654 
   9655    if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
   9656       /* -------- 0,11,10000 ADD d_d_d -------- */
   9657       /* -------- 1,11,10000 SUB d_d_d -------- */
   9658       Bool   isSUB = bitU == 1;
   9659       IRTemp res   = newTemp(Ity_I64);
   9660       assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
   9661                         getQRegLane(nn, 0, Ity_I64),
   9662                         getQRegLane(mm, 0, Ity_I64)));
   9663       putQRegLane(dd, 0, mkexpr(res));
   9664       putQRegLane(dd, 1, mkU64(0));
   9665       DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
   9666           nameQRegLO(dd, Ity_I64),
   9667           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
   9668       return True;
   9669    }
   9670 
   9671    if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
   9672       /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
   9673       /* -------- 1,11,10001 CMEQ  d_d_d -------- */ // ==
   9674       Bool    isEQ = bitU == 1;
   9675       IRExpr* argL = getQReg128(nn);
   9676       IRExpr* argR = getQReg128(mm);
   9677       IRTemp  res  = newTempV128();
   9678       assign(res,
   9679              isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
   9680                   : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
   9681                                             binop(Iop_AndV128, argL, argR),
   9682                                             mkV128(0x0000))));
   9683       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
   9684       DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
   9685           nameQRegLO(dd, Ity_I64),
   9686           nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
   9687       return True;
   9688    }
   9689 
   9690    if (opcode == BITS5(1,0,1,1,0)) {
   9691       /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
   9692       /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
   9693       if (size == X00 || size == X11) return False;
   9694       Bool isR = bitU == 1;
   9695       IRTemp res, sat1q, sat1n, vN, vM;
   9696       res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
   9697       newTempsV128_2(&vN, &vM);
   9698       assign(vN, getQReg128(nn));
   9699       assign(vM, getQReg128(mm));
   9700       math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
   9701       putQReg128(dd,
   9702                  mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
   9703       updateQCFLAGwithDifference(
   9704          math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
   9705          math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
   9706       const HChar  arr = "bhsd"[size];
   9707       const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
   9709       return True;
   9710    }
   9711 
   9712    if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
   9713       /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
   9714       IRType ity = size == X11 ? Ity_F64 : Ity_F32;
   9715       IRTemp res = newTemp(ity);
   9716       assign(res, unop(mkABSF(ity),
   9717                        triop(mkSUBF(ity),
   9718                              mkexpr(mk_get_IR_rounding_mode()),
   9719                              getQRegLO(nn,ity), getQRegLO(mm,ity))));
   9720       putQReg128(dd, mkV128(0x0000));
   9721       putQRegLO(dd, mkexpr(res));
   9722       DIP("fabd %s, %s, %s\n",
   9723           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
   9724       return True;
   9725    }
   9726 
   9727    if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
   9728       /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
      // KLUDGE: FMULX is treated the same way as FMUL.  That's wrong for
      // the 0 * infinity cases, where FMULX must return +/-2.0 rather
      // than the default NaN.
   9730       IRType ity = size == X01 ? Ity_F64 : Ity_F32;
   9731       IRTemp res = newTemp(ity);
   9732       assign(res, triop(mkMULF(ity),
   9733                         mkexpr(mk_get_IR_rounding_mode()),
   9734                         getQRegLO(nn,ity), getQRegLO(mm,ity)));
   9735       putQReg128(dd, mkV128(0x0000));
   9736       putQRegLO(dd, mkexpr(res));
   9737       DIP("fmulx %s, %s, %s\n",
   9738           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
   9739       return True;
   9740    }
   9741 
   9742    if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
   9743       /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
   9744       /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
   9745       Bool   isD   = size == X01;
   9746       IRType ity   = isD ? Ity_F64 : Ity_F32;
   9747       Bool   isGE  = bitU == 1;
   9748       IROp   opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
   9749                           : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
   9750       IRTemp res   = newTempV128();
   9751       assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
   9752                        : binop(opCMP, getQReg128(nn), getQReg128(mm)));
   9753       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
   9754                                                              mkexpr(res))));
   9755       DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
   9756           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
   9757       return True;
   9758    }
   9759 
   9760    if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
   9761       /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
   9762       Bool   isD   = size == X11;
   9763       IRType ity   = isD ? Ity_F64 : Ity_F32;
   9764       IROp   opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
   9765       IRTemp res   = newTempV128();
   9766       assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
   9767       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
   9768                                                              mkexpr(res))));
   9769       DIP("%s %s, %s, %s\n", "fcmgt",
   9770           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
   9771       return True;
   9772    }
   9773 
   9774    if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
   9775       /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
   9776       /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
   9777       Bool   isD   = (size & 1) == 1;
   9778       IRType ity   = isD ? Ity_F64 : Ity_F32;
   9779       Bool   isGT  = (size & 2) == 2;
   9780       IROp   opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
   9781                           : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
   9782       IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
   9783       IRTemp res   = newTempV128();
   9784       assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
   9785                                unop(opABS, getQReg128(nn)))); // swapd
   9786       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
   9787                                                              mkexpr(res))));
   9788       DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
   9789           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
   9790       return True;
   9791    }
   9792 
   9793    if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
   9794       /* -------- 0,0x,11111: FRECPS  d_d_d, s_s_s -------- */
   9795       /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
   9796       Bool isSQRT = (size & 2) == 2;
   9797       Bool isD    = (size & 1) == 1;
   9798       IROp op     = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
   9799                            : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
   9800       IRTemp res = newTempV128();
   9801       assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
   9802       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
   9803                                                              mkexpr(res))));
   9804       HChar c = isD ? 'd' : 's';
   9805       DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
   9806           c, dd, c, nn, c, mm);
   9807       return True;
   9808    }
   9809 
   9810    return False;
   9811 #  undef INSN
   9812 }
   9813 
   9814 
   9815 static
   9816 Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
   9817 {
   9818    /* 31 29 28    23   21    16     11 9 4
   9819       01 U  11110 size 10000 opcode 10 n d
   9820       Decode fields: u,size,opcode
   9821    */
   9822 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   9823    if (INSN(31,30) != BITS2(0,1)
   9824        || INSN(28,24) != BITS5(1,1,1,1,0)
   9825        || INSN(21,17) != BITS5(1,0,0,0,0)
   9826        || INSN(11,10) != BITS2(1,0)) {
   9827       return False;
   9828    }
   9829    UInt bitU   = INSN(29,29);
   9830    UInt size   = INSN(23,22);
   9831    UInt opcode = INSN(16,12);
   9832    UInt nn     = INSN(9,5);
   9833    UInt dd     = INSN(4,0);
   9834    vassert(size < 4);
   9835 
   9836    if (opcode == BITS5(0,0,0,1,1)) {
   9837       /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
   9838       /* -------- 1,xx,00011: USQADD std4_std4 -------- */
   9839       /* These are a bit tricky (to say the least).  See comments on
   9840          the vector variants (in dis_AdvSIMD_two_reg_misc) below for
   9841          details. */
   9842       Bool   isUSQADD = bitU == 1;
   9843       IROp   qop  = isUSQADD ? mkVecQADDEXTSUSATUU(size)
   9844                              : mkVecQADDEXTUSSATSS(size);
   9845       IROp   nop  = mkVecADD(size);
   9846       IRTemp argL = newTempV128();
   9847       IRTemp argR = newTempV128();
   9848       assign(argL, getQReg128(nn));
   9849       assign(argR, getQReg128(dd));
   9850       IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
   9851                        size, binop(qop, mkexpr(argL), mkexpr(argR)));
   9852       IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
   9853                        size, binop(nop, mkexpr(argL), mkexpr(argR)));
   9854       putQReg128(dd, mkexpr(qres));
   9855       updateQCFLAGwithDifference(qres, nres);
   9856       const HChar arr = "bhsd"[size];
   9857       DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
   9858       return True;
   9859    }
   9860 
   9861    if (opcode == BITS5(0,0,1,1,1)) {
   9862       /* -------- 0,xx,00111 SQABS std4_std4 -------- */
   9863       /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
   9864       Bool isNEG = bitU == 1;
   9865       IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
   9866       (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
   9867                                          getQReg128(nn), size );
   9868       IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
   9869       IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
   9870       putQReg128(dd, mkexpr(qres));
   9871       updateQCFLAGwithDifference(qres, nres);
   9872       const HChar arr = "bhsd"[size];
   9873       DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
   9874       return True;
   9875    }
   9876 
   9877    if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
   9878       /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
   9879       /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
   9880       Bool    isGT = bitU == 0;
   9881       IRExpr* argL = getQReg128(nn);
   9882       IRExpr* argR = mkV128(0x0000);
   9883       IRTemp  res  = newTempV128();
   9884       assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
   9885                        : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
   9886       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
   9887       DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
   9888       return True;
   9889    }
   9890 
   9891    if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
   9892       /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
   9893       /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
   9894       Bool    isEQ = bitU == 0;
   9895       IRExpr* argL = getQReg128(nn);
   9896       IRExpr* argR = mkV128(0x0000);
   9897       IRTemp  res  = newTempV128();
   9898       assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
   9899                        : unop(Iop_NotV128,
   9900                               binop(Iop_CmpGT64Sx2, argL, argR)));
   9901       putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
   9902       DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
   9903       return True;
   9904    }
   9905 
   9906    if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
   9907       /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
   9908       putQReg128(dd, unop(Iop_ZeroHI64ofV128,
   9909                           binop(Iop_CmpGT64Sx2, mkV128(0x0000),
   9910                                                 getQReg128(nn))));
   9911       DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
   9912       return True;
   9913    }
   9914 
   9915    if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
   9916       /* -------- 0,11,01011 ABS d_d -------- */
   9917       putQReg128(dd, unop(Iop_ZeroHI64ofV128,
   9918                           unop(Iop_Abs64x2, getQReg128(nn))));
   9919       DIP("abs d%u, d%u\n", dd, nn);
   9920       return True;
   9921    }
   9922 
   9923    if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
   9924       /* -------- 1,11,01011 NEG d_d -------- */
   9925       putQReg128(dd, unop(Iop_ZeroHI64ofV128,
   9926                           binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
   9927       DIP("neg d%u, d%u\n", dd, nn);
   9928       return True;
   9929    }
   9930 
   9931    UInt ix = 0; /*INVALID*/
   9932    if (size >= X10) {
   9933       switch (opcode) {
   9934          case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
   9935          case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
   9936          case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
   9937          default: break;
   9938       }
   9939    }
   9940    if (ix > 0) {
   9941       /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
   9942       /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
   9943       /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
   9944       /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
   9945       /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
   9946       Bool   isD     = size == X11;
   9947       IRType ity     = isD ? Ity_F64 : Ity_F32;
   9948       IROp   opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
   9949       IROp   opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
   9950       IROp   opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
   9951       IROp   opCmp   = Iop_INVALID;
   9952       Bool   swap    = False;
   9953       const HChar* nm = "??";
   9954       switch (ix) {
   9955          case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
   9956          case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
   9957          case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
   9958          case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
   9959          case 5: nm = "fcmle"; opCmp = opCmpLE; break;
   9960          default: vassert(0);
   9961       }
   9962       IRExpr* zero = mkV128(0x0000);
   9963       IRTemp res = newTempV128();
   9964       assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
   9965                        : binop(opCmp, getQReg128(nn), zero));
   9966       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
   9967                                                              mkexpr(res))));
   9968 
   9969       DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
   9970       return True;
   9971    }
   9972 
   9973    if (opcode == BITS5(1,0,1,0,0)
   9974        || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
   9975       /* -------- 0,xx,10100: SQXTN -------- */
   9976       /* -------- 1,xx,10100: UQXTN -------- */
   9977       /* -------- 1,xx,10010: SQXTUN -------- */
   9978       if (size == X11) return False;
   9979       vassert(size < 3);
   9980       IROp  opN    = Iop_INVALID;
   9981       Bool  zWiden = True;
   9982       const HChar* nm = "??";
   9983       /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
   9984          opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
   9985       }
   9986       else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
   9987          opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
   9988       }
   9989       else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
   9990          opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
   9991       }
   9992       else vassert(0);
   9993       IRTemp src  = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
   9994                        size+1, getQReg128(nn));
   9995       IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
   9996                        size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
   9997       putQReg128(dd, mkexpr(resN));
   9998       /* This widens zero lanes to zero, and compares it against zero, so all
   9999          of the non-participating lanes make no contribution to the
   10000          Q flag state. */
   10001       IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
   10002                                               size, mkexpr(resN));
   10003       updateQCFLAGwithDifference(src, resW);
   10004       const HChar arrNarrow = "bhsd"[size];
   10005       const HChar arrWide   = "bhsd"[size+1];
   10006       DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
   10007       return True;
   10008    }
   10009 
   10010    if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
   10011       /* -------- 1,01,10110 FCVTXN s_d -------- */
      /* Using Irrm_NEAREST here isn't right.  "Round to odd" (von
         Neumann rounding) means truncate, then set the result's least
         significant bit if any discarded bit was nonzero; it exists so
         that a second rounding of the narrowed value cannot introduce
         double-rounding error.  The IR has no such rounding mode, so
         round-to-nearest is used as a kludge. */
   10014       putQRegLO(dd,
   10015                 binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
   10016                                     getQRegLO(nn, Ity_F64)));
      putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
      putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
   10019       DIP("fcvtxn s%u, d%u\n", dd, nn);
   10020       return True;
   10021    }
   10022 
   10023    ix = 0; /*INVALID*/
   10024    switch (opcode) {
   10025       case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
   10026       case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
   10027       case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
   10028       default: break;
   10029    }
   10030    if (ix > 0) {
   10031       /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
   10032       /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
   10033       /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
   10034       /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
   10035       /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
   10036       /* -------- 1,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
   10037       /* -------- 1,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
   10038       /* -------- 1,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
   10039       /* -------- 1,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
   10040       /* -------- 1,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
   10041       Bool           isD  = (size & 1) == 1;
   10042       IRType         tyF  = isD ? Ity_F64 : Ity_F32;
   10043       IRType         tyI  = isD ? Ity_I64 : Ity_I32;
   10044       IRRoundingMode irrm = 8; /*impossible*/
   10045       HChar          ch   = '?';
   10046       switch (ix) {
   10047          case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
   10048          case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
         /* kludge: 'a' rounds ties away from zero, but Irrm_NEAREST
            rounds ties to even */
         case 3: ch = 'a'; irrm = Irrm_NEAREST; break;
   10050          case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
   10051          case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
   10052          default: vassert(0);
   10053       }
   10054       IROp cvt = Iop_INVALID;
   10055       if (bitU == 1) {
   10056          cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
   10057       } else {
   10058          cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
   10059       }
   10060       IRTemp src = newTemp(tyF);
   10061       IRTemp res = newTemp(tyI);
   10062       assign(src, getQRegLane(nn, 0, tyF));
   10063       assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
   10064       putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
   10065       if (!isD) {
   10066          putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
   10067       }
   10068       putQRegLane(dd, 1, mkU64(0));    /* bits 127-64 */
   10069       HChar sOrD = isD ? 'd' : 's';
   10070       DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
   10071           sOrD, dd, sOrD, nn);
   10072       return True;
   10073    }
   10074 
   10075    if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
   10076       /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
   10077       /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
   10078       Bool   isU = bitU == 1;
   10079       Bool   isD = (size & 1) == 1;
   10080       IRType tyI = isD ? Ity_I64 : Ity_I32;
   10081       IROp   iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
   10082                        : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
   10083       IRTemp rm  = mk_get_IR_rounding_mode();
   10084       putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
   10085       if (!isD) {
   10086          putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
   10087       }
   10088       putQRegLane(dd, 1, mkU64(0));    /* bits 127-64 */
   10089       HChar c = isD ? 'd' : 's';
   10090       DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
   10091       return True;
   10092    }
   10093 
   10094    if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
   10095       /* -------- 0,1x,11101: FRECPE  d_d, s_s -------- */
   10096       /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
   10097       Bool isSQRT = bitU == 1;
   10098       Bool isD    = (size & 1) == 1;
   10099       IROp op     = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
   10100                            : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
   10101       IRTemp resV = newTempV128();
   10102       assign(resV, unop(op, getQReg128(nn)));
   10103       putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
   10104                                                              mkexpr(resV))));
   10105       HChar c = isD ? 'd' : 's';
   10106       DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
   10107       return True;
   10108    }
   10109 
   10110    if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
   10111       /* -------- 0,1x,11111: FRECPX  d_d, s_s -------- */
   10112       Bool   isD = (size & 1) == 1;
   10113       IRType ty  = isD ? Ity_F64 : Ity_F32;
   10114       IROp   op  = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
   10115       IRTemp res = newTemp(ty);
   10116       IRTemp rm  = mk_get_IR_rounding_mode();
   10117       assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
   10118       putQReg128(dd, mkV128(0x0000));
   10119       putQRegLane(dd, 0, mkexpr(res));
   10120       HChar c = isD ? 'd' : 's';
   10121       DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
   10122       return True;
   10123    }
   10124 
   10125    return False;
   10126 #  undef INSN
   10127 }
   10128 
   10129 
   10130 static
   10131 Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
   10132 {
   10133    /* 31   28    23   21 20 19 15     11   9 4
   10134       01 U 11111 size L  M  m  opcode H  0 n d
   10135       Decode fields are: u,size,opcode
   10136       M is really part of the mm register number.  Individual
   10137       cases need to inspect L and H though.
   10138    */
   10139 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
   10142       return False;
   10143    }
   10144    UInt bitU   = INSN(29,29);
   10145    UInt size   = INSN(23,22);
   10146    UInt bitL   = INSN(21,21);
   10147    UInt bitM   = INSN(20,20);
   10148    UInt mmLO4  = INSN(19,16);
   10149    UInt opcode = INSN(15,12);
   10150    UInt bitH   = INSN(11,11);
   10151    UInt nn     = INSN(9,5);
   10152    UInt dd     = INSN(4,0);
   10153    vassert(size < 4);
   10154    vassert(bitH < 2 && bitM < 2 && bitL < 2);
   10155 
   10156    if (bitU == 0 && size >= X10
   10157        && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
   10158       /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
   10159       /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
   10160       Bool isD   = (size & 1) == 1;
   10161       Bool isSUB = opcode == BITS4(0,1,0,1);
   10162       UInt index;
   10163       if      (!isD)             index = (bitH << 1) | bitL;
   10164       else if (isD && bitL == 0) index = bitH;
   10165       else return False; // sz:L == x11 => unallocated encoding
   10166       vassert(index < (isD ? 2 : 4));
   10167       IRType ity   = isD ? Ity_F64 : Ity_F32;
   10168       IRTemp elem  = newTemp(ity);
   10169       UInt   mm    = (bitM << 4) | mmLO4;
   10170       assign(elem, getQRegLane(mm, index, ity));
   10171       IRTemp dupd  = math_DUP_TO_V128(elem, ity);
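      /* Duplicating the selected element across a whole vector turns
         the by-element operation into an ordinary vector multiply, of
         which only lane 0 is eventually kept. */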
   10172       IROp   opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
   10173       IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
   10174       IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
   10175       IRTemp rm    = mk_get_IR_rounding_mode();
   10176       IRTemp t1    = newTempV128();
   10177       IRTemp t2    = newTempV128();
   10178       // FIXME: double rounding; use FMA primops instead
   10179       assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
   10180       assign(t2, triop(isSUB ? opSUB : opADD,
   10181                        mkexpr(rm), getQReg128(dd), mkexpr(t1)));
   10182       putQReg128(dd,
   10183                  mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
   10184                                                          mkexpr(t2))));
   10185       const HChar c = isD ? 'd' : 's';
   10186       DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
   10187           c, dd, c, nn, nameQReg128(mm), c, index);
   10188       return True;
   10189    }
   10190 
   10191    if (size >= X10 && opcode == BITS4(1,0,0,1)) {
   10192       /* -------- 0,1x,1001 FMUL  d_d_d[], s_s_s[] -------- */
   10193       /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
   10194       Bool isD    = (size & 1) == 1;
   10195       Bool isMULX = bitU == 1;
   10196       UInt index;
   10197       if      (!isD)             index = (bitH << 1) | bitL;
   10198       else if (isD && bitL == 0) index = bitH;
   10199       else return False; // sz:L == x11 => unallocated encoding
   10200       vassert(index < (isD ? 2 : 4));
   10201       IRType ity   = isD ? Ity_F64 : Ity_F32;
   10202       IRTemp elem  = newTemp(ity);
   10203       UInt   mm    = (bitM << 4) | mmLO4;
   10204       assign(elem, getQRegLane(mm, index, ity));
   10205       IRTemp dupd  = math_DUP_TO_V128(elem, ity);
   10206       IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
   10207       IRTemp rm    = mk_get_IR_rounding_mode();
   10208       IRTemp t1    = newTempV128();
      // KLUDGE: FMULX is treated the same way as FMUL.  That's wrong for
      // the 0 * infinity cases, where FMULX must return +/-2.0 rather
      // than the default NaN.
   10210       assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
   10211       putQReg128(dd,
   10212                  mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
   10213                                                          mkexpr(t1))));
   10214       const HChar c = isD ? 'd' : 's';
   10215       DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
   10216           c, dd, c, nn, nameQReg128(mm), c, index);
   10217       return True;
   10218    }
   10219 
   10220    if (bitU == 0
   10221        && (opcode == BITS4(1,0,1,1)
   10222            || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
   10223       /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
   10224       /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
   10225       /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
   10226       /* Widens, and size refers to the narrowed lanes. */
   10227       UInt ks = 3;
   10228       switch (opcode) {
   10229          case BITS4(1,0,1,1): ks = 0; break;
   10230          case BITS4(0,0,1,1): ks = 1; break;
   10231          case BITS4(0,1,1,1): ks = 2; break;
   10232          default: vassert(0);
   10233       }
   10234       vassert(ks >= 0 && ks <= 2);
   10235       UInt mm  = 32; // invalid
   10236       UInt ix  = 16; // invalid
   10237       switch (size) {
   10238          case X00:
   10239             return False; // h_b_b[] case is not allowed
   10240          case X01:
   10241             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
   10242          case X10:
   10243             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
   10244          case X11:
   10245             return False; // q_d_d[] case is not allowed
   10246          default:
   10247             vassert(0);
   10248       }
   10249       vassert(mm < 32 && ix < 16);
   10250       IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
   10251       vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
   10252       newTempsV128_2(&vecN, &vecD);
   10253       assign(vecN, getQReg128(nn));
   10254       IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
   10255       assign(vecD, getQReg128(dd));
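      /* sat1q/sat1n are the saturating vs truncating results of the
         doubling multiply; sat2q/sat2n are valid only for the
         accumulating (SQDMLAL/SQDMLSL) forms and cover the accumulate
         step.  QC is set below if either pair differs. */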
   10256       math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
   10257                        False/*!is2*/, size, "mas"[ks],
   10258                        vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
   10259       IROp opZHI = mkVecZEROHIxxOFV128(size+1);
   10260       putQReg128(dd, unop(opZHI, mkexpr(res)));
   10261       vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
   10262       updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
   10263       if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
   10264          updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
   10265       }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
   10268       const HChar  arrNarrow = "bhsd"[size];
   10269       const HChar  arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
          nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
   10272       return True;
   10273    }
   10274 
   10275    if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
   10276       /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
   10277       /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
   10278       UInt mm  = 32; // invalid
   10279       UInt ix  = 16; // invalid
   10280       switch (size) {
   10281          case X00:
   10282             return False; // b case is not allowed
   10283          case X01:
   10284             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
   10285          case X10:
   10286             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
   10287          case X11:
   10288             return False; // q case is not allowed
   10289          default:
   10290             vassert(0);
   10291       }
   10292       vassert(mm < 32 && ix < 16);
   10293       Bool isR = opcode == BITS4(1,1,0,1);
   10294       IRTemp res, sat1q, sat1n, vN, vM;
   10295       res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
   10296       vN = newTempV128();
   10297       assign(vN, getQReg128(nn));
   10298       vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
   10299       math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
   10300       IROp opZHI = mkVecZEROHIxxOFV128(size);
   10301       putQReg128(dd, unop(opZHI, mkexpr(res)));
   10302       updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
   10303       const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
   10304       HChar ch         = size == X01 ? 'h' : 's';
      DIP("%s %c%u, %c%u, v%u.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
   10306       return True;
   10307    }
   10308 
   10309    return False;
   10310 #  undef INSN
   10311 }
   10312 
   10313 
   10314 static
   10315 Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
   10316 {
   10317    /* 31    28     22   18   15     10 9 4
   10318       0 q u 011110 immh immb opcode 1  n d
   10319       Decode fields: u,opcode
   10320    */
   10321 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   10322    if (INSN(31,31) != 0
   10323        || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
   10324       return False;
   10325    }
   10326    UInt bitQ   = INSN(30,30);
   10327    UInt bitU   = INSN(29,29);
   10328    UInt immh   = INSN(22,19);
   10329    UInt immb   = INSN(18,16);
   10330    UInt opcode = INSN(15,11);
   10331    UInt nn     = INSN(9,5);
   10332    UInt dd     = INSN(4,0);
   10333 
   10334    if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
   10335       /* -------- 0,00000 SSHR std7_std7_#imm -------- */
   10336       /* -------- 1,00000 USHR std7_std7_#imm -------- */
   10337       /* -------- 0,00010 SSRA std7_std7_#imm -------- */
   10338       /* -------- 1,00010 USRA std7_std7_#imm -------- */
   10339       /* laneTy, shift = case immh:immb of
   10340                          0001:xxx -> B, SHR:8-xxx
   10341                          001x:xxx -> H, SHR:16-xxxx
   10342                          01xx:xxx -> S, SHR:32-xxxxx
   10343                          1xxx:xxx -> D, SHR:64-xxxxxx
   10344                          other    -> invalid
   10345       */
   10346       UInt size  = 0;
   10347       UInt shift = 0;
   10348       Bool isQ   = bitQ == 1;
   10349       Bool isU   = bitU == 1;
   10350       Bool isAcc = opcode == BITS5(0,0,0,1,0);
   10351       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
   10352       if (!ok || (bitQ == 0 && size == X11)) return False;
   10353       vassert(size >= 0 && size <= 3);
   10354       UInt lanebits = 8 << size;
   10355       vassert(shift >= 1 && shift <= lanebits);
   10356       IROp    op  = isU ? mkVecSHRN(size) : mkVecSARN(size);
   10357       IRExpr* src = getQReg128(nn);
   10358       IRTemp  shf = newTempV128();
   10359       IRTemp  res = newTempV128();
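      /* The IR vector shift ops aren't defined for a shift of exactly
         lanebits, so that case needs special handling: for USHR the
         result is simply zero, whilst for SSHR a shift by lanebits-1
         gives the same answer, since every result bit is then a copy
         of the lane's sign bit. */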
   10360       if (shift == lanebits && isU) {
   10361          assign(shf, mkV128(0x0000));
   10362       } else {
   10363          UInt nudge = 0;
   10364          if (shift == lanebits) {
   10365             vassert(!isU);
   10366             nudge = 1;
   10367          }
   10368          assign(shf, binop(op, src, mkU8(shift - nudge)));
   10369       }
   10370       assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
   10371                         : mkexpr(shf));
   10372       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   10373       HChar laneCh = "bhsd"[size];
   10374       UInt  nLanes = (isQ ? 128 : 64) / lanebits;
   10375       const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
   10376                               : (isU ? "ushr" : "sshr");
   10377       DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
   10378           nameQReg128(dd), nLanes, laneCh,
   10379           nameQReg128(nn), nLanes, laneCh, shift);
   10380       return True;
   10381    }
   10382 
   10383    if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
   10384       /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
   10385       /* -------- 1,00100 URSHR std7_std7_#imm -------- */
   10386       /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
   10387       /* -------- 1,00110 URSRA std7_std7_#imm -------- */
   10388       /* laneTy, shift = case immh:immb of
   10389                          0001:xxx -> B, SHR:8-xxx
   10390                          001x:xxx -> H, SHR:16-xxxx
   10391                          01xx:xxx -> S, SHR:32-xxxxx
   10392                          1xxx:xxx -> D, SHR:64-xxxxxx
   10393                          other    -> invalid
   10394       */
   10395       UInt size  = 0;
   10396       UInt shift = 0;
   10397       Bool isQ   = bitQ == 1;
   10398       Bool isU   = bitU == 1;
   10399       Bool isAcc = opcode == BITS5(0,0,1,1,0);
   10400       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
   10401       if (!ok || (bitQ == 0 && size == X11)) return False;
   10402       vassert(size >= 0 && size <= 3);
   10403       UInt lanebits = 8 << size;
   10404       vassert(shift >= 1 && shift <= lanebits);
   10405       IROp    op   = isU ? mkVecRSHU(size) : mkVecRSHS(size);
   10406       IRExpr* src  = getQReg128(nn);
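      /* The rounding-shift primops apparently follow the ARM
         SSHL/SRSHL model: each lane takes its shift count from the
         signed value in its least significant byte, with negative
         meaning shift right.  Replicating -shift into every byte hence
         gives every lane a rounding right shift by 'shift'. */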
   10407       IRTemp  imm8 = newTemp(Ity_I8);
   10408       assign(imm8, mkU8((UChar)(-shift)));
   10409       IRExpr* amt  = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
   10410       IRTemp  shf  = newTempV128();
   10411       IRTemp  res  = newTempV128();
   10412       assign(shf, binop(op, src, amt));
   10413       assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
   10414                         : mkexpr(shf));
   10415       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   10416       HChar laneCh = "bhsd"[size];
   10417       UInt  nLanes = (isQ ? 128 : 64) / lanebits;
   10418       const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
   10419                               : (isU ? "urshr" : "srshr");
   10420       DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
   10421           nameQReg128(dd), nLanes, laneCh,
   10422           nameQReg128(nn), nLanes, laneCh, shift);
   10423       return True;
   10424    }
   10425 
   10426    if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
   10427       /* -------- 1,01000 SRI std7_std7_#imm -------- */
   10428       /* laneTy, shift = case immh:immb of
   10429                          0001:xxx -> B, SHR:8-xxx
   10430                          001x:xxx -> H, SHR:16-xxxx
   10431                          01xx:xxx -> S, SHR:32-xxxxx
   10432                          1xxx:xxx -> D, SHR:64-xxxxxx
   10433                          other    -> invalid
   10434       */
   10435       UInt size  = 0;
   10436       UInt shift = 0;
   10437       Bool isQ   = bitQ == 1;
   10438       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
   10439       if (!ok || (bitQ == 0 && size == X11)) return False;
   10440       vassert(size >= 0 && size <= 3);
   10441       UInt lanebits = 8 << size;
   10442       vassert(shift >= 1 && shift <= lanebits);
   10443       IRExpr* src = getQReg128(nn);
   10444       IRTemp  res = newTempV128();
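      /* SRI computes (Vn >>u shift) | (Vd & nmask), where nmask keeps
         the top 'shift' bits of each Vd lane -- the bit positions that
         the shifted-in source can't supply.  A shift of exactly
         lanebits inserts nothing and leaves Vd unchanged. */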
   10445       if (shift == lanebits) {
   10446          assign(res, getQReg128(dd));
   10447       } else {
   10448          assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
   10449          IRExpr* nmask = binop(mkVecSHLN(size),
   10450                                mkV128(0xFFFF), mkU8(lanebits - shift));
   10451          IRTemp  tmp   = newTempV128();
   10452          assign(tmp, binop(Iop_OrV128,
   10453                            mkexpr(res),
   10454                            binop(Iop_AndV128, getQReg128(dd), nmask)));
   10455          res = tmp;
   10456       }
   10457       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   10458       HChar laneCh = "bhsd"[size];
   10459       UInt  nLanes = (isQ ? 128 : 64) / lanebits;
   10460       DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
   10461           nameQReg128(dd), nLanes, laneCh,
   10462           nameQReg128(nn), nLanes, laneCh, shift);
   10463       return True;
   10464    }
   10465 
   10466    if (opcode == BITS5(0,1,0,1,0)) {
   10467       /* -------- 0,01010 SHL std7_std7_#imm -------- */
   10468       /* -------- 1,01010 SLI std7_std7_#imm -------- */
   10469       /* laneTy, shift = case immh:immb of
   10470                          0001:xxx -> B, xxx
   10471                          001x:xxx -> H, xxxx
   10472                          01xx:xxx -> S, xxxxx
   10473                          1xxx:xxx -> D, xxxxxx
   10474                          other    -> invalid
   10475       */
   10476       UInt size  = 0;
   10477       UInt shift = 0;
   10478       Bool isSLI = bitU == 1;
   10479       Bool isQ   = bitQ == 1;
   10480       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
   10481       if (!ok || (bitQ == 0 && size == X11)) return False;
   10482       vassert(size >= 0 && size <= 3);
   10483       /* The shift encoding has opposite sign for the leftwards case.
   10484          Adjust shift to compensate. */
   10485       UInt lanebits = 8 << size;
   10486       shift = lanebits - shift;
   10487       vassert(shift >= 0 && shift < lanebits);
   10488       IROp    op  = mkVecSHLN(size);
   10489       IRExpr* src = getQReg128(nn);
   10490       IRTemp  res = newTempV128();
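      /* SLI computes (Vn << shift) | (Vd & nmask), where nmask keeps
         the low 'shift' bits of each Vd lane -- the positions vacated
         by the left shift.  A shift of zero degenerates to a plain
         copy of Vn. */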
   10491       if (shift == 0) {
   10492          assign(res, src);
   10493       } else {
   10494          assign(res, binop(op, src, mkU8(shift)));
   10495          if (isSLI) {
   10496             IRExpr* nmask = binop(mkVecSHRN(size),
   10497                                   mkV128(0xFFFF), mkU8(lanebits - shift));
   10498             IRTemp  tmp   = newTempV128();
   10499             assign(tmp, binop(Iop_OrV128,
   10500                               mkexpr(res),
   10501                               binop(Iop_AndV128, getQReg128(dd), nmask)));
   10502             res = tmp;
   10503          }
   10504       }
   10505       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   10506       HChar laneCh = "bhsd"[size];
   10507       UInt  nLanes = (isQ ? 128 : 64) / lanebits;
   10508       const HChar* nm = isSLI ? "sli" : "shl";
   10509       DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
   10510           nameQReg128(dd), nLanes, laneCh,
   10511           nameQReg128(nn), nLanes, laneCh, shift);
   10512       return True;
   10513    }
   10514 
   10515    if (opcode == BITS5(0,1,1,1,0)
   10516        || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
   10517       /* -------- 0,01110  SQSHL  std7_std7_#imm -------- */
   10518       /* -------- 1,01110  UQSHL  std7_std7_#imm -------- */
   10519       /* -------- 1,01100  SQSHLU std7_std7_#imm -------- */
   10520       UInt size  = 0;
   10521       UInt shift = 0;
   10522       Bool isQ   = bitQ == 1;
   10523       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
   10524       if (!ok || (bitQ == 0 && size == X11)) return False;
   10525       vassert(size >= 0 && size <= 3);
   10526       /* The shift encoding has opposite sign for the leftwards case.
   10527          Adjust shift to compensate. */
   10528       UInt lanebits = 8 << size;
   10529       shift = lanebits - shift;
   10530       vassert(shift >= 0 && shift < lanebits);
   10531       const HChar* nm = NULL;
   10532       /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
   10533       else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
   10534       else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
   10535       else vassert(0);
   10536       IRTemp qDiff1 = IRTemp_INVALID;
   10537       IRTemp qDiff2 = IRTemp_INVALID;
   10538       IRTemp res = IRTemp_INVALID;
   10539       IRTemp src = newTempV128();
   10540       assign(src, getQReg128(nn));
   10541       math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
   10542       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
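      /* For the 64-bit (bitQ == 0) case, the upper halves of the
         comparands are junk, so zero them out of both before the QC
         flag comparison. */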
   10543       updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
   10544                                     isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
   10545       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   10546       DIP("%s %s.%s, %s.%s, #%u\n", nm,
   10547           nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
   10548       return True;
   10549    }
   10550 
   10551    if (bitU == 0
   10552        && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
   10553       /* -------- 0,10000  SHRN{,2} #imm -------- */
   10554       /* -------- 0,10001 RSHRN{,2} #imm -------- */
   10555       /* Narrows, and size is the narrow size. */
   10556       UInt size  = 0;
   10557       UInt shift = 0;
   10558       Bool is2   = bitQ == 1;
   10559       Bool isR   = opcode == BITS5(1,0,0,0,1);
   10560       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
   10561       if (!ok || size == X11) return False;
   10562       vassert(shift >= 1);
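      /* For the rounding variant, first add the rounding constant
         1 << (shift-1) in the wide lanes, so that the truncating right
         shift below rounds to nearest rather than towards zero. */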
   10563       IRTemp t1 = newTempV128();
   10564       IRTemp t2 = newTempV128();
   10565       IRTemp t3 = newTempV128();
   10566       assign(t1, getQReg128(nn));
   10567       assign(t2, isR ? binop(mkVecADD(size+1),
   10568                              mkexpr(t1),
   10569                              mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
   10570                      : mkexpr(t1));
   10571       assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
   10572       IRTemp t4 = math_NARROW_LANES(t3, t3, size);
   10573       putLO64andZUorPutHI64(is2, dd, t4);
   10574       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
   10575       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
   10576       DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
   10577           nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
   10578       return True;
   10579    }
   10580 
   10581    if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
   10582        || (bitU == 1
   10583            && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
   10584       /* -------- 0,10010   SQSHRN{,2} #imm -------- */
   10585       /* -------- 1,10010   UQSHRN{,2} #imm -------- */
   10586       /* -------- 0,10011  SQRSHRN{,2} #imm -------- */
   10587       /* -------- 1,10011  UQRSHRN{,2} #imm -------- */
   10588       /* -------- 1,10000  SQSHRUN{,2} #imm -------- */
   10589       /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
   10590       UInt size  = 0;
   10591       UInt shift = 0;
   10592       Bool is2   = bitQ == 1;
   10593       Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
   10594       if (!ok || size == X11) return False;
   10595       vassert(shift >= 1 && shift <= (8 << size));
   10596       const HChar* nm = "??";
   10597       IROp op = Iop_INVALID;
   10598       /* Decide on the name and the operation. */
   10599       /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
   10600          nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
   10601       }
   10602       else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
   10603          nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
   10604       }
   10605       else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
   10606          nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
   10607       }
   10608       else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
   10609          nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
   10610       }
   10611       else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
   10612          nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
   10613       }
   10614       else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
   10615          nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
   10616       }
   10617       else vassert(0);
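      /* These QAND..NARROW primops apparently return a (Q, result)
         pair in one V128: the narrowed, saturated result in the lower
         64 bits, and in the upper 64 bits a value that is nonzero iff
         some lane saturated.  The Q half is extracted and compared
         against zero below. */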
   10618       /* Compute the result (Q, shifted value) pair. */
   10619       IRTemp src128 = newTempV128();
   10620       assign(src128, getQReg128(nn));
   10621       IRTemp pair = newTempV128();
   10622       assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
   10623       /* Update the result reg */
   10624       IRTemp res64in128 = newTempV128();
   10625       assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
   10626       putLO64andZUorPutHI64(is2, dd, res64in128);
   10627       /* Update the Q flag. */
   10628       IRTemp q64q64 = newTempV128();
   10629       assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
   10630       IRTemp z128 = newTempV128();
   10631       assign(z128, mkV128(0x0000));
   10632       updateQCFLAGwithDifference(q64q64, z128);
   10633       /* */
   10634       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
   10635       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
   10636       DIP("%s %s.%s, %s.%s, #%u\n", nm,
   10637           nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
   10638       return True;
   10639    }
   10640 
   10641    if (opcode == BITS5(1,0,1,0,0)) {
   10642       /* -------- 0,10100 SSHLL{,2} #imm -------- */
   10643       /* -------- 1,10100 USHLL{,2} #imm -------- */
   10644       /* 31  28     22   18   15     9 4
   10645          0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
   10646          0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
   10647          where Ta,Tb,sh
   10648            = case immh of 1xxx -> invalid
   10649                           01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
   10650                           001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
   10651                           0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
   10652                           0000 -> AdvSIMD modified immediate (???)
   10653       */
   10654       Bool    isQ   = bitQ == 1;
   10655       Bool    isU   = bitU == 1;
   10656       UInt    immhb = (immh << 3) | immb;
   10657       IRTemp  src   = newTempV128();
   10658       IRTemp  zero  = newTempV128();
   10659       IRExpr* res   = NULL;
   10660       UInt    sh    = 0;
   10661       const HChar* ta = "??";
   10662       const HChar* tb = "??";
   10663       assign(src, getQReg128(nn));
   10664       assign(zero, mkV128(0x0000));
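      /* The widening shift is done by interleaving the source lanes
         with zeroes, so that each source lane x lands in the top half
         of a double-width lane (that is, becomes x * 2^lanebits), and
         then shifting right by lanebits-sh: arithmetically for SSHLL,
         which sign extends, or logically for USHLL, which zero
         extends.  Eg, for 32-to-64 bit lanes,
         (x << 32) >> (32-sh) == extend(x) << sh. */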
   10665       if (immh & 8) {
   10666          /* invalid; don't assign to res */
   10667       }
   10668       else if (immh & 4) {
   10669          sh = immhb - 32;
   10670          vassert(sh < 32); /* so 32-sh is 1..32 */
   10671          ta = "2d";
   10672          tb = isQ ? "4s" : "2s";
   10673          IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
   10674                            : mk_InterleaveLO32x4(src, zero);
   10675          res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
   10676       }
   10677       else if (immh & 2) {
   10678          sh = immhb - 16;
   10679          vassert(sh < 16); /* so 16-sh is 1..16 */
   10680          ta = "4s";
   10681          tb = isQ ? "8h" : "4h";
   10682          IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
   10683                            : mk_InterleaveLO16x8(src, zero);
   10684          res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
   10685       }
   10686       else if (immh & 1) {
   10687          sh = immhb - 8;
   10688          vassert(sh < 8); /* so 8-sh is 1..8 */
   10689          ta = "8h";
   10690          tb = isQ ? "16b" : "8b";
   10691          IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
   10692                            : mk_InterleaveLO8x16(src, zero);
   10693          res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
   10694       } else {
   10695          vassert(immh == 0);
   10696          /* invalid; don't assign to res */
   10697       }
   10698       /* */
   10699       if (res) {
   10700          putQReg128(dd, res);
         DIP("%cshll%s %s.%s, %s.%s, #%u\n",
   10702              isU ? 'u' : 's', isQ ? "2" : "",
   10703              nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
   10704          return True;
   10705       }
   10706       return False;
   10707    }
   10708 
   10709    if (opcode == BITS5(1,1,1,0,0)) {
   10710       /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
   10711       /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
   10712       /* If immh is of the form 00xx, the insn is invalid. */
   10713       if (immh < BITS4(0,1,0,0)) return False;
   10714       UInt size  = 0;
   10715       UInt fbits = 0;
   10716       Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
   10717       /* The following holds because immh is never zero. */
   10718       vassert(ok);
   10719       /* The following holds because immh >= 0100. */
   10720       vassert(size == X10 || size == X11);
   10721       Bool isD = size == X11;
   10722       Bool isU = bitU == 1;
   10723       Bool isQ = bitQ == 1;
   10724       if (isD && !isQ) return False; /* reject .1d case */
   10725       vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
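      /* Fixed-point convert: turn the integer into FP and multiply by
         scale = 2^-fbits.  Both steps use the guest's current rounding
         mode; the multiply by a power of two is exact except on
         underflow to a subnormal. */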
   10726       Double  scale  = two_to_the_minus(fbits);
   10727       IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
   10728                            : IRExpr_Const(IRConst_F32( (Float)scale ));
   10729       IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
   10730       IROp    opCVT  = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
   10731                            : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
   10732       IRType tyF = isD ? Ity_F64 : Ity_F32;
   10733       IRType tyI = isD ? Ity_I64 : Ity_I32;
   10734       UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
   10735       vassert(nLanes == 2 || nLanes == 4);
   10736       for (UInt i = 0; i < nLanes; i++) {
   10737          IRTemp src = newTemp(tyI);
   10738          IRTemp res = newTemp(tyF);
   10739          IRTemp rm  = mk_get_IR_rounding_mode();
   10740          assign(src, getQRegLane(nn, i, tyI));
   10741          assign(res, triop(opMUL, mkexpr(rm),
   10742                                   binop(opCVT, mkexpr(rm), mkexpr(src)),
   10743                                   scaleE));
   10744          putQRegLane(dd, i, mkexpr(res));
   10745       }
   10746       if (!isQ) {
   10747          putQRegLane(dd, 1, mkU64(0));
   10748       }
   10749       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   10750       DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
   10751           nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
   10752       return True;
   10753    }
   10754 
   10755    if (opcode == BITS5(1,1,1,1,1)) {
   10756       /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
   10757       /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
   10758       /* If immh is of the form 00xx, the insn is invalid. */
   10759       if (immh < BITS4(0,1,0,0)) return False;
   10760       UInt size  = 0;
   10761       UInt fbits = 0;
   10762       Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
   10763       /* The following holds because immh is never zero. */
   10764       vassert(ok);
   10765       /* The following holds because immh >= 0100. */
   10766       vassert(size == X10 || size == X11);
   10767       Bool isD = size == X11;
   10768       Bool isU = bitU == 1;
   10769       Bool isQ = bitQ == 1;
   10770       if (isD && !isQ) return False; /* reject .1d case */
   10771       vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
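      /* Fixed-point convert: scale the value up by 2^fbits, then
         convert to integer.  Both the scaling multiply and the
         conversion use round-towards-zero (Irrm_ZERO); FCVTZ requires
         the conversion itself to truncate. */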
   10772       Double  scale  = two_to_the_plus(fbits);
   10773       IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
   10774                            : IRExpr_Const(IRConst_F32( (Float)scale ));
   10775       IROp    opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
   10776       IROp    opCVT  = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
   10777                            : (isD ? Iop_F64toI64S : Iop_F32toI32S);
   10778       IRType tyF = isD ? Ity_F64 : Ity_F32;
   10779       IRType tyI = isD ? Ity_I64 : Ity_I32;
   10780       UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
   10781       vassert(nLanes == 2 || nLanes == 4);
   10782       for (UInt i = 0; i < nLanes; i++) {
   10783          IRTemp src = newTemp(tyF);
   10784          IRTemp res = newTemp(tyI);
   10785          IRTemp rm  = newTemp(Ity_I32);
   10786          assign(src, getQRegLane(nn, i, tyF));
   10787          assign(rm,  mkU32(Irrm_ZERO));
   10788          assign(res, binop(opCVT, mkexpr(rm),
   10789                                   triop(opMUL, mkexpr(rm),
   10790                                                mkexpr(src), scaleE)));
   10791          putQRegLane(dd, i, mkexpr(res));
   10792       }
   10793       if (!isQ) {
   10794          putQRegLane(dd, 1, mkU64(0));
   10795       }
   10796       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   10797       DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
   10798           nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
   10799       return True;
   10800    }
   10801 
   10803    return False;
   10804 #  undef INSN
   10805 }
   10806 
   10807 
   10808 static
   10809 Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
   10810 {
   10811    /* 31 30 29 28    23   21 20 15     11 9 4
   10812       0  Q  U  01110 size 1  m  opcode 00 n d
   10813       Decode fields: u,opcode
   10814    */
   10815 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   10816    if (INSN(31,31) != 0
   10817        || INSN(28,24) != BITS5(0,1,1,1,0)
   10818        || INSN(21,21) != 1
   10819        || INSN(11,10) != BITS2(0,0)) {
   10820       return False;
   10821    }
   10822    UInt bitQ   = INSN(30,30);
   10823    UInt bitU   = INSN(29,29);
   10824    UInt size   = INSN(23,22);
   10825    UInt mm     = INSN(20,16);
   10826    UInt opcode = INSN(15,12);
   10827    UInt nn     = INSN(9,5);
   10828    UInt dd     = INSN(4,0);
   10829    vassert(size < 4);
   10830    Bool is2    = bitQ == 1;
   10831 
   10832    if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
   10833       /* -------- 0,0000 SADDL{2} -------- */
   10834       /* -------- 1,0000 UADDL{2} -------- */
   10835       /* -------- 0,0010 SSUBL{2} -------- */
   10836       /* -------- 1,0010 USUBL{2} -------- */
   10837       /* Widens, and size refers to the narrowed lanes. */
   10838       if (size == X11) return False;
   10839       vassert(size <= 2);
   10840       Bool   isU   = bitU == 1;
   10841       Bool   isADD = opcode == BITS4(0,0,0,0);
   10842       IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
   10843       IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
   10844       IRTemp res   = newTempV128();
   10845       assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
   10846                         mkexpr(argL), mkexpr(argR)));
   10847       putQReg128(dd, mkexpr(res));
   10848       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
   10849       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
   10850       const HChar* nm        = isADD ? (isU ? "uaddl" : "saddl")
   10851                                      : (isU ? "usubl" : "ssubl");
   10852       DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
   10853           nameQReg128(dd), arrWide,
   10854           nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
   10855       return True;
   10856    }
   10857 
   10858    if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
   10859       /* -------- 0,0001 SADDW{2} -------- */
   10860       /* -------- 1,0001 UADDW{2} -------- */
   10861       /* -------- 0,0011 SSUBW{2} -------- */
   10862       /* -------- 1,0011 USUBW{2} -------- */
   10863       /* Widens, and size refers to the narrowed lanes. */
   10864       if (size == X11) return False;
   10865       vassert(size <= 2);
   10866       Bool   isU   = bitU == 1;
   10867       Bool   isADD = opcode == BITS4(0,0,0,1);
   10868       IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
   10869       IRTemp res   = newTempV128();
   10870       assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
   10871                         getQReg128(nn), mkexpr(argR)));
   10872       putQReg128(dd, mkexpr(res));
   10873       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
   10874       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
   10875       const HChar* nm        = isADD ? (isU ? "uaddw" : "saddw")
   10876                                      : (isU ? "usubw" : "ssubw");
   10877       DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
   10878           nameQReg128(dd), arrWide,
   10879           nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
   10880       return True;
   10881    }
   10882 
   10883    if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
   10884       /* -------- 0,0100  ADDHN{2} -------- */
   10885       /* -------- 1,0100 RADDHN{2} -------- */
   10886       /* -------- 0,0110  SUBHN{2} -------- */
   10887       /* -------- 1,0110 RSUBHN{2} -------- */
   10888       /* Narrows, and size refers to the narrowed lanes. */
   10889       if (size == X11) return False;
   10890       vassert(size <= 2);
   10891       const UInt shift[3] = { 8, 16, 32 };
   10892       Bool isADD = opcode == BITS4(0,1,0,0);
   10893       Bool isR   = bitU == 1;
   10894       /* Combined elements in wide lanes */
   10895       IRTemp  wide  = newTempV128();
   10896       IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
   10897                             getQReg128(nn), getQReg128(mm));
   10898       if (isR) {
   10899          wideE = binop(mkVecADD(size+1),
   10900                        wideE,
   10901                        mkexpr(math_VEC_DUP_IMM(size+1,
   10902                                                1ULL << (shift[size]-1))));
   10903       }
   10904       assign(wide, wideE);
   10905       /* Top halves of elements, still in wide lanes */
   10906       IRTemp shrd = newTempV128();
   10907       assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
   10908       /* Elements now compacted into lower 64 bits */
   10909       IRTemp new64 = newTempV128();
   10910       assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
   10911       putLO64andZUorPutHI64(is2, dd, new64);
   10912       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
   10913       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
   10914       const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
   10915                               : (isR ? "rsubhn" : "subhn");
   10916       DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
   10917           nameQReg128(dd), arrNarrow,
   10918           nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
   10919       return True;
   10920    }
   10921 
   10922    if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
   10923       /* -------- 0,0101 SABAL{2} -------- */
   10924       /* -------- 1,0101 UABAL{2} -------- */
   10925       /* -------- 0,0111 SABDL{2} -------- */
   10926       /* -------- 1,0111 UABDL{2} -------- */
   10927       /* Widens, and size refers to the narrowed lanes. */
   10928       if (size == X11) return False;
   10929       vassert(size <= 2);
   10930       Bool   isU   = bitU == 1;
   10931       Bool   isACC = opcode == BITS4(0,1,0,1);
   10932       IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
   10933       IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
   10934       IRTemp abd   = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
   10935       IRTemp res   = newTempV128();
   10936       assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
   10937                         : mkexpr(abd));
   10938       putQReg128(dd, mkexpr(res));
   10939       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
   10940       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
   10941       const HChar* nm        = isACC ? (isU ? "uabal" : "sabal")
   10942                                      : (isU ? "uabdl" : "sabdl");
   10943       DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
   10944           nameQReg128(dd), arrWide,
   10945           nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
   10946       return True;
   10947    }
   10948 
   10949    if (opcode == BITS4(1,1,0,0)
   10950        || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
   10951       /* -------- 0,1100  SMULL{2} -------- */ // 0 (ks)
   10952       /* -------- 1,1100  UMULL{2} -------- */ // 0
   10953       /* -------- 0,1000  SMLAL{2} -------- */ // 1
   10954       /* -------- 1,1000  UMLAL{2} -------- */ // 1
   10955       /* -------- 0,1010  SMLSL{2} -------- */ // 2
   10956       /* -------- 1,1010  UMLSL{2} -------- */ // 2
   10957       /* Widens, and size refers to the narrowed lanes. */
   10958       UInt ks = 3;
   10959       switch (opcode) {
   10960          case BITS4(1,1,0,0): ks = 0; break;
   10961          case BITS4(1,0,0,0): ks = 1; break;
   10962          case BITS4(1,0,1,0): ks = 2; break;
   10963          default: vassert(0);
   10964       }
   10965       vassert(ks >= 0 && ks <= 2);
   10966       if (size == X11) return False;
   10967       vassert(size <= 2);
   10968       Bool   isU  = bitU == 1;
   10969       IRTemp vecN = newTempV128();
   10970       IRTemp vecM = newTempV128();
   10971       IRTemp vecD = newTempV128();
   10972       assign(vecN, getQReg128(nn));
   10973       assign(vecM, getQReg128(mm));
   10974       assign(vecD, getQReg128(dd));
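      /* "mas"[ks] selects the operation: 'm' is a plain multiply, 'a'
         multiply-accumulate and 's' multiply-subtract; only the
         accumulating forms consume the destination register. */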
   10975       IRTemp res = IRTemp_INVALID;
   10976       math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
   10977                     vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
   10978       putQReg128(dd, mkexpr(res));
   10979       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
   10980       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
   10981       const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
   10982       DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
   10983           nameQReg128(dd), arrWide,
   10984           nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
   10985       return True;
   10986    }
   10987 
   10988    if (bitU == 0
   10989        && (opcode == BITS4(1,1,0,1)
   10990            || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
   10991       /* -------- 0,1101  SQDMULL{2} -------- */ // 0 (ks)
   10992       /* -------- 0,1001  SQDMLAL{2} -------- */ // 1
   10993       /* -------- 0,1011  SQDMLSL{2} -------- */ // 2
   10994       /* Widens, and size refers to the narrowed lanes. */
   10995       UInt ks = 3;
   10996       switch (opcode) {
   10997          case BITS4(1,1,0,1): ks = 0; break;
   10998          case BITS4(1,0,0,1): ks = 1; break;
   10999          case BITS4(1,0,1,1): ks = 2; break;
   11000          default: vassert(0);
   11001       }
   11002       vassert(ks >= 0 && ks <= 2);
   11003       if (size == X00 || size == X11) return False;
   11004       vassert(size <= 2);
   11005       IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
   11006       vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
   11007       newTempsV128_3(&vecN, &vecM, &vecD);
   11008       assign(vecN, getQReg128(nn));
   11009       assign(vecM, getQReg128(mm));
   11010       assign(vecD, getQReg128(dd));
   11011       math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
   11012                        is2, size, "mas"[ks],
   11013                        vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
   11014       putQReg128(dd, mkexpr(res));
   11015       vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
   11016       updateQCFLAGwithDifference(sat1q, sat1n);
   11017       if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
   11018          updateQCFLAGwithDifference(sat2q, sat2n);
   11019       }
   11020       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
   11021       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
   11022       const HChar* nm        = ks == 0 ? "sqdmull"
   11023                                        : (ks == 1 ? "sqdmlal" : "sqdmlsl");
   11024       DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
   11025           nameQReg128(dd), arrWide,
   11026           nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
   11027       return True;
   11028    }
   11029 
   11030    if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
   11031       /* -------- 0,1110  PMULL{2} -------- */
   11032       /* Widens, and size refers to the narrowed lanes. */
   11033       if (size != X00) return False;
   11034       IRTemp res
   11035          = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
   11036                                      getQReg128(nn), getQReg128(mm));
   11037       putQReg128(dd, mkexpr(res));
   11038       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
   11039       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
   11043       return True;
   11044    }
   11045 
   11046    return False;
   11047 #  undef INSN
   11048 }
   11049 
   11050 
   11051 static
   11052 Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
   11053 {
   11054    /* 31 30 29 28    23   21 20 15     10 9 4
   11055       0  Q  U  01110 size 1  m  opcode 1  n d
   11056       Decode fields: u,size,opcode
   11057    */
   11058 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   11059    if (INSN(31,31) != 0
   11060        || INSN(28,24) != BITS5(0,1,1,1,0)
   11061        || INSN(21,21) != 1
   11062        || INSN(10,10) != 1) {
   11063       return False;
   11064    }
   11065    UInt bitQ   = INSN(30,30);
   11066    UInt bitU   = INSN(29,29);
   11067    UInt size   = INSN(23,22);
   11068    UInt mm     = INSN(20,16);
   11069    UInt opcode = INSN(15,11);
   11070    UInt nn     = INSN(9,5);
   11071    UInt dd     = INSN(4,0);
   11072    vassert(size < 4);
   11073 
   11074    if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
   11075       /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
   11076       /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
   11077       /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
   11078       /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
   11079       if (size == X11) return False;
   11080       Bool isADD = opcode == BITS5(0,0,0,0,0);
   11081       Bool isU   = bitU == 1;
   11082       /* Widen both args out, do the math, narrow to final result. */
   11083       IRTemp argL   = newTempV128();
   11084       IRTemp argLhi = IRTemp_INVALID;
   11085       IRTemp argLlo = IRTemp_INVALID;
   11086       IRTemp argR   = newTempV128();
   11087       IRTemp argRhi = IRTemp_INVALID;
   11088       IRTemp argRlo = IRTemp_INVALID;
   11089       IRTemp resHi  = newTempV128();
   11090       IRTemp resLo  = newTempV128();
   11091       IRTemp res    = IRTemp_INVALID;
   11092       assign(argL, getQReg128(nn));
   11093       argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
   11094       argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argL));
   11095       assign(argR, getQReg128(mm));
   11096       argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
   11097       argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argR));
   11098       IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
   11099       IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
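      /* The widened add/sub can't wrap, so a shift right by one in the
         wide domain yields the exactly-halved result; the shift is
         arithmetic for the signed case and logical for the unsigned
         case. */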
   11100       assign(resHi, binop(opSxR,
   11101                           binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
   11102                           mkU8(1)));
   11103       assign(resLo, binop(opSxR,
   11104                           binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
   11105                           mkU8(1)));
   11106       res = math_NARROW_LANES ( resHi, resLo, size );
   11107       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   11108       const HChar* nm  = isADD ? (isU ? "uhadd" : "shadd")
   11109                                : (isU ? "uhsub" : "shsub");
   11110       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   11111       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
   11112           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   11113       return True;
   11114    }
   11115 
   11116    if (opcode == BITS5(0,0,0,1,0)) {
   11117       /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
   11118       /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
   11119       if (bitQ == 0 && size == X11) return False; // implied 1d case
   11120       Bool   isU  = bitU == 1;
   11121       IRTemp argL = newTempV128();
   11122       IRTemp argR = newTempV128();
   11123       assign(argL, getQReg128(nn));
   11124       assign(argR, getQReg128(mm));
   11125       IRTemp res = math_RHADD(size, isU, argL, argR);
   11126       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   11127       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   11128       DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
   11129           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   11130       return True;
   11131    }
   11132 
   11133    if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
   11134       /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
   11135       /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
   11136       /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
   11137       /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
   11138       if (bitQ == 0 && size == X11) return False; // implied 1d case
   11139       Bool isADD = opcode == BITS5(0,0,0,0,1);
   11140       Bool isU   = bitU == 1;
   11141       IROp qop   = Iop_INVALID;
   11142       IROp nop   = Iop_INVALID;
   11143       if (isADD) {
   11144          qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
   11145          nop = mkVecADD(size);
   11146       } else {
   11147          qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
   11148          nop = mkVecSUB(size);
   11149       }
   11150       IRTemp argL = newTempV128();
   11151       IRTemp argR = newTempV128();
   11152       IRTemp qres = newTempV128();
   11153       IRTemp nres = newTempV128();
   11154       assign(argL, getQReg128(nn));
   11155       assign(argR, getQReg128(mm));
   11156       assign(qres, math_MAYBE_ZERO_HI64_fromE(
   11157                       bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
   11158       assign(nres, math_MAYBE_ZERO_HI64_fromE(
   11159                       bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
   11160       putQReg128(dd, mkexpr(qres));
   11161       updateQCFLAGwithDifference(qres, nres);
   11162       const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
   11163                                : (isU ? "uqsub" : "sqsub");
   11164       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   11165       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
   11166           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   11167       return True;
   11168    }
   11169 
   11170    if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
   11171       /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
   11172       /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
   11173       /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
   11175       Bool   isORx  = (size & 2) == 2;
   11176       Bool   invert = (size & 1) == 1;
   11177       IRTemp res    = newTempV128();
   11178       assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
   11179                         getQReg128(nn),
   11180                         invert ? unop(Iop_NotV128, getQReg128(mm))
   11181                                : getQReg128(mm)));
   11182       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   11183       const HChar* names[4] = { "and", "bic", "orr", "orn" };
   11184       const HChar* ar = bitQ == 1 ? "16b" : "8b";
   11185       DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
   11186           nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
   11187       return True;
   11188    }
   11189 
   11190    if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
   11191       /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
   11192       /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
   11193       /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
   11195       IRTemp argD = newTempV128();
   11196       IRTemp argN = newTempV128();
   11197       IRTemp argM = newTempV128();
   11198       assign(argD, getQReg128(dd));
   11199       assign(argN, getQReg128(nn));
   11200       assign(argM, getQReg128(mm));
   11201       const IROp opXOR = Iop_XorV128;
   11202       const IROp opAND = Iop_AndV128;
   11203       const IROp opNOT = Iop_NotV128;
   11204       IRTemp res = newTempV128();
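      /* These are all lane-wise multiplexing identities:
            EOR: res = M ^ N
            BSL: res = M ^ ((M ^ N) & D)    -- N where D is 1, else M
            BIT: res = D ^ ((D ^ N) & M)    -- N where M is 1, else D
            BIF: res = D ^ ((D ^ N) & ~M)   -- N where M is 0, else D
      */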
   11205       switch (size) {
   11206          case BITS2(0,0): /* EOR */
   11207             assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
   11208             break;
   11209          case BITS2(0,1): /* BSL */
   11210             assign(res, binop(opXOR, mkexpr(argM),
   11211                               binop(opAND,
   11212                                     binop(opXOR, mkexpr(argM), mkexpr(argN)),
                                    mkexpr(argD))));
   11214             break;
   11215          case BITS2(1,0): /* BIT */
   11216             assign(res, binop(opXOR, mkexpr(argD),
   11217                               binop(opAND,
   11218                                     binop(opXOR, mkexpr(argD), mkexpr(argN)),
   11219                                     mkexpr(argM))));
   11220             break;
   11221          case BITS2(1,1): /* BIF */
   11222             assign(res, binop(opXOR, mkexpr(argD),
   11223                               binop(opAND,
   11224                                     binop(opXOR, mkexpr(argD), mkexpr(argN)),
   11225                                     unop(opNOT, mkexpr(argM)))));
   11226             break;
   11227          default:
   11228             vassert(0);
   11229       }
   11230       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   11231       const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
   11232       const HChar* arr = bitQ == 1 ? "16b" : "8b";
   11233       DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
   11234           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   11235       return True;
   11236    }
   11237 
   11238    if (opcode == BITS5(0,0,1,1,0)) {
   11239       /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
   11240       /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
   11241       if (bitQ == 0 && size == X11) return False; // implied 1d case
   11242       Bool   isGT  = bitU == 0;
   11243       IRExpr* argL = getQReg128(nn);
   11244       IRExpr* argR = getQReg128(mm);
   11245       IRTemp  res  = newTempV128();
   11246       assign(res,
   11247              isGT ? binop(mkVecCMPGTS(size), argL, argR)
   11248                   : binop(mkVecCMPGTU(size), argL, argR));
   11249       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   11250       const HChar* nm  = isGT ? "cmgt" : "cmhi";
   11251       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   11252       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
   11253           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   11254       return True;
   11255    }
   11256 
   11257    if (opcode == BITS5(0,0,1,1,1)) {
   11258       /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
   11259       /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
   11260       if (bitQ == 0 && size == X11) return False; // implied 1d case
   11261       Bool    isGE = bitU == 0;
   11262       IRExpr* argL = getQReg128(nn);
   11263       IRExpr* argR = getQReg128(mm);
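      /* Only greater-than comparisons exist at the IR level, so
         compute a >= b as ~(b > a). */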
   11264       IRTemp  res  = newTempV128();
   11265       assign(res,
   11266              isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
   11267                   : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
   11268       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   11269       const HChar* nm  = isGE ? "cmge" : "cmhs";
   11270       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   11271       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
   11272           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   11273       return True;
   11274    }
   11275 
   11276    if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
   11277       /* -------- 0,xx,01000 SSHL  std7_std7_std7 -------- */
   11278       /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
   11279       /* -------- 1,xx,01000 USHL  std7_std7_std7 -------- */
   11280       /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
   11281       if (bitQ == 0 && size == X11) return False; // implied 1d case
   11282       Bool isU = bitU == 1;
   11283       Bool isR = opcode == BITS5(0,1,0,1,0);
   11284       IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
   11285                      : (isU ? mkVecSHU(size)  : mkVecSHS(size));
   11286       IRTemp res = newTempV128();
   11287       assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
   11288       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   11289       const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
   11290                              : (isU ? "ushl"  : "sshl");
   11291       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   11292       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
   11293           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   11294       return True;
   11295    }
   11296 
   11297    if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
   11298       /* -------- 0,xx,01001 SQSHL  std7_std7_std7 -------- */
   11299       /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
   11300       /* -------- 1,xx,01001 UQSHL  std7_std7_std7 -------- */
   11301       /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
   11302       if (bitQ == 0 && size == X11) return False; // implied 1d case
   11303       Bool isU = bitU == 1;
   11304       Bool isR = opcode == BITS5(0,1,0,1,1);
   11305       IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
   11306                      : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
   11307       /* This is a bit tricky.  If we're only interested in the lowest 64 bits
   11308          of the result (viz, bitQ == 0), then we must adjust the operands to
   11309          ensure that the upper part of the result, that we don't care about,
   11310          doesn't pollute the returned Q value.  To do this, zero out the upper
   11311          operand halves beforehand.  This works because it means, for the
   11312          lanes we don't care about, we are shifting zero by zero, which can
   11313          never saturate. */
   11314       IRTemp res256 = newTemp(Ity_V256);
   11315       IRTemp resSH  = newTempV128();
   11316       IRTemp resQ   = newTempV128();
   11317       IRTemp zero   = newTempV128();
   11318       assign(res256, binop(op,
   11319                            math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
   11320                            math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
   11321       assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
   11322       assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
   11323       assign(zero,  mkV128(0x0000));
   11324       putQReg128(dd, mkexpr(resSH));
   11325       updateQCFLAGwithDifference(resQ, zero);
   11326       const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
   11327                              : (isU ? "uqshl"  : "sqshl");
   11328       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   11329       DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
   11330           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
   11331       return True;
   11332    }

   if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
      /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
      /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
      /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
      /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU   = bitU == 1;
      Bool isMAX = (opcode & 1) == 0;
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      IRTemp t   = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm = isMAX ? (isU ? "umax" : "smax")
                              : (isU ? "umin" : "smin");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
      /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
      /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
      /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
      if (size == X11) return False; // 1d/2d cases not allowed
      Bool isU   = bitU == 1;
      Bool isACC = opcode == BITS5(0,1,1,1,1);
      vassert(size <= 2);
      IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
      IRTemp t2 = newTempV128();
      assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
                       : mkexpr(t1));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* nm  = isACC ? (isU ? "uaba" : "saba")
                               : (isU ? "uabd" : "sabd");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
      /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isSUB = bitU == 1;
      IROp   op    = isSUB ? mkVecSUB(size) : mkVecADD(size);
      IRTemp t     = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isSUB ? "sub" : "add";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
      /* -------- 1,xx,10001 CMEQ  std7_std7_std7 -------- */ // ==
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                  : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isEQ ? "cmeq" : "cmtst";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
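
   /* CMTST has no direct IR analogue, hence the NOT(CMEQ(AND(n,m), 0))
      encoding above.  A worked per-lane example (8-bit lanes, purely
      illustrative): n = 0x0F, m = 0xF0 gives n & m = 0x00; CMEQ against
      zero yields 0xFF and the final NOT gives 0x00 (test fails).  But
      n = 0x18, m = 0x10 gives n & m = 0x10 != 0, so the lane ends up as
      0xFF (test passes). */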

   if (opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
      /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isMLS = bitU == 1;
      IROp   opMUL    = mkVecMUL(size);
      IROp   opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
      IRTemp res      = newTempV128();
      if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
         assign(res, binop(opADDSUB,
                           getQReg128(dd),
                           binop(opMUL, getQReg128(nn), getQReg128(mm))));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,0,1,1)) {
      /* -------- 0,xx,10011 MUL  std7_std7_std7 -------- */
      /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isPMUL = bitU == 1;
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      IROp   opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
      IRTemp res   = newTempV128();
      if (opMUL != Iop_INVALID) {
         assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
      /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
      /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
      /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
      /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isU   = bitU == 1;
      Bool isMAX = opcode == BITS5(1,0,1,0,0);
      IRTemp vN  = newTempV128();
      IRTemp vM  = newTempV128();
      IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                      : (isU ? mkVecMINU(size) : mkVecMINS(size));
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(op,
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEvenLanes32x4 to extract the
         half-width result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isMAX ? (isU ? "umaxp" : "smaxp")
                               : (isU ? "uminp" : "sminp");
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp vN = newTempV128();
      IRTemp vM = newTempV128();
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(mkVecADD(size),
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEvenLanes32x4 to extract the
         half-width result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("addp %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
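
   /* The cat-even/cat-odd idiom above implements all the integer
      pairwise ops.  A worked example for 4s lanes (illustrative only,
      lanes written high-first): with n = [n3 n2 n1 n0] and
      m = [m3 m2 m1 m0],
         CatEvenLanes32x4(m, n) = [m2 m0 n2 n0]
         CatOddLanes32x4(m, n)  = [m3 m1 n3 n1]
      so combining them lanewise gives [m3+m2 m1+m0 n3+n2 n1+n0], which
      is exactly ADDP's layout: Vn's pairs in the low half and Vm's in
      the high half. */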

   if (bitU == 0
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,0x,11110 FMAX   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11110 FMIN   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool   isD   = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(1,1,0,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
      IRTemp res   = newTempV128();
      assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
      /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
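
   /* One way the FIXME above could be addressed: do the multiply-add one
      lane at a time with the fused scalar primops, which round only
      once.  A minimal sketch, not the current implementation; it assumes
      a qop() wrapper for IRExpr_Qop, and negates the first product
      operand for FMLS:
         IRType ty = isD ? Ity_F64 : Ity_F32;
         for (UInt i = 0; i < (isD ? 2 : (bitQ ? 4 : 2)); i++)
            putQRegLane(dd, i,
               qop(isD ? Iop_MAddF64 : Iop_MAddF32, mkexpr(rm),
                   isSUB ? unop(isD ? Iop_NegF64 : Iop_NegF32,
                                getQRegLane(nn, i, ty))
                         : getQRegLane(nn, i, ty),
                   getQRegLane(mm, i, ty), getQRegLane(dd, i, ty)));
      plus zeroing of the upper 64 bits when bitQ == 0. */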

   if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      const IROp ops[4]
         = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      IRTemp t2    = newTempV128();
      // FIXME: use Abd primop instead?
      assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, unop(opABS, mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fabd %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11011 FMUL  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 0;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isGE  = bitU == 1;
      IROp   opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                          : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp t1    = newTempV128();
      assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                      : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp   opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp t1    = newTempV128();
      assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD  = (size & 1) == 1;
      Bool isGT = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp   opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                          : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp t1    = newTempV128();
      assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
                              unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11110 FMAXP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11110 FMINP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(1,1,0,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
      IRTemp srcN  = newTempV128();
      IRTemp srcM  = newTempV128();
      IRTemp preL  = IRTemp_INVALID;
      IRTemp preR  = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                binop(opMXX, mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = size == X01;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp srcN = newTempV128();
      IRTemp srcM = newTempV128();
      IRTemp preL = IRTemp_INVALID;
      IRTemp preR = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                triop(mkVecADDF(isD ? 3 : 2),
                      mkexpr(mk_get_IR_rounding_mode()),
                      mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
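
   /* math_REARRANGE_FOR_FLOATING_PAIRWISE plays the same role for the FP
      pairwise ops above as the cat-even/cat-odd pairing does for the
      integer ones: it builds preL/preR so that lane i of the subsequent
      binop/triop combines the i-th adjacent source pair.  For example
      (architecturally), faddp v0.2s, v1.2s, v2.2s computes
      d0 = n0 + n1 and d1 = m0 + m1. */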

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      vassert(size <= 1);
      const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,0x,11111: FRECPS  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isSQRT = (size & 2) == 2;
      Bool isD    = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp op     = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
                           : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21    16     11 9 4
      0  Q  U  01110 size 10000 opcode 10 n d
      Decode fields: U,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
      /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
      /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
      const IROp iops[3] = { Iop_Reverse8sIn64_x2,
                             Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
      vassert(size <= 2);
      IRTemp res = newTempV128();
      assign(res, unop(iops[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev64",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
      /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
      Bool   isH = size == X01;
      IRTemp res = newTempV128();
      IROp   iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
      assign(res, unop(iop, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev32",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
      /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev16",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
      /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
      /* -------- 0,xx,00110: SADALP std6_std6 -------- */
      /* -------- 1,xx,00110: UADALP std6_std6 -------- */
      /* Widens, and size refers to the narrow size. */
      if (size == X11) return False; // no 1d or 2d cases
      Bool   isU   = bitU == 1;
      Bool   isACC = opcode == BITS5(0,0,1,1,0);
      IRTemp src   = newTempV128();
      IRTemp sum   = newTempV128();
      IRTemp res   = newTempV128();
      assign(src, getQReg128(nn));
      assign(sum,
             binop(mkVecADD(size+1),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, True/*fromOdd*/, size, mkexpr(src))),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, False/*!fromOdd*/, size, mkexpr(src)))));
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
                        : mkexpr(sum));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(bitQ, size+1);
      DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
                                     : (isU ? "uaddlp" : "saddlp"),
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }
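
   /* A worked example for the pairwise long-add group (architectural
      semantics, for illustration): uaddlp v0.4h, v1.8b sets each 16-bit
      lane to ZeroExt(b[2i]) + ZeroExt(b[2i+1]); saddlp sign-extends
      instead; and the sadalp/uadalp forms additionally accumulate the
      sum into the existing Vd lane. */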

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
      /* -------- 1,xx,00011: USQADD std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUSQADD = bitU == 1;
      /* This is switched (in the US vs SU sense) deliberately.
         SUQADD corresponds to the ExtUSsatSS variants and
         USQADD corresponds to the ExtSUsatUU variants.
         See libvex_ir for more details. */
      IROp   qop  = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                             : mkVecQADDEXTUSSATSS(size);
      IROp   nop  = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      /* Because the two arguments to the addition are implicitly
         extended differently (one signedly, the other unsignedly) it is
         important to present them to the primop in the correct order. */
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }
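
   /* The QC computation above follows a pattern used throughout this
      file: compute both the saturating result (qres) and the plain
      wrapping result (nres), and set QC iff they differ.  For instance
      (illustrative only), suqadd b0 with Vd = 0x7F and Vn = 0x01
      saturates to 0x7F while the wrapping add gives 0x80; the lanes
      differ, so QC is set. */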

   if (opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00100: CLS std6_std6 -------- */
      /* -------- 1,xx,00100: CLZ std6_std6 -------- */
      if (size == X11) return False; // no 1d or 2d cases
      const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
      const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
      Bool   isCLZ = bitU == 1;
      IRTemp res   = newTempV128();
      vassert(size <= 2);
      assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
      /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 1,01,00101  RBIT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", "rbit",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std7_std7 -------- */
      /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isNEG  = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = newTempV128(), nres = newTempV128();
      assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
      assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
      /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGT  = bitU == 0;
      IRExpr* argL  = getQReg128(nn);
      IRExpr* argR  = mkV128(0x0000);
      IRTemp  res   = newTempV128();
      IROp    opGTS = mkVecCMPGTS(size);
      assign(res, isGT ? binop(opGTS, argL, argR)
                       : unop(Iop_NotV128, binop(opGTS, argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
      /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                       : unop(Iop_NotV128,
                              binop(mkVecCMPGTS(size), argL, argR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, binop(mkVecCMPGTS(size), argR, argL));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01011: ABS std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, unop(mkVecABS(size), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,xx,01011: NEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   UInt ix = 0; /*INVALID*/
   if (size >= X10) {
      switch (opcode) {
         case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
         case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
         case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
         default: break;
      }
   }
   if (ix > 0) {
      /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
      /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
      /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
      /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
      /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isD     = size == X11;
      IROp   opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
      IROp   opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
      IROp   opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IROp   opCmp   = Iop_INVALID;
      Bool   swap    = False;
      const HChar* nm = "??";
      switch (ix) {
         case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
         case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
         case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
         case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
         case 5: nm = "fcmle"; opCmp = opCmpLE; break;
         default: vassert(0);
      }
      IRExpr* zero = mkV128(0x0000);
      IRTemp res = newTempV128();
      assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
                       : binop(opCmp, getQReg128(nn), zero));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, #0.0\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
      /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isFNEG = bitU == 1;
      IROp   op     = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
                             : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
      IRTemp res = newTempV128();
      assign(res, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010: XTN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool   is2  = bitQ == 1;
      IROp   opN  = mkVecNARROWUN(size);
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
      putLO64andZUorPutHI64(is2, dd, resN);
      const HChar* nm        = "xtn";
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN{,2} -------- */
      /* -------- 1,xx,10100: UQXTN{,2} -------- */
      /* -------- 1,xx,10010: SQXTUN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool  is2    = bitQ == 1;
      IROp  opN    = Iop_INVALID;
      Bool  zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src  = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putLO64andZUorPutHI64(is2, dd, resN);
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }
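
   /* QC for the saturating-narrow group uses a narrow-then-rewiden
      trick: the narrowed result is widened back (zero- or sign-extending
      to match the op) and compared against the original source; any lane
      that saturated re-widens to a different value.  Illustratively, for
      sqxtn with a 16-bit source lane 0x0123: it narrows to 0x7F, which
      re-widens to 0x007F != 0x0123, so QC gets set. */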

   if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
      /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
      /* Widens, and size is the narrow size. */
      if (size == X11) return False;
      Bool is2   = bitQ == 1;
      IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
      IROp opSHL = mkVecSHLN(size+1);
      IRTemp src = newTempV128();
      IRTemp res = newTempV128();
      assign(src, getQReg128(nn));
      assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
                               mkU8(8 << size)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("shll%s %s.%s, %s.%s, #%u\n", is2 ? "2" : "",
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
      return True;
   }
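
   /* Why interleave-with-self works for SHLL: interleaving src with
      itself duplicates each narrow lane, so (for the 8b case) each
      16-bit lane of the interleave holds (b << 8) | b; the subsequent
      shift left by 8 then leaves exactly b << 8, which is the element
      value shifted left by the lane width, as SHLL requires.  E.g.
      shll v0.8h, v1.8b, #8 with a source byte 0x5A yields the lane
      value 0x5A00. */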

   if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
      UInt   nLanes = size == X00 ? 4 : 2;
      IRType srcTy  = size == X00 ? Ity_F32 : Ity_F64;
      IROp   opCvt  = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
      IRTemp rm     = mk_get_IR_rounding_mode();
      IRTemp src[nLanes];
      for (UInt i = 0; i < nLanes; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, i, srcTy));
      }
      for (UInt i = 0; i < nLanes; i++) {
         putQRegLane(dd, nLanes * bitQ + i,
                         binop(opCvt, mkexpr(rm), mkexpr(src[i])));
      }
      if (bitQ == 0) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }
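
   /* Note the destination indexing above: the "2" form (bitQ == 1)
      writes lanes nLanes .. 2*nLanes-1 of Vd and leaves the lower half
      intact, while the plain form writes lanes 0 .. nLanes-1 and zeroes
      the upper 64 bits.  E.g. fcvtn2 v0.4s, v1.2d narrows the two
      doubles into v0 lanes 2 and 3. */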

   if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
      /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
      /* Using Irrm_NEAREST here isn't right.  The insn calls for "round
         to odd": truncate, and if that was inexact, force the least
         significant mantissa bit of the result to 1.  IR has no such
         rounding mode, so this implementation just rounds to nearest,
         as flagged in the KNOWN LIMITATIONS note at the top of this
         file. */
      IRType srcTy = Ity_F64;
      IROp   opCvt = Iop_F64toF32;
      IRTemp src[2];
      for (UInt i = 0; i < 2; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, i, srcTy));
      }
      for (UInt i = 0; i < 2; i++) {
         putQRegLane(dd, 2 * bitQ + i,
                         binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
      }
      if (bitQ == 0) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
      UInt   nLanes = size == X00 ? 4 : 2;
      IRType srcTy  = size == X00 ? Ity_F16 : Ity_F32;
      IROp   opCvt  = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
      IRTemp src[nLanes];
      for (UInt i = 0; i < nLanes; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
      }
      for (UInt i = 0; i < nLanes; i++) {
         putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }

   ix = 0;
   if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
      ix = 1 + (((bitU & 1) << 2) | (size & 2) | (opcode & 1));
      // = 1 + bitU[0]:size[1]:opcode[0]
      vassert(ix >= 1 && ix <= 8);
      if (ix == 7) ix = 0;
   }
   if (ix > 0) {
      /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
      /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
      /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
      /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
      /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
      /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
      /* -------- 1,1x,11000 (apparently unassigned)    (7) -------- */
      /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
      /* rm plan:
         FRINTN: tieeven -- !! FIXME KLUDGED !!
         FRINTM: -inf
         FRINTP: +inf
         FRINTZ: zero
         FRINTA: tieaway -- !! FIXME KLUDGED !!
         FRINTX: per FPCR + "exact = TRUE"
         FRINTI: per FPCR
      */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case

      IRTemp irrmRM = mk_get_IR_rounding_mode();

      UChar ch = '?';
      IRTemp irrm = newTemp(Ity_I32);
      switch (ix) {
         case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
         case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
         case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
         case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
         case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
         // FRINTX's "integral exact" means it rounds per FPCR but also
         // raises the Inexact exception whenever the result differs from
         // the input.  We can't model the exception, so just round per
         // FPCR, which makes it behave like FRINTI below.
         case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
         case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
         default: vassert(0);
      }

      IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
      if (isD) {
         for (UInt i = 0; i < 2; i++) {
            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
                                            getQRegLane(nn, i, Ity_F64)));
         }
      } else {
         UInt n = bitQ==1 ? 4 : 2;
         for (UInt i = 0; i < n; i++) {
            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
                                            getQRegLane(nn, i, Ity_F32)));
         }
         if (bitQ == 0)
            putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("frint%c %s.%s, %s.%s\n", ch,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   ix = 0; /*INVALID*/
   switch (opcode) {
      case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
      case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
      case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
      default: break;
   }
   if (ix > 0) {
      /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
      /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
      /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
      /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
      /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
      /* -------- 1,0x,11010 FCVTNU 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
      /* -------- 1,0x,11011 FCVTMU 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
      /* -------- 1,0x,11100 FCVTAU 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
      /* -------- 1,1x,11010 FCVTPU 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
      /* -------- 1,1x,11011 FCVTZU 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
   12331       Bool isD = (size & 1) == 1;
   12332       if (bitQ == 0 && isD) return False; // implied 1d case
   12333 
   12334       IRRoundingMode irrm = 8; /*impossible*/
   12335       HChar          ch   = '?';
   12336       switch (ix) {
   12337          case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
   12338          case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
   12339          case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
   12340          case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
   12341          case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
   12342          default: vassert(0);
   12343       }
   12344       IROp cvt = Iop_INVALID;
   12345       if (bitU == 1) {
   12346          cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
   12347       } else {
   12348          cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
   12349       }
   12350       if (isD) {
   12351          for (UInt i = 0; i < 2; i++) {
   12352             putQRegLane(dd, i, binop(cvt, mkU32(irrm),
   12353                                             getQRegLane(nn, i, Ity_F64)));
   12354          }
   12355       } else {
   12356          UInt n = bitQ==1 ? 4 : 2;
   12357          for (UInt i = 0; i < n; i++) {
   12358             putQRegLane(dd, i, binop(cvt, mkU32(irrm),
   12359                                             getQRegLane(nn, i, Ity_F32)));
   12360          }
   12361          if (bitQ == 0)
   12362             putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
   12363       }
   12364       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   12365       DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
   12366           nameQReg128(dd), arr, nameQReg128(nn), arr);
   12367       return True;
   12368    }
   12369 
   12370    if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
   12371       /* -------- 0,10,11100: URECPE  4s_4s, 2s_2s -------- */
   12372       /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
   12373       Bool isREC = bitU == 0;
   12374       IROp op    = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
   12375       IRTemp res = newTempV128();
   12376       assign(res, unop(op, getQReg128(nn)));
   12377       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   12378       const HChar* nm  = isREC ? "urecpe" : "ursqrte";
   12379       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   12380       DIP("%s %s.%s, %s.%s\n", nm,
   12381           nameQReg128(dd), arr, nameQReg128(nn), arr);
   12382       return True;
   12383    }
   12384 
   12385    if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
   12386       /* -------- 0,0x,11101: SCVTF -------- */
   12387       /* -------- 1,0x,11101: UCVTF -------- */
   12388       /* 31  28      22 21       15     9 4
   12389          0q0 01110 0 sz 1  00001 110110 n d  SCVTF Vd, Vn
   12390          0q1 01110 0 sz 1  00001 110110 n d  UCVTF Vd, Vn
   12391          with laneage:
   12392          case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
   12393       */
   12394       Bool isQ   = bitQ == 1;
   12395       Bool isU   = bitU == 1;
   12396       Bool isF64 = (size & 1) == 1;
   12397       if (isQ || !isF64) {
   12398          IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
   12399          UInt   nLanes = 0;
   12400          Bool   zeroHI = False;
   12401          const HChar* arrSpec = NULL;
   12402          Bool   ok  = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
   12403                                        isQ, isF64 );
   12404          IROp   iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
   12405                           : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
   12406          IRTemp rm  = mk_get_IR_rounding_mode();
   12407          UInt   i;
   12408          vassert(ok); /* the 'if' above should ensure this */
   12409          for (i = 0; i < nLanes; i++) {
   12410             putQRegLane(dd, i,
   12411                         binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
   12412          }
   12413          if (zeroHI) {
   12414             putQRegLane(dd, 1, mkU64(0));
   12415          }
   12416          DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
   12417              nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
   12418          return True;
   12419       }
   12420       /* else fall through */
   12421    }
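   /* For the case above, getLaneInfo_Q_SZ resolves the laneage as:
      (Q=0,sz=0) -> 2 lanes of I32->F32 with the top half zeroed,
      (Q=1,sz=0) -> 4 lanes of I32->F32, and (Q=1,sz=1) -> 2 lanes of
      I64->F64; (Q=0,sz=1) is rejected before the call. */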
   12422 
   12423    if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
   12424       /* -------- 0,1x,11101: FRECPE  2d_2d, 4s_4s, 2s_2s -------- */
   12425       /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
   12426       Bool isSQRT = bitU == 1;
   12427       Bool isD    = (size & 1) == 1;
   12428       IROp op     = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
   12429                            : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
   12430       if (bitQ == 0 && isD) return False; // implied 1d case
   12431       IRTemp resV = newTempV128();
   12432       assign(resV, unop(op, getQReg128(nn)));
   12433       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
   12434       const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
   12435       DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
   12436           nameQReg128(dd), arr, nameQReg128(nn), arr);
   12437       return True;
   12438    }
   12439 
   12440    if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
   12441       /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
   12442       Bool isD = (size & 1) == 1;
   12443       IROp op  = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4;
   12444       if (bitQ == 0 && isD) return False; // implied 1d case
   12445       IRTemp resV = newTempV128();
   12446       assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()),
   12447                              getQReg128(nn)));
   12448       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
   12449       const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
   12450       DIP("%s %s.%s, %s.%s\n", "fsqrt",
   12451           nameQReg128(dd), arr, nameQReg128(nn), arr);
   12452       return True;
   12453    }
   12454 
   12455    return False;
   12456 #  undef INSN
   12457 }
   12458 
   12459 
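/* The by-element cases in the function below repeatedly extract the Vm
   register number and the lane index from the M/L/H bits, using the same
   size-dependent scheme.  The following is an illustrative sketch of that
   scheme for the integer-lane cases; it is not referenced by the decoders,
   which keep the logic inline.  (The FP d-lane cases instead use index ==
   H, with L required to be zero.) */
static inline
Bool getVmAndIndex_SKETCH ( /*OUT*/UInt* mm, /*OUT*/UInt* ix, UInt size,
                            UInt bitH, UInt bitL, UInt bitM, UInt mmLO4 )
{
   switch (size) {
      case X01: /* h lanes: only v0..v15 encodable; index is H:L:M */
         *mm = mmLO4;
         *ix = (bitH << 2) | (bitL << 1) | bitM;
         return True;
      case X10: /* s lanes: Vm is M:mmLO4; index is H:L */
         *mm = (bitM << 4) | mmLO4;
         *ix = (bitH << 1) | bitL;
         return True;
      default:  /* b and d lane sizes are unallocated in those cases */
         return False;
   }
}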
   12460 static
   12461 Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
   12462 {
   12463    /* 31    28    23   21 20 19 15     11   9 4
   12464       0 Q U 01111 size L  M  m  opcode H  0 n d
   12465       Decode fields are: u,size,opcode
   12466       M is really part of the mm register number.  Individual
   12467       cases need to inspect L and H though.
   12468    */
   12469 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   12470    if (INSN(31,31) != 0
    12471        || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) != 0) {
   12472       return False;
   12473    }
   12474    UInt bitQ   = INSN(30,30);
   12475    UInt bitU   = INSN(29,29);
   12476    UInt size   = INSN(23,22);
   12477    UInt bitL   = INSN(21,21);
   12478    UInt bitM   = INSN(20,20);
   12479    UInt mmLO4  = INSN(19,16);
   12480    UInt opcode = INSN(15,12);
   12481    UInt bitH   = INSN(11,11);
   12482    UInt nn     = INSN(9,5);
   12483    UInt dd     = INSN(4,0);
   12484    vassert(size < 4);
   12485    vassert(bitH < 2 && bitM < 2 && bitL < 2);
   12486 
   12487    if (bitU == 0 && size >= X10
   12488        && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
   12489       /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
   12490       /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
   12491       if (bitQ == 0 && size == X11) return False; // implied 1d case
   12492       Bool isD   = (size & 1) == 1;
   12493       Bool isSUB = opcode == BITS4(0,1,0,1);
   12494       UInt index;
   12495       if      (!isD)             index = (bitH << 1) | bitL;
   12496       else if (isD && bitL == 0) index = bitH;
   12497       else return False; // sz:L == x11 => unallocated encoding
   12498       vassert(index < (isD ? 2 : 4));
   12499       IRType ity   = isD ? Ity_F64 : Ity_F32;
   12500       IRTemp elem  = newTemp(ity);
   12501       UInt   mm    = (bitM << 4) | mmLO4;
   12502       assign(elem, getQRegLane(mm, index, ity));
   12503       IRTemp dupd  = math_DUP_TO_V128(elem, ity);
   12504       IROp   opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
   12505       IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
   12506       IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
   12507       IRTemp rm    = mk_get_IR_rounding_mode();
   12508       IRTemp t1    = newTempV128();
   12509       IRTemp t2    = newTempV128();
   12510       // FIXME: double rounding; use FMA primops instead
   12511       assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
   12512       assign(t2, triop(isSUB ? opSUB : opADD,
   12513                        mkexpr(rm), getQReg128(dd), mkexpr(t1)));
   12514       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
   12515       const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
   12516       DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
   12517           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
   12518           isD ? 'd' : 's', index);
   12519       return True;
   12520    }
   12521 
   12522    if (size >= X10 && opcode == BITS4(1,0,0,1)) {
   12523       /* -------- 0,1x,1001 FMUL  2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
   12524       /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
   12525       if (bitQ == 0 && size == X11) return False; // implied 1d case
   12526       Bool isD    = (size & 1) == 1;
   12527       Bool isMULX = bitU == 1;
   12528       UInt index;
   12529       if      (!isD)             index = (bitH << 1) | bitL;
   12530       else if (isD && bitL == 0) index = bitH;
   12531       else return False; // sz:L == x11 => unallocated encoding
   12532       vassert(index < (isD ? 2 : 4));
   12533       IRType ity  = isD ? Ity_F64 : Ity_F32;
   12534       IRTemp elem = newTemp(ity);
   12535       UInt   mm   = (bitM << 4) | mmLO4;
   12536       assign(elem, getQRegLane(mm, index, ity));
   12537       IRTemp dupd = math_DUP_TO_V128(elem, ity);
   12538       // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
   12539       IRTemp res  = newTempV128();
   12540       assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
   12541                         mkexpr(mk_get_IR_rounding_mode()),
   12542                         getQReg128(nn), mkexpr(dupd)));
   12543       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   12544       const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
   12545       DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
   12546           isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
   12547           nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
   12548       return True;
   12549    }
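   /* Re the kludge above: the only case where FMULX differs from FMUL is
      (+/-0) * (+/-infinity), which FMULX defines as +/-2.0 (sign = XOR of
      the operand signs) where FMUL returns the default NaN.  All other
      inputs behave identically. */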
   12550 
   12551    if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
   12552        || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
   12553       /* -------- 1,xx,0000 MLA s/h variants only -------- */
   12554       /* -------- 1,xx,0100 MLS s/h variants only -------- */
   12555       /* -------- 0,xx,1000 MUL s/h variants only -------- */
   12556       Bool isMLA = opcode == BITS4(0,0,0,0);
   12557       Bool isMLS = opcode == BITS4(0,1,0,0);
   12558       UInt mm    = 32; // invalid
   12559       UInt ix    = 16; // invalid
   12560       switch (size) {
   12561          case X00:
   12562             return False; // b case is not allowed
   12563          case X01:
   12564             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
   12565          case X10:
   12566             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
   12567          case X11:
   12568             return False; // d case is not allowed
   12569          default:
   12570             vassert(0);
   12571       }
   12572       vassert(mm < 32 && ix < 16);
   12573       IROp   opMUL = mkVecMUL(size);
   12574       IROp   opADD = mkVecADD(size);
   12575       IROp   opSUB = mkVecSUB(size);
   12576       HChar  ch    = size == X01 ? 'h' : 's';
   12577       IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
   12578       IRTemp vecD  = newTempV128();
   12579       IRTemp vecN  = newTempV128();
   12580       IRTemp res   = newTempV128();
   12581       assign(vecD, getQReg128(dd));
   12582       assign(vecN, getQReg128(nn));
   12583       IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
   12584       if (isMLA || isMLS) {
   12585          assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
   12586       } else {
   12587          assign(res, prod);
   12588       }
   12589       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   12590       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   12591       DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
   12592                                                 : (isMLS ? "mls" : "mul"),
   12593           nameQReg128(dd), arr,
    12594           nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
   12595       return True;
   12596    }
   12597 
   12598    if (opcode == BITS4(1,0,1,0)
   12599        || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
   12600       /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
   12601       /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
   12602       /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
   12603       /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
   12604       /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
    12605       /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
   12606       /* Widens, and size refers to the narrowed lanes. */
   12607       UInt ks = 3;
   12608       switch (opcode) {
   12609          case BITS4(1,0,1,0): ks = 0; break;
   12610          case BITS4(0,0,1,0): ks = 1; break;
   12611          case BITS4(0,1,1,0): ks = 2; break;
   12612          default: vassert(0);
   12613       }
   12614       vassert(ks >= 0 && ks <= 2);
   12615       Bool isU = bitU == 1;
   12616       Bool is2 = bitQ == 1;
   12617       UInt mm  = 32; // invalid
   12618       UInt ix  = 16; // invalid
   12619       switch (size) {
   12620          case X00:
   12621             return False; // h_b_b[] case is not allowed
   12622          case X01:
   12623             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
   12624          case X10:
   12625             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
   12626          case X11:
   12627             return False; // q_d_d[] case is not allowed
   12628          default:
   12629             vassert(0);
   12630       }
   12631       vassert(mm < 32 && ix < 16);
   12632       IRTemp vecN  = newTempV128();
   12633       IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
   12634       IRTemp vecD  = newTempV128();
   12635       assign(vecN, getQReg128(nn));
   12636       assign(vecD, getQReg128(dd));
   12637       IRTemp res = IRTemp_INVALID;
   12638       math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
   12639                     vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
   12640       putQReg128(dd, mkexpr(res));
   12641       const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
   12642       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
   12643       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
   12644       HChar ch               = size == X01 ? 'h' : 's';
   12645       DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
   12646           isU ? 'u' : 's', nm, is2 ? "2" : "",
   12647           nameQReg128(dd), arrWide,
    12648           nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
   12649       return True;
   12650    }
   12651 
   12652    if (bitU == 0
   12653        && (opcode == BITS4(1,0,1,1)
   12654            || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
   12655       /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
   12656       /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
   12657       /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
   12658       /* Widens, and size refers to the narrowed lanes. */
   12659       UInt ks = 3;
   12660       switch (opcode) {
   12661          case BITS4(1,0,1,1): ks = 0; break;
   12662          case BITS4(0,0,1,1): ks = 1; break;
   12663          case BITS4(0,1,1,1): ks = 2; break;
   12664          default: vassert(0);
   12665       }
   12666       vassert(ks >= 0 && ks <= 2);
   12667       Bool is2 = bitQ == 1;
   12668       UInt mm  = 32; // invalid
   12669       UInt ix  = 16; // invalid
   12670       switch (size) {
   12671          case X00:
   12672             return False; // h_b_b[] case is not allowed
   12673          case X01:
   12674             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
   12675          case X10:
   12676             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
   12677          case X11:
   12678             return False; // q_d_d[] case is not allowed
   12679          default:
   12680             vassert(0);
   12681       }
   12682       vassert(mm < 32 && ix < 16);
   12683       IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
   12684       vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
   12685       newTempsV128_2(&vecN, &vecD);
   12686       assign(vecN, getQReg128(nn));
   12687       IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
   12688       assign(vecD, getQReg128(dd));
   12689       math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
   12690                        is2, size, "mas"[ks],
   12691                        vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
   12692       putQReg128(dd, mkexpr(res));
   12693       vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
   12694       updateQCFLAGwithDifference(sat1q, sat1n);
   12695       if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
   12696          updateQCFLAGwithDifference(sat2q, sat2n);
   12697       }
   12698       const HChar* nm        = ks == 0 ? "sqdmull"
   12699                                        : (ks == 1 ? "sqdmlal" : "sqdmlsl");
   12700       const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
   12701       const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
   12702       HChar ch               = size == X01 ? 'h' : 's';
   12703       DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
   12704           nm, is2 ? "2" : "",
   12705           nameQReg128(dd), arrWide,
    12706           nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
   12707       return True;
   12708    }
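   /* Example of the QC updating above: sqdmull with both h-lane operands
      equal to 0x8000 (-32768) computes 2 * 2^30 = 2^31, which saturates
      to 0x7FFFFFFF in the widened s lane; the saturated (sat1q) and
      unsaturated (sat1n) results then differ, so FPSR.QC gets set. */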
   12709 
   12710    if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
   12711       /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
   12712       /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
   12713       UInt mm  = 32; // invalid
   12714       UInt ix  = 16; // invalid
   12715       switch (size) {
   12716          case X00:
   12717             return False; // b case is not allowed
   12718          case X01:
   12719             mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
   12720          case X10:
   12721             mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
   12722          case X11:
   12723             return False; // q case is not allowed
   12724          default:
   12725             vassert(0);
   12726       }
   12727       vassert(mm < 32 && ix < 16);
   12728       Bool isR = opcode == BITS4(1,1,0,1);
   12729       IRTemp res, sat1q, sat1n, vN, vM;
   12730       res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
   12731       vN = newTempV128();
   12732       assign(vN, getQReg128(nn));
   12733       vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
   12734       math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
   12735       putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
   12736       IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
   12737       updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
   12738       const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
   12739       const HChar* arr = nameArr_Q_SZ(bitQ, size);
   12740       HChar ch         = size == X01 ? 'h' : 's';
   12741       DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
    12742           nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
   12743       return True;
   12744    }
   12745 
   12746    return False;
   12747 #  undef INSN
   12748 }
   12749 
   12750 
   12751 static
   12752 Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
   12753 {
   12754 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   12755    return False;
   12756 #  undef INSN
   12757 }
   12758 
   12759 
   12760 static
   12761 Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
   12762 {
   12763 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   12764    return False;
   12765 #  undef INSN
   12766 }
   12767 
   12768 
   12769 static
   12770 Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
   12771 {
   12772 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   12773    return False;
   12774 #  undef INSN
   12775 }
   12776 
   12777 
   12778 static
   12779 Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
   12780 {
   12781    /* 31  28    23 21 20 15 13   9 4
   12782       000 11110 ty 1  m  op 1000 n opcode2
   12783       The first 3 bits are really "M 0 S", but M and S are always zero.
   12784       Decode fields are: ty,op,opcode2
   12785    */
   12786 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   12787    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
   12788        || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
   12789       return False;
   12790    }
   12791    UInt ty      = INSN(23,22);
   12792    UInt mm      = INSN(20,16);
   12793    UInt op      = INSN(15,14);
   12794    UInt nn      = INSN(9,5);
   12795    UInt opcode2 = INSN(4,0);
   12796    vassert(ty < 4);
   12797 
   12798    if (ty <= X01 && op == X00
   12799        && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
   12800       /* -------- 0x,00,00000 FCMP  d_d,   s_s -------- */
   12801       /* -------- 0x,00,01000 FCMP  d_#0, s_#0 -------- */
   12802       /* -------- 0x,00,10000 FCMPE d_d,   s_s -------- */
   12803       /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
   12804       /* 31        23   20    15      9 4
   12805          000 11110 01 1     m 00 1000 n 10 000  FCMPE Dn, Dm
   12806          000 11110 01 1 00000 00 1000 n 11 000  FCMPE Dn, #0.0
   12807          000 11110 01 1     m 00 1000 n 00 000  FCMP  Dn, Dm
   12808          000 11110 01 1 00000 00 1000 n 01 000  FCMP  Dn, #0.0
   12809 
   12810          000 11110 00 1     m 00 1000 n 10 000  FCMPE Sn, Sm
   12811          000 11110 00 1 00000 00 1000 n 11 000  FCMPE Sn, #0.0
   12812          000 11110 00 1     m 00 1000 n 00 000  FCMP  Sn, Sm
   12813          000 11110 00 1 00000 00 1000 n 01 000  FCMP  Sn, #0.0
   12814 
   12815          FCMPE generates Invalid Operation exn if either arg is any kind
   12816          of NaN.  FCMP generates Invalid Operation exn if either arg is a
   12817          signalling NaN.  We ignore this detail here and produce the same
   12818          IR for both.
   12819       */
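      /* For reference, the NZCV patterns an AArch64 FP compare delivers
         (and which mk_convert_IRCmpF64Result_to_NZCV below recreates from
         the Ircr_* comparison result) are:
            EQ        -> 0110  (Z C)
            LT        -> 1000  (N)
            GT        -> 0010  (C)
            unordered -> 0011  (C V)
         e.g. an equal compare leaves 0x60000000 in the flags word after
         the shift into bits 31:28 below. */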
   12820       Bool   isD     = (ty & 1) == 1;
   12821       Bool   isCMPE  = (opcode2 & 16) == 16;
   12822       Bool   cmpZero = (opcode2 & 8) == 8;
   12823       IRType ity     = isD ? Ity_F64 : Ity_F32;
   12824       Bool   valid   = True;
   12825       if (cmpZero && mm != 0) valid = False;
   12826       if (valid) {
   12827          IRTemp argL  = newTemp(ity);
   12828          IRTemp argR  = newTemp(ity);
   12829          IRTemp irRes = newTemp(Ity_I32);
   12830          assign(argL, getQRegLO(nn, ity));
   12831          assign(argR,
   12832                 cmpZero
   12833                    ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
   12834                    : getQRegLO(mm, ity));
   12835          assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
   12836                              mkexpr(argL), mkexpr(argR)));
   12837          IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
   12838          IRTemp nzcv_28x0 = newTemp(Ity_I64);
   12839          assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
   12840          setFlags_COPY(nzcv_28x0);
   12841          DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
   12842              cmpZero ? "#0.0" : nameQRegLO(mm, ity));
   12843          return True;
   12844       }
   12845       return False;
   12846    }
   12847 
   12848    return False;
   12849 #  undef INSN
   12850 }
   12851 
   12852 
   12853 static
   12854 Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
   12855 {
   12856    /* 31  28    23 21 20 15   11 9 4  3
   12857       000 11110 ty 1  m  cond 01 n op nzcv
   12858       The first 3 bits are really "M 0 S", but M and S are always zero.
   12859       Decode fields are: ty,op
   12860    */
   12861 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   12862    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
   12863        || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
   12864       return False;
   12865    }
   12866    UInt ty   = INSN(23,22);
   12867    UInt mm   = INSN(20,16);
   12868    UInt cond = INSN(15,12);
   12869    UInt nn   = INSN(9,5);
   12870    UInt op   = INSN(4,4);
   12871    UInt nzcv = INSN(3,0);
   12872    vassert(ty < 4 && op <= 1);
   12873 
   12874    if (ty <= BITS2(0,1)) {
   12875       /* -------- 00,0 FCCMP  s_s -------- */
   12876       /* -------- 00,1 FCCMPE s_s -------- */
   12877       /* -------- 01,0 FCCMP  d_d -------- */
   12878       /* -------- 01,1 FCCMPE d_d -------- */
   12879 
   12880       /* FCCMPE generates Invalid Operation exn if either arg is any kind
   12881          of NaN.  FCCMP generates Invalid Operation exn if either arg is a
   12882          signalling NaN.  We ignore this detail here and produce the same
   12883          IR for both.
   12884       */
   12885       Bool   isD    = (ty & 1) == 1;
   12886       Bool   isCMPE = op == 1;
   12887       IRType ity    = isD ? Ity_F64 : Ity_F32;
   12888       IRTemp argL   = newTemp(ity);
   12889       IRTemp argR   = newTemp(ity);
   12890       IRTemp irRes  = newTemp(Ity_I32);
   12891       assign(argL,  getQRegLO(nn, ity));
   12892       assign(argR,  getQRegLO(mm, ity));
   12893       assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
   12894                           mkexpr(argL), mkexpr(argR)));
   12895       IRTemp condT = newTemp(Ity_I1);
   12896       assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
   12897       IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);
   12898 
   12899       IRTemp nzcvT_28x0 = newTemp(Ity_I64);
   12900       assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));
   12901 
   12902       IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);
   12903 
   12904       IRTemp nzcv_28x0 = newTemp(Ity_I64);
   12905       assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
   12906                                    mkexpr(nzcvT_28x0), nzcvF_28x0));
   12907       setFlags_COPY(nzcv_28x0);
   12908       DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
   12909           nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
   12910       return True;
   12911    }
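   /* E.g. "fccmpe d0, d1, #8, le": if LE holds, the flags come from the
      d0/d1 compare; otherwise NZCV is set to 1000, i.e. just N.  The
      IRExpr_ITE above selects between the two pre-shifted values. */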
   12912 
   12913    return False;
   12914 #  undef INSN
   12915 }
   12916 
   12917 
   12918 static
   12919 Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
   12920 {
   12921    /* 31        23 21 20 15   11 9 5
   12922       000 11110 ty 1  m  cond 11 n d
   12923       The first 3 bits are really "M 0 S", but M and S are always zero.
   12924       Decode fields: ty
   12925    */
   12926 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   12927    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
   12928        || INSN(11,10) != BITS2(1,1)) {
   12929       return False;
   12930    }
   12931    UInt ty   = INSN(23,22);
   12932    UInt mm   = INSN(20,16);
   12933    UInt cond = INSN(15,12);
   12934    UInt nn   = INSN(9,5);
   12935    UInt dd   = INSN(4,0);
   12936    if (ty <= X01) {
   12937       /* -------- 00: FCSEL s_s -------- */
    12938       /* -------- 01: FCSEL d_d -------- */
   12939       IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
   12940       IRTemp srcT = newTemp(ity);
   12941       IRTemp srcF = newTemp(ity);
   12942       IRTemp res  = newTemp(ity);
   12943       assign(srcT, getQRegLO(nn, ity));
   12944       assign(srcF, getQRegLO(mm, ity));
   12945       assign(res, IRExpr_ITE(
   12946                      unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
   12947                      mkexpr(srcT), mkexpr(srcF)));
   12948       putQReg128(dd, mkV128(0x0000));
   12949       putQRegLO(dd, mkexpr(res));
   12950       DIP("fcsel %s, %s, %s, %s\n",
   12951           nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
   12952           nameCC(cond));
   12953       return True;
   12954    }
   12955    return False;
   12956 #  undef INSN
   12957 }
   12958 
   12959 
   12960 static
   12961 Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
   12962 {
   12963    /* 31  28    23 21 20     14    9 4
   12964       000 11110 ty 1  opcode 10000 n d
   12965       The first 3 bits are really "M 0 S", but M and S are always zero.
   12966       Decode fields: ty,opcode
   12967    */
   12968 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   12969    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
   12970        || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
   12971       return False;
   12972    }
   12973    UInt ty     = INSN(23,22);
   12974    UInt opcode = INSN(20,15);
   12975    UInt nn     = INSN(9,5);
   12976    UInt dd     = INSN(4,0);
   12977 
   12978    if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
   12979       /* -------- 0x,000000: FMOV  d_d, s_s -------- */
   12980       /* -------- 0x,000001: FABS  d_d, s_s -------- */
   12981       /* -------- 0x,000010: FNEG  d_d, s_s -------- */
   12982       /* -------- 0x,000011: FSQRT d_d, s_s -------- */
   12983       IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
   12984       IRTemp src = newTemp(ity);
   12985       IRTemp res = newTemp(ity);
   12986       const HChar* nm = "??";
   12987       assign(src, getQRegLO(nn, ity));
   12988       switch (opcode) {
   12989          case BITS6(0,0,0,0,0,0):
   12990             nm = "fmov"; assign(res, mkexpr(src)); break;
   12991          case BITS6(0,0,0,0,0,1):
   12992             nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
   12993          case BITS6(0,0,0,0,1,0):
    12994             nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
   12995          case BITS6(0,0,0,0,1,1):
   12996             nm = "fsqrt";
   12997             assign(res, binop(mkSQRTF(ity),
   12998                               mkexpr(mk_get_IR_rounding_mode()),
   12999                               mkexpr(src))); break;
   13000          default:
   13001             vassert(0);
   13002       }
   13003       putQReg128(dd, mkV128(0x0000));
   13004       putQRegLO(dd, mkexpr(res));
   13005       DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
   13006       return True;
   13007    }
   13008 
   13009    if (   (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
   13010                          || opcode == BITS6(0,0,0,1,0,1)))
   13011        || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
   13012                          || opcode == BITS6(0,0,0,1,0,1)))
   13013        || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
   13014                          || opcode == BITS6(0,0,0,1,0,0)))) {
   13015       /* -------- 11,000100: FCVT s_h -------- */
   13016       /* -------- 11,000101: FCVT d_h -------- */
   13017       /* -------- 00,000111: FCVT h_s -------- */
   13018       /* -------- 00,000101: FCVT d_s -------- */
   13019       /* -------- 01,000111: FCVT h_d -------- */
   13020       /* -------- 01,000100: FCVT s_d -------- */
   13021       /* 31        23 21    16 14    9 4
   13022          000 11110 11 10001 00 10000 n d   FCVT Sd, Hn
   13023          --------- 11 ----- 01 ---------   FCVT Dd, Hn
   13024          --------- 00 ----- 11 ---------   FCVT Hd, Sn
   13025          --------- 00 ----- 01 ---------   FCVT Dd, Sn
   13026          --------- 01 ----- 11 ---------   FCVT Hd, Dn
   13027          --------- 01 ----- 00 ---------   FCVT Sd, Dn
   13028          Rounding, when dst is smaller than src, is per the FPCR.
   13029       */
   13030       UInt b2322 = ty;
   13031       UInt b1615 = opcode & BITS2(1,1);
   13032       switch ((b2322 << 2) | b1615) {
   13033          case BITS4(0,0,0,1):   // S -> D
   13034          case BITS4(1,1,0,1): { // H -> D
   13035             Bool   srcIsH = b2322 == BITS2(1,1);
   13036             IRType srcTy  = srcIsH ? Ity_F16 : Ity_F32;
   13037             IRTemp res    = newTemp(Ity_F64);
   13038             assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
   13039                              getQRegLO(nn, srcTy)));
   13040             putQReg128(dd, mkV128(0x0000));
   13041             putQRegLO(dd, mkexpr(res));
   13042             DIP("fcvt %s, %s\n",
   13043                 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
   13044             return True;
   13045          }
   13046          case BITS4(0,1,0,0):   // D -> S
   13047          case BITS4(0,1,1,1): { // D -> H
   13048             Bool   dstIsH = b1615 == BITS2(1,1);
   13049             IRType dstTy  = dstIsH ? Ity_F16 : Ity_F32;
   13050             IRTemp res    = newTemp(dstTy);
   13051             assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
   13052                               mkexpr(mk_get_IR_rounding_mode()),
   13053                               getQRegLO(nn, Ity_F64)));
   13054             putQReg128(dd, mkV128(0x0000));
   13055             putQRegLO(dd, mkexpr(res));
   13056             DIP("fcvt %s, %s\n",
   13057                 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
   13058             return True;
   13059          }
   13060          case BITS4(0,0,1,1):   // S -> H
   13061          case BITS4(1,1,0,0): { // H -> S
   13062             Bool   toH   = b1615 == BITS2(1,1);
   13063             IRType srcTy = toH ? Ity_F32 : Ity_F16;
   13064             IRType dstTy = toH ? Ity_F16 : Ity_F32;
   13065             IRTemp res = newTemp(dstTy);
   13066             if (toH) {
   13067                assign(res, binop(Iop_F32toF16,
   13068                                  mkexpr(mk_get_IR_rounding_mode()),
   13069                                  getQRegLO(nn, srcTy)));
   13070 
   13071             } else {
   13072                assign(res, unop(Iop_F16toF32,
   13073                                 getQRegLO(nn, srcTy)));
   13074             }
   13075             putQReg128(dd, mkV128(0x0000));
   13076             putQRegLO(dd, mkexpr(res));
   13077             DIP("fcvt %s, %s\n",
   13078                 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
   13079             return True;
   13080          }
   13081          default:
   13082             break;
   13083       }
   13084       /* else unhandled */
   13085       return False;
   13086    }
   13087 
   13088    if (ty <= X01
   13089        && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
   13090        && opcode != BITS6(0,0,1,1,0,1)) {
   13091       /* -------- 0x,001000 FRINTN d_d, s_s -------- */
   13092       /* -------- 0x,001001 FRINTP d_d, s_s -------- */
   13093       /* -------- 0x,001010 FRINTM d_d, s_s -------- */
   13094       /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
   13095       /* -------- 0x,001100 FRINTA d_d, s_s -------- */
   13096       /* -------- 0x,001110 FRINTX d_d, s_s -------- */
   13097       /* -------- 0x,001111 FRINTI d_d, s_s -------- */
   13098       /* 31        23 21   17  14    9 4
   13099          000 11110 0x 1001 111 10000 n d  FRINTI Fd, Fm (round per FPCR)
   13100                            rm
   13101          x==0 => S-registers, x==1 => D-registers
   13102          rm (17:15) encodings:
   13103             111 per FPCR  (FRINTI)
   13104             001 +inf      (FRINTP)
   13105             010 -inf      (FRINTM)
   13106             011 zero      (FRINTZ)
   13107             000 tieeven   (FRINTN) -- !! FIXME KLUDGED !!
   13108             100 tieaway   (FRINTA) -- !! FIXME KLUDGED !!
   13109             110 per FPCR + "exact = TRUE" (FRINTX)
   13110             101 unallocated
   13111       */
   13112       Bool    isD   = (ty & 1) == 1;
   13113       UInt    rm    = opcode & BITS6(0,0,0,1,1,1);
   13114       IRType  ity   = isD ? Ity_F64 : Ity_F32;
   13115       IRExpr* irrmE = NULL;
   13116       UChar   ch    = '?';
   13117       switch (rm) {
   13118          case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
   13119          case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
   13120          case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
   13121          // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
   13122          case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
    13123          // FRINTX ("round to integral exact") rounds per FPCR like FRINTI
    13124          // but also signals Inexact if result != operand; not modelled here.
   13125          case BITS3(1,1,0):
   13126             ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
   13127          case BITS3(1,1,1):
   13128             ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
   13129          // The following is a kludge.  There's no Irrm_ value to represent
   13130          // this ("to nearest, with ties to even")
   13131          case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
   13132          default: break;
   13133       }
   13134       if (irrmE) {
   13135          IRTemp src = newTemp(ity);
   13136          IRTemp dst = newTemp(ity);
   13137          assign(src, getQRegLO(nn, ity));
   13138          assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
   13139                            irrmE, mkexpr(src)));
   13140          putQReg128(dd, mkV128(0x0000));
   13141          putQRegLO(dd, mkexpr(dst));
   13142          DIP("frint%c %s, %s\n",
   13143              ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
   13144          return True;
   13145       }
   13146       return False;
   13147    }
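   /* Worked examples for the above: frintm (towards -inf) of -1.5 gives
      -2.0, frintp gives -1.0, frintz gives -1.0.  The frinta kludge only
      misbehaves on exact .5 ties: frinta of 2.5 should give 3.0 (ties
      away from zero), but Irrm_NEAREST gives 2.0 (ties to even). */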
   13148 
   13149    return False;
   13150 #  undef INSN
   13151 }
   13152 
   13153 
   13154 static
   13155 Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
   13156 {
   13157    /* 31  28    23 21 20 15     11 9 4
   13158       000 11110 ty 1  m  opcode 10 n d
   13159       The first 3 bits are really "M 0 S", but M and S are always zero.
   13160       Decode fields: ty, opcode
   13161    */
   13162 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   13163    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
   13164        || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
   13165       return False;
   13166    }
   13167    UInt ty     = INSN(23,22);
   13168    UInt mm     = INSN(20,16);
   13169    UInt opcode = INSN(15,12);
   13170    UInt nn     = INSN(9,5);
   13171    UInt dd     = INSN(4,0);
   13172 
   13173    if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
   13174       /* ------- 0x,0000: FMUL d_d, s_s ------- */
   13175       /* ------- 0x,0001: FDIV d_d, s_s ------- */
   13176       /* ------- 0x,0010: FADD d_d, s_s ------- */
   13177       /* ------- 0x,0011: FSUB d_d, s_s ------- */
   13178       /* ------- 0x,0100: FMAX d_d, s_s ------- */
   13179       /* ------- 0x,0101: FMIN d_d, s_s ------- */
   13180       /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
   13181       /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
   13182       IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
   13183       IROp   iop = Iop_INVALID;
   13184       const HChar* nm = "???";
   13185       switch (opcode) {
   13186          case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
   13187          case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
   13188          case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
   13189          case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
   13190          case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
   13191          case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
   13192          case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
   13193          case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
   13194          default: vassert(0);
   13195       }
   13196       if (opcode <= BITS4(0,0,1,1)) {
   13197          // This is really not good code.  TODO: avoid width-changing
   13198          IRTemp res = newTemp(ity);
   13199          assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
   13200                                 getQRegLO(nn, ity), getQRegLO(mm, ity)));
   13201          putQReg128(dd, mkV128(0));
   13202          putQRegLO(dd, mkexpr(res));
   13203       } else {
   13204          putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
   13205                              binop(iop, getQReg128(nn), getQReg128(mm))));
   13206       }
   13207       DIP("%s %s, %s, %s\n",
   13208           nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
   13209       return True;
   13210    }
   13211 
   13212    if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
   13213       /* ------- 0x,1000: FNMUL d_d, s_s ------- */
   13214       IRType ity  = ty == X00 ? Ity_F32 : Ity_F64;
   13215       IROp   iop  = mkMULF(ity);
   13216       IROp   iopn = mkNEGF(ity);
   13217       const HChar* nm = "fnmul";
   13218       IRExpr* resE = unop(iopn,
   13219                           triop(iop, mkexpr(mk_get_IR_rounding_mode()),
   13220                                 getQRegLO(nn, ity), getQRegLO(mm, ity)));
   13221       IRTemp  res  = newTemp(ity);
   13222       assign(res, resE);
   13223       putQReg128(dd, mkV128(0));
   13224       putQRegLO(dd, mkexpr(res));
   13225       DIP("%s %s, %s, %s\n",
   13226           nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
   13227       return True;
   13228    }
   13229 
   13230    return False;
   13231 #  undef INSN
   13232 }
   13233 
   13234 
   13235 static
   13236 Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
   13237 {
   13238    /* 31  28    23 21 20 15 14 9 4
   13239       000 11111 ty o1 m  o0 a  n d
   13240       The first 3 bits are really "M 0 S", but M and S are always zero.
   13241       Decode fields: ty,o1,o0
   13242    */
   13243 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   13244    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
   13245       return False;
   13246    }
   13247    UInt ty    = INSN(23,22);
   13248    UInt bitO1 = INSN(21,21);
   13249    UInt mm    = INSN(20,16);
   13250    UInt bitO0 = INSN(15,15);
   13251    UInt aa    = INSN(14,10);
   13252    UInt nn    = INSN(9,5);
   13253    UInt dd    = INSN(4,0);
   13254    vassert(ty < 4);
   13255 
   13256    if (ty <= X01) {
   13257       /* -------- 0x,0,0 FMADD  d_d_d_d, s_s_s_s -------- */
   13258       /* -------- 0x,0,1 FMSUB  d_d_d_d, s_s_s_s -------- */
   13259       /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
   13260       /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
   13261       /* -------------------- F{N}M{ADD,SUB} -------------------- */
   13262       /* 31          22   20 15 14 9 4   ix
   13263          000 11111 0 sz 0 m  0  a  n d   0   FMADD  Fd,Fn,Fm,Fa
   13264          000 11111 0 sz 0 m  1  a  n d   1   FMSUB  Fd,Fn,Fm,Fa
   13265          000 11111 0 sz 1 m  0  a  n d   2   FNMADD Fd,Fn,Fm,Fa
   13266          000 11111 0 sz 1 m  1  a  n d   3   FNMSUB Fd,Fn,Fm,Fa
   13267          where Fx=Dx when sz=1, Fx=Sx when sz=0
   13268 
   13269                   -----SPEC------    ----IMPL----
   13270          fmadd       a +    n * m    a + n * m
   13271          fmsub       a + (-n) * m    a - n * m
   13272          fnmadd   (-a) + (-n) * m    -(a + n * m)
   13273          fnmsub   (-a) +    n * m    -(a - n * m)
   13274       */
   13275       Bool    isD   = (ty & 1) == 1;
   13276       UInt    ix    = (bitO1 << 1) | bitO0;
   13277       IRType  ity   = isD ? Ity_F64 : Ity_F32;
   13278       IROp    opADD = mkADDF(ity);
   13279       IROp    opSUB = mkSUBF(ity);
   13280       IROp    opMUL = mkMULF(ity);
   13281       IROp    opNEG = mkNEGF(ity);
   13282       IRTemp  res   = newTemp(ity);
   13283       IRExpr* eA    = getQRegLO(aa, ity);
   13284       IRExpr* eN    = getQRegLO(nn, ity);
   13285       IRExpr* eM    = getQRegLO(mm, ity);
   13286       IRExpr* rm    = mkexpr(mk_get_IR_rounding_mode());
   13287       IRExpr* eNxM  = triop(opMUL, rm, eN, eM);
   13288       switch (ix) {
   13289          case 0:  assign(res, triop(opADD, rm, eA, eNxM)); break;
   13290          case 1:  assign(res, triop(opSUB, rm, eA, eNxM)); break;
   13291          case 2:  assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
   13292          case 3:  assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
   13293          default: vassert(0);
   13294       }
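      /* The double rounding noted in KNOWN LIMITATIONS could be avoided
         by using VEX's fused multiply-add primops instead of the separate
         multiply and add above.  A sketch (untested), following the SPEC
         column of the table above:

            IROp opFMA = isD ? Iop_MAddF64 : Iop_MAddF32;
            switch (ix) {
               case 0: // fmadd:  a + n*m
                  assign(res, qop(opFMA, rm, eN, eM, eA)); break;
               case 1: // fmsub:  a + (-n)*m
                  assign(res, qop(opFMA, rm, unop(opNEG, eN), eM, eA));
                  break;
               case 2: // fnmadd: (-a) + (-n)*m
                  assign(res, qop(opFMA, rm, unop(opNEG, eN), eM,
                                  unop(opNEG, eA))); break;
               case 3: // fnmsub: (-a) + n*m
                  assign(res, qop(opFMA, rm, eN, eM, unop(opNEG, eA)));
                  break;
            }

         Iop_MAddF64/Iop_MAddF32 compute arg2 * arg3 + arg4 with a single
         rounding (arg1 is the rounding mode).  Whether a qop() wrapper is
         available here is an assumption; IRExpr_Qop can be used directly
         if not. */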
   13295       putQReg128(dd, mkV128(0x0000));
   13296       putQRegLO(dd, mkexpr(res));
   13297       const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
   13298       DIP("%s %s, %s, %s, %s\n",
   13299           names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
   13300                      nameQRegLO(mm, ity), nameQRegLO(aa, ity));
   13301       return True;
   13302    }
   13303 
   13304    return False;
   13305 #  undef INSN
   13306 }
   13307 
   13308 
   13309 static
   13310 Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
   13311 {
   13312    /* 31  28    23 21 20   12  9    4
   13313       000 11110 ty 1  imm8 100 imm5 d
   13314       The first 3 bits are really "M 0 S", but M and S are always zero.
   13315    */
   13316 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   13317    if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
   13318        || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
   13319       return False;
   13320    }
   13321    UInt ty     = INSN(23,22);
   13322    UInt imm8   = INSN(20,13);
   13323    UInt imm5   = INSN(9,5);
   13324    UInt dd     = INSN(4,0);
   13325 
   13326    /* ------- 00,00000: FMOV s_imm ------- */
   13327    /* ------- 01,00000: FMOV d_imm ------- */
   13328    if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
   13329       Bool  isD  = (ty & 1) == 1;
   13330       ULong imm  = VFPExpandImm(imm8, isD ? 64 : 32);
   13331       if (!isD) {
   13332          vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
   13333       }
   13334       putQReg128(dd, mkV128(0));
   13335       putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
   13336       DIP("fmov %s, #0x%llx\n",
   13337           nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
   13338       return True;
   13339    }
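   /* VFPExpandImm decodes imm8 = abcdefgh as (summarising the ARM ARM
      pseudocode):
         sign     = a
         exponent = NOT(b) : Replicate(b, 5 for S / 8 for D) : cd
         fraction = efgh, zero-padded
      so e.g. imm8 == 0x70 expands to 1.0 and imm8 == 0xF0 to -1.0, in
      either width. */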
   13340 
   13341    return False;
   13342 #  undef INSN
   13343 }
   13344 
   13345 
   13346 static
   13347 Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
   13348 {
   13350    /* 31 30 29 28    23   21 20    18     15    9 4
   13351       sf  0  0 11110 type 0  rmode opcode scale n d
   13352       The first 3 bits are really "sf 0 S", but S is always zero.
   13353       Decode fields: sf,type,rmode,opcode
   13354    */
   13355 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   13356    if (INSN(30,29) != BITS2(0,0)
   13357        || INSN(28,24) != BITS5(1,1,1,1,0)
   13358        || INSN(21,21) != 0) {
   13359       return False;
   13360    }
   13361    UInt bitSF = INSN(31,31);
   13362    UInt ty    = INSN(23,22); // type
   13363    UInt rm    = INSN(20,19); // rmode
   13364    UInt op    = INSN(18,16); // opcode
   13365    UInt sc    = INSN(15,10); // scale
   13366    UInt nn    = INSN(9,5);
   13367    UInt dd    = INSN(4,0);
   13368 
   13369    if (ty <= X01 && rm == X11
   13370        && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
   13371       /* -------- (ix) sf ty rm opc -------- */
   13372       /* -------- 0    0  00 11 000: FCVTZS w_s_#fbits -------- */
   13373       /* -------- 1    0  01 11 000: FCVTZS w_d_#fbits -------- */
   13374       /* -------- 2    1  00 11 000: FCVTZS x_s_#fbits -------- */
   13375       /* -------- 3    1  01 11 000: FCVTZS x_d_#fbits -------- */
   13376 
   13377       /* -------- 4    0  00 11 001: FCVTZU w_s_#fbits -------- */
   13378       /* -------- 5    0  01 11 001: FCVTZU w_d_#fbits -------- */
   13379       /* -------- 6    1  00 11 001: FCVTZU x_s_#fbits -------- */
   13380       /* -------- 7    1  01 11 001: FCVTZU x_d_#fbits -------- */
   13381       Bool isI64 = bitSF == 1;
   13382       Bool isF64 = (ty & 1) == 1;
   13383       Bool isU   = (op & 1) == 1;
   13384       UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
   13385 
   13386       Int fbits = 64 - sc;
   13387       vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
   13388 
   13389       Double  scale  = two_to_the_plus(fbits);
   13390       IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
   13391                              : IRExpr_Const(IRConst_F32( (Float)scale ));
   13392       IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;
   13393 
   13394       const IROp ops[8]
   13395         = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
   13396             Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
   13397       IRTemp irrm = newTemp(Ity_I32);
   13398       assign(irrm, mkU32(Irrm_ZERO));
   13399 
   13400       IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
   13401       IRExpr* res = binop(ops[ix], mkexpr(irrm),
   13402                                    triop(opMUL, mkexpr(irrm), src, scaleE));
   13403       putIRegOrZR(isI64, dd, res);
   13404 
   13405       DIP("fcvtz%c %s, %s, #%d\n",
   13406           isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
   13407           nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
   13408       return True;
   13409    }
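   /* Worked example for the above: "fcvtzs w0, s0, #4" computes
      trunc(Sn * 2^4); for Sn == 2.75 that is trunc(44.0) == 44, i.e.
      2.75 in 28.4 fixed point. */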
   13410 
   13411    /* ------ sf,ty,rm,opc ------ */
   13412    /* ------ x,0x,00,010  SCVTF s/d, w/x, #fbits  ------ */
   13413    /* ------ x,0x,00,011  UCVTF s/d, w/x, #fbits  ------ */
   13414    /* (ix) sf  S 28    ty   rm opc 15    9 4
   13415       0    0 0 0 11110 00 0 00 010 scale n d  SCVTF Sd, Wn, #fbits
   13416       1    0 0 0 11110 01 0 00 010 scale n d  SCVTF Dd, Wn, #fbits
   13417       2    1 0 0 11110 00 0 00 010 scale n d  SCVTF Sd, Xn, #fbits
   13418       3    1 0 0 11110 01 0 00 010 scale n d  SCVTF Dd, Xn, #fbits
   13419 
   13420       4    0 0 0 11110 00 0 00 011 scale n d  UCVTF Sd, Wn, #fbits
   13421       5    0 0 0 11110 01 0 00 011 scale n d  UCVTF Dd, Wn, #fbits
   13422       6    1 0 0 11110 00 0 00 011 scale n d  UCVTF Sd, Xn, #fbits
   13423       7    1 0 0 11110 01 0 00 011 scale n d  UCVTF Dd, Xn, #fbits
   13424 
   13425       These are signed/unsigned conversion from integer registers to
   13426       FP registers, all 4 32/64-bit combinations, rounded per FPCR,
   13427       scaled per |scale|.
   13428    */
   13429    if (ty <= X01 && rm == X00
   13430        && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
   13431        && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
   13432       Bool isI64 = bitSF == 1;
   13433       Bool isF64 = (ty & 1) == 1;
   13434       Bool isU   = (op & 1) == 1;
   13435       UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
   13436 
   13437       Int fbits = 64 - sc;
   13438       vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
   13439 
   13440       Double  scale  = two_to_the_minus(fbits);
   13441       IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
   13442                              : IRExpr_Const(IRConst_F32( (Float)scale ));
   13443       IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;
   13444 
   13445       const IROp ops[8]
   13446         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
   13447             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
   13448       IRExpr* src = getIRegOrZR(isI64, nn);
   13449       IRExpr* res = (isF64 && !isI64)
   13450                        ? unop(ops[ix], src)
   13451                        : binop(ops[ix],
   13452                                mkexpr(mk_get_IR_rounding_mode()), src);
   13453       putQReg128(dd, mkV128(0));
   13454       putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));
   13455 
   13456       DIP("%ccvtf %s, %s, #%d\n",
   13457           isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
   13458           nameIRegOrZR(isI64, nn), fbits);
   13459       return True;
   13460    }
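   /* And for this case: "scvtf s0, w1, #8" computes (F32)Wn * 2^-8; for
      Wn == 300 the result is 300/256 == 1.171875. */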
   13461 
   13462    return False;
   13463 #  undef INSN
   13464 }
   13465 
   13466 
   13467 static
   13468 Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
   13469 {
   13470    /* 31 30 29 28    23   21 20    18     15     9 4
   13471       sf  0  0 11110 type 1  rmode opcode 000000 n d
   13472       The first 3 bits are really "sf 0 S", but S is always zero.
   13473       Decode fields: sf,type,rmode,opcode
   13474    */
   13475 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   13476    if (INSN(30,29) != BITS2(0,0)
   13477        || INSN(28,24) != BITS5(1,1,1,1,0)
   13478        || INSN(21,21) != 1
   13479        || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
   13480       return False;
   13481    }
   13482    UInt bitSF = INSN(31,31);
   13483    UInt ty    = INSN(23,22); // type
   13484    UInt rm    = INSN(20,19); // rmode
   13485    UInt op    = INSN(18,16); // opcode
   13486    UInt nn    = INSN(9,5);
   13487    UInt dd    = INSN(4,0);
   13488 
    13489    // op = 000, 001, 100, 101
   13490    /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
   13491    /*    30       23   20 18  15     9 4
   13492       sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
   13493       sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
   13494       ---------------- 01 --------------  FCVTP-------- (round to +inf)
   13495       ---------------- 10 --------------  FCVTM-------- (round to -inf)
   13496       ---------------- 11 --------------  FCVTZ-------- (round to zero)
   13497       ---------------- 00 100 ----------  FCVTAS------- (nearest, ties away)
   13498       ---------------- 00 101 ----------  FCVTAU------- (nearest, ties away)
   13499 
   13500       Rd is Xd when sf==1, Wd when sf==0
   13501       Fn is Dn when x==1, Sn when x==0
   13502       20:19 carry the rounding mode, using the same encoding as FPCR
   13503    */
   13504    if (ty <= X01
   13505        && (   ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
   13506            || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
   13507           )
   13508       ) {
   13509       Bool isI64 = bitSF == 1;
   13510       Bool isF64 = (ty & 1) == 1;
   13511       Bool isU   = (op & 1) == 1;
   13512       /* Decide on the IR rounding mode to use. */
   13513       IRRoundingMode irrm = 8; /*impossible*/
   13514       HChar ch = '?';
   13515       if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
   13516          switch (rm) {
   13517             case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
   13518             case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
   13519             case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
   13520             case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
   13521             default: vassert(0);
   13522          }
   13523       } else {
   13524          vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
   13525          switch (rm) {
   13526             case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
   13527             default: vassert(0);
   13528          }
   13529       }
   13530       vassert(irrm != 8);
   13531       /* Decide on the conversion primop, based on the source size,
   13532          dest size and signedness (8 possibilities).  Case coding:
   13533             F32 ->s I32   0
   13534             F32 ->u I32   1
   13535             F32 ->s I64   2
   13536             F32 ->u I64   3
   13537             F64 ->s I32   4
   13538             F64 ->u I32   5
   13539             F64 ->s I64   6
   13540             F64 ->u I64   7
   13541       */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
      vassert(ix < 8);
      const IROp iops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp iop = iops[ix];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      if (/* F32toI32S */
             (iop == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
          /* F32toI32U */
          || (iop == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
          /* F32toI64S */
          || (iop == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
          /* F32toI64U */
          || (iop == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
          /* F64toI32S */
          || (iop == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
          /* F64toI32U */
          || (iop == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
          /* F64toI64S */
          || (iop == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
          /* F64toI64U */
          || (iop == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
         ) {
         /* validated */
      } else {
         return False;
      }
      IRType srcTy  = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy  = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src    = newTemp(srcTy);
      IRTemp dst    = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }

   // op = 010, 011
   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* (ix) sf  S 28    ty   rm op  15     9 4
      0    0 0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Wn
      1    0 0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Wn
      2    1 0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Xn
      3    1 0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Xn

      4    0 0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Wn
      5    0 0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Wn
      6    1 0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Xn
      7    1 0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Xn

      These are signed/unsigned conversions from integer registers to
      FP registers, covering all four 32/64-bit combinations, rounded
      as per the FPCR.
   */
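   /* For example, UCVTF Dd, Wn has isU=1, isI64=0, isF64=1, giving
      ix = 4|0|1 = 5 and hence Iop_I32UtoF64.  I32-to-F64 conversion is
      exact, so it takes the unop path below and needs no rounding
      mode; all the other combinations take the binop path. */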
   if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
        = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
            Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }

   // op = 110, 111
   /* -------- FMOV (general) -------- */
   /* case sf  S       ty   rm op  15     9 4
       (1) 0 0 0 11110 00 1 00 111 000000 n d     FMOV Sd,      Wn
       (2) 1 0 0 11110 01 1 00 111 000000 n d     FMOV Dd,      Xn
       (3) 1 0 0 11110 10 1 01 111 000000 n d     FMOV Vd.D[1], Xn

       (4) 0 0 0 11110 00 1 00 110 000000 n d     FMOV Wd, Sn
       (5) 1 0 0 11110 01 1 00 110 000000 n d     FMOV Xd, Dn
       (6) 1 0 0 11110 10 1 01 110 000000 n d     FMOV Xd, Vn.D[1]
   */
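   /* In all six cases FMOV is a bit-for-bit move between an integer
      register and (part of) a vector register; no FP conversion or
      rounding is involved.  Cases (3) and (6) access the upper 64 bits
      (D[1]) of the vector register. */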
   if (1) {
      UInt ix = 0; // case
      if (bitSF == 0) {
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 1;
         else
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 4;
      } else {
         vassert(bitSF == 1);
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 2;
         else
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 5;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
            ix = 3;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
            ix = 6;
      }
      if (ix > 0) {
         switch (ix) {
            case 1:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg32orZR(nn));
               DIP("fmov s%u, w%u\n", dd, nn);
               break;
            case 2:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg64orZR(nn));
               DIP("fmov d%u, x%u\n", dd, nn);
               break;
            case 3:
               putQRegHI64(dd, getIReg64orZR(nn));
               DIP("fmov v%u.d[1], x%u\n", dd, nn);
               break;
            case 4:
               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
               DIP("fmov w%u, s%u\n", dd, nn);
               break;
            case 5:
               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
               DIP("fmov x%u, d%u\n", dd, nn);
               break;
            case 6:
               putIReg64orZR(dd, getQRegHI64(nn));
               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* undecodable; fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
{
   Bool ok;
   ok = dis_AdvSIMD_EXT(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_TBL_TBX(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_across_lanes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_modified_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_aes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Disassemble a single ARM64 instruction               ---*/
/*------------------------------------------------------------*/

/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has guest IP |guest_PC_curr_instr|,
   which will have been set before the call here.  Returns True iff
   the instruction was decoded, in which case *dres will be set
   accordingly, or False, in which case *dres should be ignored by
   the caller. */

static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        const UChar* guest_instr,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  abiinfo
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      const UChar* code = guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /*  branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn.  The reason is that the IRop
            // we're injecting here can change; when it does, the
            // translation has to be redone.  For ease of handling, we
            // simply invalidate all the time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
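   /* For example, 0xAA0A014A (orr x10,x10,x10) has insn[28:25] =
      0b0101, so it is routed to the "data processing - register"
      decoder below. */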
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           const UChar* guest_code_IN,
                           Long         delta_IN,
                           Addr         guest_IP,
                           VexArch      guest_arch,
                           const VexArchInfo* archinfo,
                           const VexAbiInfo*  abiinfo,
                           VexEndness   host_endness_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_endness        = host_endness_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
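   /* The single unsigned compare works by wraparound: for x = 1 the
      subtraction yields 0xFFFFFFFF, which fails the test, while
      x = 2 .. 17 yield 0 .. 15, which pass. */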

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 &guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         UChar buf[64];
         UInt  insn
                  = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
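         /* buf now holds the instruction bits grouped for readability,
            with a space every 8 bits and a tick every 4, e.g.
            "1010'1010 0000'1010 0000'0001 0100'1010" for 0xAA0A014A. */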
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.len         = 0;
      dres.whatNext    = Dis_StopHere;
      dres.jk_StopHere = Ijk_NoDecode;
      dres.continueAt  = 0;
   }
   return dres;
}


/*--------------------------------------------------------------------*/
/*--- end                                       guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/