      1 
      2 /*---------------------------------------------------------------*/
      3 /*--- begin                                 host_arm64_defs.c ---*/
      4 /*---------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2013-2015 OpenWorks
     11       info (at) open-works.net
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     26    02110-1301, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 #include "libvex_basictypes.h"
     32 #include "libvex.h"
     33 #include "libvex_trc_values.h"
     34 
     35 #include "main_util.h"
     36 #include "host_generic_regs.h"
     37 #include "host_arm64_defs.h"
     38 
     39 
     40 /* --------- Registers. --------- */
     41 
     42 /* The usual HReg abstraction.  We use the following classes only:
     43      X regs (64 bit int)
     44      D regs (64 bit float, also used for 32 bit float)
     45      Q regs (128 bit vector)
     46 */
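         /* Purely as an illustrative sketch: the real registers set up below
            come from the hregARM64_* constructors used in this file, e.g.
               HReg x22 = hregARM64_X22();   // HRcInt64,  printed as "x22"
               HReg d8  = hregARM64_D8();    // HRcFlt64,  printed as "d8"
               HReg q16 = hregARM64_Q16();   // HRcVec128, printed as "q16"
            ppHRegARM64 below prints real registers in exactly this
            x<n>/d<n>/q<n> form; virtual registers are handed off to the
            generic ppHReg. */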
     47 
     48 const RRegUniverse* getRRegUniverse_ARM64 ( void )
     49 {
     50    /* The real-register universe is a big constant, so we just want to
     51       initialise it once. */
     52    static RRegUniverse rRegUniverse_ARM64;
     53    static Bool         rRegUniverse_ARM64_initted = False;
     54 
     55    /* Handy shorthand, nothing more */
     56    RRegUniverse* ru = &rRegUniverse_ARM64;
     57 
     58    /* This isn't thread-safe.  Sigh. */
     59    if (LIKELY(rRegUniverse_ARM64_initted))
     60       return ru;
     61 
     62    RRegUniverse__init(ru);
     63 
     64    /* Add the registers.  The initial segment of this array must be
     65       those available for allocation by reg-alloc, and those that
     66       follow are not available for allocation. */
     67 
     68    ru->regs[ru->size++] = hregARM64_X22();
     69    ru->regs[ru->size++] = hregARM64_X23();
     70    ru->regs[ru->size++] = hregARM64_X24();
     71    ru->regs[ru->size++] = hregARM64_X25();
     72    ru->regs[ru->size++] = hregARM64_X26();
     73    ru->regs[ru->size++] = hregARM64_X27();
     74    ru->regs[ru->size++] = hregARM64_X28();
     75 
     76    ru->regs[ru->size++] = hregARM64_X0();
     77    ru->regs[ru->size++] = hregARM64_X1();
     78    ru->regs[ru->size++] = hregARM64_X2();
     79    ru->regs[ru->size++] = hregARM64_X3();
     80    ru->regs[ru->size++] = hregARM64_X4();
     81    ru->regs[ru->size++] = hregARM64_X5();
     82    ru->regs[ru->size++] = hregARM64_X6();
     83    ru->regs[ru->size++] = hregARM64_X7();
     84    // X8 is used as a ProfInc temporary, not available to regalloc.
     85    // X9 is a chaining/spill temporary, not available to regalloc.
     86 
     87    // Do we really need all these?
     88    //ru->regs[ru->size++] = hregARM64_X10();
     89    //ru->regs[ru->size++] = hregARM64_X11();
     90    //ru->regs[ru->size++] = hregARM64_X12();
     91    //ru->regs[ru->size++] = hregARM64_X13();
     92    //ru->regs[ru->size++] = hregARM64_X14();
     93    //ru->regs[ru->size++] = hregARM64_X15();
     94    // X21 is the guest state pointer, not available to regalloc.
     95 
     96    // vector regs.  Unfortunately not callee-saved.
     97    ru->regs[ru->size++] = hregARM64_Q16();
     98    ru->regs[ru->size++] = hregARM64_Q17();
     99    ru->regs[ru->size++] = hregARM64_Q18();
    100    ru->regs[ru->size++] = hregARM64_Q19();
    101    ru->regs[ru->size++] = hregARM64_Q20();
    102 
    103    // F64 regs, all of which are callee-saved
    104    ru->regs[ru->size++] = hregARM64_D8();
    105    ru->regs[ru->size++] = hregARM64_D9();
    106    ru->regs[ru->size++] = hregARM64_D10();
    107    ru->regs[ru->size++] = hregARM64_D11();
    108    ru->regs[ru->size++] = hregARM64_D12();
    109    ru->regs[ru->size++] = hregARM64_D13();
    110 
    111    ru->allocable = ru->size;
    112    /* And other regs, not available to the allocator. */
    113 
    114    // unavail: x21 as GSP
    115    // x8 is used as a ProfInc temporary
    116    // x9 is used as a spill/reload/chaining/call temporary
    117    // x30 as LR
     118    // x31 because dealing with the SP-vs-ZR overloading is too
     119    // confusing, and we don't need to use x31 anyway, so let's just
     120    // avoid the problem
    121    //
    122    // Currently, we have 15 allocatable integer registers:
    123    // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28
    124    //
    125    // Hence for the allocatable integer registers we have:
    126    //
    127    // callee-saved: 22 23 24 25 26 27 28
    128    // caller-saved: 0 1 2 3 4 5 6 7
    129    //
    130    // If the set of available registers changes or if the e/r status
    131    // changes, be sure to re-check/sync the definition of
    132    // getRegUsage for ARM64Instr_Call too.
    133 
    134    ru->regs[ru->size++] = hregARM64_X8();
    135    ru->regs[ru->size++] = hregARM64_X9();
    136    ru->regs[ru->size++] = hregARM64_X21();
    137 
    138    rRegUniverse_ARM64_initted = True;
    139 
    140    RRegUniverse__check_is_sane(ru);
    141    return ru;
    142 }
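         /* Resulting layout, for reference: regs[0 .. allocable-1] hold the
            allocatable registers in the order added above (x22..x28, x0..x7,
            q16..q20, d8..d13), while regs[allocable .. size-1] hold x8, x9
            and x21, which appear in the universe but are never handed out by
            the allocator. */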
    143 
    144 
     145 void ppHRegARM64 ( HReg reg ) {
    146    Int r;
    147    /* Be generic for all virtual regs. */
    148    if (hregIsVirtual(reg)) {
    149       ppHReg(reg);
    150       return;
    151    }
    152    /* But specific for real regs. */
    153    switch (hregClass(reg)) {
    154       case HRcInt64:
    155          r = hregEncoding(reg);
    156          vassert(r >= 0 && r < 31);
    157          vex_printf("x%d", r);
    158          return;
    159       case HRcFlt64:
    160          r = hregEncoding(reg);
    161          vassert(r >= 0 && r < 32);
    162          vex_printf("d%d", r);
    163          return;
    164       case HRcVec128:
    165          r = hregEncoding(reg);
    166          vassert(r >= 0 && r < 32);
    167          vex_printf("q%d", r);
    168          return;
    169       default:
    170          vpanic("ppHRegARM64");
    171    }
    172 }
    173 
    174 static void ppHRegARM64asSreg ( HReg reg ) {
    175    ppHRegARM64(reg);
    176    vex_printf("(S-reg)");
    177 }
    178 
    179 static void ppHRegARM64asHreg ( HReg reg ) {
    180    ppHRegARM64(reg);
    181    vex_printf("(H-reg)");
    182 }
    183 
    184 
    185 /* --------- Condition codes, ARM64 encoding. --------- */
    186 
    187 static const HChar* showARM64CondCode ( ARM64CondCode cond ) {
    188    switch (cond) {
    189        case ARM64cc_EQ:  return "eq";
    190        case ARM64cc_NE:  return "ne";
    191        case ARM64cc_CS:  return "cs";
    192        case ARM64cc_CC:  return "cc";
    193        case ARM64cc_MI:  return "mi";
    194        case ARM64cc_PL:  return "pl";
    195        case ARM64cc_VS:  return "vs";
    196        case ARM64cc_VC:  return "vc";
    197        case ARM64cc_HI:  return "hi";
    198        case ARM64cc_LS:  return "ls";
    199        case ARM64cc_GE:  return "ge";
    200        case ARM64cc_LT:  return "lt";
    201        case ARM64cc_GT:  return "gt";
    202        case ARM64cc_LE:  return "le";
    203        case ARM64cc_AL:  return "al"; // default
    204        case ARM64cc_NV:  return "nv";
    205        default: vpanic("showARM64CondCode");
    206    }
    207 }
    208 
    209 
    210 /* --------- Memory address expressions (amodes). --------- */
    211 
    212 ARM64AMode* ARM64AMode_RI9  ( HReg reg, Int simm9 ) {
    213    ARM64AMode* am        = LibVEX_Alloc_inline(sizeof(ARM64AMode));
    214    am->tag               = ARM64am_RI9;
    215    am->ARM64am.RI9.reg   = reg;
    216    am->ARM64am.RI9.simm9 = simm9;
    217    vassert(-256 <= simm9 && simm9 <= 255);
    218    return am;
    219 }
    220 
    221 ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB ) {
    222    ARM64AMode* am          = LibVEX_Alloc_inline(sizeof(ARM64AMode));
    223    am->tag                 = ARM64am_RI12;
    224    am->ARM64am.RI12.reg    = reg;
    225    am->ARM64am.RI12.uimm12 = uimm12;
    226    am->ARM64am.RI12.szB    = szB;
    227    vassert(uimm12 >= 0 && uimm12 <= 4095);
    228    switch (szB) {
    229       case 1: case 2: case 4: case 8: break;
    230       default: vassert(0);
    231    }
    232    return am;
    233 }
    234 
    235 ARM64AMode* ARM64AMode_RR ( HReg base, HReg index ) {
    236    ARM64AMode* am       = LibVEX_Alloc_inline(sizeof(ARM64AMode));
    237    am->tag              = ARM64am_RR;
    238    am->ARM64am.RR.base  = base;
    239    am->ARM64am.RR.index = index;
    240    return am;
    241 }
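         /* Illustrative uses of the three addressing forms (sketch only):
               ARM64AMode_RI9 (hregARM64_X21(), -16);           // "-16(x21)", signed 9-bit byte offset
               ARM64AMode_RI12(hregARM64_X21(), 24, 8);         // "192(x21)", unsigned 12-bit offset scaled by szB
               ARM64AMode_RR  (hregARM64_X0(), hregARM64_X1()); // "(x0,x1)", base plus index
            Note that RI12 stores the scaled offset, so the effective byte
            offset is szB * uimm12, which is what ppARM64AMode prints. */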
    242 
    243 static void ppARM64AMode ( ARM64AMode* am ) {
    244    switch (am->tag) {
    245       case ARM64am_RI9:
    246          vex_printf("%d(", am->ARM64am.RI9.simm9);
    247          ppHRegARM64(am->ARM64am.RI9.reg);
    248          vex_printf(")");
    249          break;
    250       case ARM64am_RI12:
    251          vex_printf("%u(", (UInt)am->ARM64am.RI12.szB
    252                            * (UInt)am->ARM64am.RI12.uimm12);
    253          ppHRegARM64(am->ARM64am.RI12.reg);
    254          vex_printf(")");
    255          break;
    256       case ARM64am_RR:
    257          vex_printf("(");
    258          ppHRegARM64(am->ARM64am.RR.base);
    259          vex_printf(",");
    260          ppHRegARM64(am->ARM64am.RR.index);
    261          vex_printf(")");
    262          break;
    263       default:
    264          vassert(0);
    265    }
    266 }
    267 
    268 static void addRegUsage_ARM64AMode ( HRegUsage* u, ARM64AMode* am ) {
    269    switch (am->tag) {
    270       case ARM64am_RI9:
    271          addHRegUse(u, HRmRead, am->ARM64am.RI9.reg);
    272          return;
    273       case ARM64am_RI12:
    274          addHRegUse(u, HRmRead, am->ARM64am.RI12.reg);
    275          return;
    276       case ARM64am_RR:
    277          addHRegUse(u, HRmRead, am->ARM64am.RR.base);
    278          addHRegUse(u, HRmRead, am->ARM64am.RR.index);
    279          return;
    280       default:
     281          vpanic("addRegUsage_ARM64AMode");
    282    }
    283 }
    284 
    285 static void mapRegs_ARM64AMode ( HRegRemap* m, ARM64AMode* am ) {
    286    switch (am->tag) {
    287       case ARM64am_RI9:
    288          am->ARM64am.RI9.reg = lookupHRegRemap(m, am->ARM64am.RI9.reg);
    289          return;
    290       case ARM64am_RI12:
    291          am->ARM64am.RI12.reg = lookupHRegRemap(m, am->ARM64am.RI12.reg);
    292          return;
    293       case ARM64am_RR:
    294          am->ARM64am.RR.base  = lookupHRegRemap(m, am->ARM64am.RR.base);
    295          am->ARM64am.RR.index = lookupHRegRemap(m, am->ARM64am.RR.index);
    296          return;
    297       default:
     298          vpanic("mapRegs_ARM64AMode");
    299    }
    300 }
    301 
    302 
    303 /* --------- Reg or uimm12<<{0,12} operands --------- */
    304 
    305 ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift ) {
    306    ARM64RIA* riA           = LibVEX_Alloc_inline(sizeof(ARM64RIA));
    307    riA->tag                = ARM64riA_I12;
    308    riA->ARM64riA.I12.imm12 = imm12;
    309    riA->ARM64riA.I12.shift = shift;
    310    vassert(imm12 < 4096);
    311    vassert(shift == 0 || shift == 12);
    312    return riA;
    313 }
    314 ARM64RIA* ARM64RIA_R ( HReg reg ) {
    315    ARM64RIA* riA       = LibVEX_Alloc_inline(sizeof(ARM64RIA));
    316    riA->tag            = ARM64riA_R;
    317    riA->ARM64riA.R.reg = reg;
    318    return riA;
    319 }
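         /* For example (sketch): an add/sub immediate of 4096 is expressed as
            ARM64RIA_I12(1, 12) and prints as "#4096"; a plain register
            operand is just ARM64RIA_R(hregARM64_X3()). */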
    320 
    321 static void ppARM64RIA ( ARM64RIA* riA ) {
    322    switch (riA->tag) {
    323       case ARM64riA_I12:
    324          vex_printf("#%u",(UInt)(riA->ARM64riA.I12.imm12
    325                                  << riA->ARM64riA.I12.shift));
    326          break;
    327       case ARM64riA_R:
    328          ppHRegARM64(riA->ARM64riA.R.reg);
    329          break;
    330       default:
    331          vassert(0);
    332    }
    333 }
    334 
    335 static void addRegUsage_ARM64RIA ( HRegUsage* u, ARM64RIA* riA ) {
    336    switch (riA->tag) {
    337       case ARM64riA_I12:
    338          return;
    339       case ARM64riA_R:
    340          addHRegUse(u, HRmRead, riA->ARM64riA.R.reg);
    341          return;
    342       default:
    343          vpanic("addRegUsage_ARM64RIA");
    344    }
    345 }
    346 
    347 static void mapRegs_ARM64RIA ( HRegRemap* m, ARM64RIA* riA ) {
    348    switch (riA->tag) {
    349       case ARM64riA_I12:
    350          return;
    351       case ARM64riA_R:
    352          riA->ARM64riA.R.reg = lookupHRegRemap(m, riA->ARM64riA.R.reg);
    353          return;
    354       default:
    355          vpanic("mapRegs_ARM64RIA");
    356    }
    357 }
    358 
    359 
    360 /* --------- Reg or "bitfield" (logic immediate) operands --------- */
    361 
    362 ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS ) {
    363    ARM64RIL* riL          = LibVEX_Alloc_inline(sizeof(ARM64RIL));
    364    riL->tag               = ARM64riL_I13;
    365    riL->ARM64riL.I13.bitN = bitN;
    366    riL->ARM64riL.I13.immR = immR;
    367    riL->ARM64riL.I13.immS = immS;
    368    vassert(bitN < 2);
    369    vassert(immR < 64);
    370    vassert(immS < 64);
    371    return riL;
    372 }
    373 ARM64RIL* ARM64RIL_R ( HReg reg ) {
    374    ARM64RIL* riL       = LibVEX_Alloc_inline(sizeof(ARM64RIL));
    375    riL->tag            = ARM64riL_R;
    376    riL->ARM64riL.R.reg = reg;
    377    return riL;
    378 }
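         /* Sketch of how the (N, immR, immS) triple describes a logical
            immediate: ARM64RIL_I13(1, 0, 7) denotes a run of eight ones in a
            64-bit element, unrotated -- that is, the constant 0xFF -- and is
            shown by ppARM64RIL as "#nrs(1,0,7)".  The constructor only
            range-checks the fields; supplying a triple that really is
            encodable is the caller's responsibility. */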
    379 
    380 static void ppARM64RIL ( ARM64RIL* riL ) {
    381    switch (riL->tag) {
    382       case ARM64riL_I13:
    383          vex_printf("#nrs(%u,%u,%u)",
    384                      (UInt)riL->ARM64riL.I13.bitN,
    385                      (UInt)riL->ARM64riL.I13.immR,
    386                      (UInt)riL->ARM64riL.I13.immS);
    387          break;
    388       case ARM64riL_R:
    389          ppHRegARM64(riL->ARM64riL.R.reg);
    390          break;
    391       default:
    392          vassert(0);
    393    }
    394 }
    395 
    396 static void addRegUsage_ARM64RIL ( HRegUsage* u, ARM64RIL* riL ) {
    397    switch (riL->tag) {
    398       case ARM64riL_I13:
    399          return;
    400       case ARM64riL_R:
    401          addHRegUse(u, HRmRead, riL->ARM64riL.R.reg);
    402          return;
    403       default:
    404          vpanic("addRegUsage_ARM64RIL");
    405    }
    406 }
    407 
    408 static void mapRegs_ARM64RIL ( HRegRemap* m, ARM64RIL* riL ) {
    409    switch (riL->tag) {
    410       case ARM64riL_I13:
    411          return;
    412       case ARM64riL_R:
    413          riL->ARM64riL.R.reg = lookupHRegRemap(m, riL->ARM64riL.R.reg);
    414          return;
    415       default:
    416          vpanic("mapRegs_ARM64RIL");
    417    }
    418 }
    419 
    420 
    421 /* --------------- Reg or uimm6 operands --------------- */
    422 
    423 ARM64RI6* ARM64RI6_I6 ( UInt imm6 ) {
    424    ARM64RI6* ri6         = LibVEX_Alloc_inline(sizeof(ARM64RI6));
    425    ri6->tag              = ARM64ri6_I6;
    426    ri6->ARM64ri6.I6.imm6 = imm6;
    427    vassert(imm6 > 0 && imm6 < 64);
    428    return ri6;
    429 }
    430 ARM64RI6* ARM64RI6_R ( HReg reg ) {
    431    ARM64RI6* ri6       = LibVEX_Alloc_inline(sizeof(ARM64RI6));
    432    ri6->tag            = ARM64ri6_R;
    433    ri6->ARM64ri6.R.reg = reg;
    434    return ri6;
    435 }
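         /* E.g. (sketch): a constant shift by 3 is ARM64RI6_I6(3), printed as
            "#3".  A shift amount of zero is not representable here (see the
            vassert above), so zero shifts have to be handled by the caller. */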
    436 
    437 static void ppARM64RI6 ( ARM64RI6* ri6 ) {
    438    switch (ri6->tag) {
    439       case ARM64ri6_I6:
    440          vex_printf("#%u", ri6->ARM64ri6.I6.imm6);
    441          break;
    442       case ARM64ri6_R:
    443          ppHRegARM64(ri6->ARM64ri6.R.reg);
    444          break;
    445       default:
    446          vassert(0);
    447    }
    448 }
    449 
    450 static void addRegUsage_ARM64RI6 ( HRegUsage* u, ARM64RI6* ri6 ) {
    451    switch (ri6->tag) {
    452       case ARM64ri6_I6:
    453          return;
    454       case ARM64ri6_R:
    455          addHRegUse(u, HRmRead, ri6->ARM64ri6.R.reg);
    456          return;
    457       default:
    458          vpanic("addRegUsage_ARM64RI6");
    459    }
    460 }
    461 
    462 static void mapRegs_ARM64RI6 ( HRegRemap* m, ARM64RI6* ri6 ) {
    463    switch (ri6->tag) {
    464       case ARM64ri6_I6:
    465          return;
    466       case ARM64ri6_R:
    467          ri6->ARM64ri6.R.reg = lookupHRegRemap(m, ri6->ARM64ri6.R.reg);
    468          return;
    469       default:
    470          vpanic("mapRegs_ARM64RI6");
    471    }
    472 }
    473 
    474 
    475 /* --------- Instructions. --------- */
    476 
    477 static const HChar* showARM64LogicOp ( ARM64LogicOp op ) {
    478    switch (op) {
    479       case ARM64lo_AND: return "and";
    480       case ARM64lo_OR:  return "orr";
    481       case ARM64lo_XOR: return "eor";
    482       default: vpanic("showARM64LogicOp");
    483    }
    484 }
    485 
    486 static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) {
    487    switch (op) {
    488       case ARM64sh_SHL: return "lsl";
    489       case ARM64sh_SHR: return "lsr";
    490       case ARM64sh_SAR: return "asr";
    491       default: vpanic("showARM64ShiftOp");
    492    }
    493 }
    494 
    495 static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) {
    496    switch (op) {
    497       case ARM64un_NEG: return "neg";
    498       case ARM64un_NOT: return "not";
    499       case ARM64un_CLZ: return "clz";
    500       default: vpanic("showARM64UnaryOp");
    501    }
    502 }
    503 
    504 static const HChar* showARM64MulOp ( ARM64MulOp op ) {
    505    switch (op) {
    506       case ARM64mul_PLAIN: return "mul  ";
    507       case ARM64mul_ZX:    return "umulh";
    508       case ARM64mul_SX:    return "smulh";
    509       default: vpanic("showARM64MulOp");
    510    }
    511 }
    512 
    513 static void characteriseARM64CvtOp ( /*OUT*/HChar* syn,
    514                                      /*OUT*/UInt* fszB, /*OUT*/UInt* iszB,
    515                                      ARM64CvtOp op ) {
    516    switch (op) {
    517       case ARM64cvt_F32_I32S:
    518          *syn = 's'; *fszB = 4; *iszB = 4; break;
    519       case ARM64cvt_F64_I32S:
    520          *syn = 's'; *fszB = 8; *iszB = 4; break;
    521       case ARM64cvt_F32_I64S:
    522          *syn = 's'; *fszB = 4; *iszB = 8; break;
    523       case ARM64cvt_F64_I64S:
    524          *syn = 's'; *fszB = 8; *iszB = 8; break;
    525       case ARM64cvt_F32_I32U:
    526          *syn = 'u'; *fszB = 4; *iszB = 4; break;
    527       case ARM64cvt_F64_I32U:
    528          *syn = 'u'; *fszB = 8; *iszB = 4; break;
    529       case ARM64cvt_F32_I64U:
    530          *syn = 'u'; *fszB = 4; *iszB = 8; break;
    531       case ARM64cvt_F64_I64U:
    532          *syn = 'u'; *fszB = 8; *iszB = 8; break;
    533       default:
    534          vpanic("characteriseARM64CvtOp");
     535    }
    536 }
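         /* For instance, characteriseARM64CvtOp(&syn, &fszB, &iszB,
            ARM64cvt_F64_I32S) yields syn = 's', fszB = 8, iszB = 4: a
            conversion between an F64 value and a signed 32-bit integer. */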
    537 
    538 static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) {
    539    switch (op) {
    540       case ARM64fpb_ADD: return "add";
    541       case ARM64fpb_SUB: return "sub";
    542       case ARM64fpb_MUL: return "mul";
    543       case ARM64fpb_DIV: return "div";
    544       default: vpanic("showARM64FpBinOp");
    545    }
    546 }
    547 
    548 static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
    549    switch (op) {
    550       case ARM64fpu_NEG:   return "neg  ";
    551       case ARM64fpu_ABS:   return "abs  ";
    552       case ARM64fpu_SQRT:  return "sqrt ";
    553       case ARM64fpu_RINT:  return "rinti";
    554       case ARM64fpu_RECPX: return "recpx";
    555       default: vpanic("showARM64FpUnaryOp");
    556    }
    557 }
    558 
    559 static void showARM64VecBinOp(/*OUT*/const HChar** nm,
    560                               /*OUT*/const HChar** ar, ARM64VecBinOp op ) {
    561    switch (op) {
    562       case ARM64vecb_ADD64x2:      *nm = "add   ";    *ar = "2d";   return;
    563       case ARM64vecb_ADD32x4:      *nm = "add   ";    *ar = "4s";   return;
    564       case ARM64vecb_ADD16x8:      *nm = "add   ";    *ar = "8h";   return;
    565       case ARM64vecb_ADD8x16:      *nm = "add   ";    *ar = "16b";  return;
    566       case ARM64vecb_SUB64x2:      *nm = "sub   ";    *ar = "2d";   return;
    567       case ARM64vecb_SUB32x4:      *nm = "sub   ";    *ar = "4s";   return;
    568       case ARM64vecb_SUB16x8:      *nm = "sub   ";    *ar = "8h";   return;
    569       case ARM64vecb_SUB8x16:      *nm = "sub   ";    *ar = "16b";  return;
    570       case ARM64vecb_MUL32x4:      *nm = "mul   ";    *ar = "4s";   return;
    571       case ARM64vecb_MUL16x8:      *nm = "mul   ";    *ar = "8h";   return;
    572       case ARM64vecb_MUL8x16:      *nm = "mul   ";    *ar = "16b";  return;
    573       case ARM64vecb_FADD64x2:     *nm = "fadd  ";    *ar = "2d";   return;
    574       case ARM64vecb_FSUB64x2:     *nm = "fsub  ";    *ar = "2d";   return;
    575       case ARM64vecb_FMUL64x2:     *nm = "fmul  ";    *ar = "2d";   return;
    576       case ARM64vecb_FDIV64x2:     *nm = "fdiv  ";    *ar = "2d";   return;
    577       case ARM64vecb_FADD32x4:     *nm = "fadd  ";    *ar = "4s";   return;
    578       case ARM64vecb_FSUB32x4:     *nm = "fsub  ";    *ar = "4s";   return;
    579       case ARM64vecb_FMUL32x4:     *nm = "fmul  ";    *ar = "4s";   return;
    580       case ARM64vecb_FDIV32x4:     *nm = "fdiv  ";    *ar = "4s";   return;
    581       case ARM64vecb_FMAX64x2:     *nm = "fmax  ";    *ar = "2d";   return;
    582       case ARM64vecb_FMAX32x4:     *nm = "fmax  ";    *ar = "4s";   return;
    583       case ARM64vecb_FMIN64x2:     *nm = "fmin  ";    *ar = "2d";   return;
    584       case ARM64vecb_FMIN32x4:     *nm = "fmin  ";    *ar = "4s";   return;
    585       case ARM64vecb_UMAX32x4:     *nm = "umax  ";    *ar = "4s";   return;
    586       case ARM64vecb_UMAX16x8:     *nm = "umax  ";    *ar = "8h";   return;
    587       case ARM64vecb_UMAX8x16:     *nm = "umax  ";    *ar = "16b";  return;
    588       case ARM64vecb_UMIN32x4:     *nm = "umin  ";    *ar = "4s";   return;
    589       case ARM64vecb_UMIN16x8:     *nm = "umin  ";    *ar = "8h";   return;
    590       case ARM64vecb_UMIN8x16:     *nm = "umin  ";    *ar = "16b";  return;
    591       case ARM64vecb_SMAX32x4:     *nm = "smax  ";    *ar = "4s";   return;
    592       case ARM64vecb_SMAX16x8:     *nm = "smax  ";    *ar = "8h";   return;
    593       case ARM64vecb_SMAX8x16:     *nm = "smax  ";    *ar = "16b";  return;
    594       case ARM64vecb_SMIN32x4:     *nm = "smin  ";    *ar = "4s";   return;
    595       case ARM64vecb_SMIN16x8:     *nm = "smin  ";    *ar = "8h";   return;
    596       case ARM64vecb_SMIN8x16:     *nm = "smin  ";    *ar = "16b";  return;
    597       case ARM64vecb_AND:          *nm = "and   ";    *ar = "16b";  return;
    598       case ARM64vecb_ORR:          *nm = "orr   ";    *ar = "16b";  return;
    599       case ARM64vecb_XOR:          *nm = "eor   ";    *ar = "16b";  return;
    600       case ARM64vecb_CMEQ64x2:     *nm = "cmeq  ";    *ar = "2d";   return;
    601       case ARM64vecb_CMEQ32x4:     *nm = "cmeq  ";    *ar = "4s";   return;
    602       case ARM64vecb_CMEQ16x8:     *nm = "cmeq  ";    *ar = "8h";   return;
    603       case ARM64vecb_CMEQ8x16:     *nm = "cmeq  ";    *ar = "16b";  return;
    604       case ARM64vecb_CMHI64x2:     *nm = "cmhi  ";    *ar = "2d";   return;
    605       case ARM64vecb_CMHI32x4:     *nm = "cmhi  ";    *ar = "4s";   return;
    606       case ARM64vecb_CMHI16x8:     *nm = "cmhi  ";    *ar = "8h";   return;
    607       case ARM64vecb_CMHI8x16:     *nm = "cmhi  ";    *ar = "16b";  return;
    608       case ARM64vecb_CMGT64x2:     *nm = "cmgt  ";    *ar = "2d";   return;
    609       case ARM64vecb_CMGT32x4:     *nm = "cmgt  ";    *ar = "4s";   return;
    610       case ARM64vecb_CMGT16x8:     *nm = "cmgt  ";    *ar = "8h";   return;
    611       case ARM64vecb_CMGT8x16:     *nm = "cmgt  ";    *ar = "16b";  return;
    612       case ARM64vecb_FCMEQ64x2:    *nm = "fcmeq ";    *ar = "2d";   return;
    613       case ARM64vecb_FCMEQ32x4:    *nm = "fcmeq ";    *ar = "4s";   return;
    614       case ARM64vecb_FCMGE64x2:    *nm = "fcmge ";    *ar = "2d";   return;
    615       case ARM64vecb_FCMGE32x4:    *nm = "fcmge ";    *ar = "4s";   return;
    616       case ARM64vecb_FCMGT64x2:    *nm = "fcmgt ";    *ar = "2d";   return;
    617       case ARM64vecb_FCMGT32x4:    *nm = "fcmgt ";    *ar = "4s";   return;
    618       case ARM64vecb_TBL1:         *nm = "tbl   ";    *ar = "16b";  return;
    619       case ARM64vecb_UZP164x2:     *nm = "uzp1  ";    *ar = "2d";   return;
    620       case ARM64vecb_UZP132x4:     *nm = "uzp1  ";    *ar = "4s";   return;
    621       case ARM64vecb_UZP116x8:     *nm = "uzp1  ";    *ar = "8h";   return;
    622       case ARM64vecb_UZP18x16:     *nm = "uzp1  ";    *ar = "16b";  return;
    623       case ARM64vecb_UZP264x2:     *nm = "uzp2  ";    *ar = "2d";   return;
    624       case ARM64vecb_UZP232x4:     *nm = "uzp2  ";    *ar = "4s";   return;
    625       case ARM64vecb_UZP216x8:     *nm = "uzp2  ";    *ar = "8h";   return;
    626       case ARM64vecb_UZP28x16:     *nm = "uzp2  ";    *ar = "16b";  return;
    627       case ARM64vecb_ZIP132x4:     *nm = "zip1  ";    *ar = "4s";   return;
    628       case ARM64vecb_ZIP116x8:     *nm = "zip1  ";    *ar = "8h";   return;
    629       case ARM64vecb_ZIP18x16:     *nm = "zip1  ";    *ar = "16b";  return;
    630       case ARM64vecb_ZIP232x4:     *nm = "zip2  ";    *ar = "4s";   return;
    631       case ARM64vecb_ZIP216x8:     *nm = "zip2  ";    *ar = "8h";   return;
    632       case ARM64vecb_ZIP28x16:     *nm = "zip2  ";    *ar = "16b";  return;
    633       case ARM64vecb_PMUL8x16:     *nm = "pmul  ";    *ar = "16b";  return;
    634       case ARM64vecb_PMULL8x8:     *nm = "pmull ";    *ar = "8hbb"; return;
    635       case ARM64vecb_UMULL2DSS:    *nm = "umull ";    *ar = "2dss"; return;
    636       case ARM64vecb_UMULL4SHH:    *nm = "umull ";    *ar = "4shh"; return;
    637       case ARM64vecb_UMULL8HBB:    *nm = "umull ";    *ar = "8hbb"; return;
    638       case ARM64vecb_SMULL2DSS:    *nm = "smull ";    *ar = "2dss"; return;
    639       case ARM64vecb_SMULL4SHH:    *nm = "smull ";    *ar = "4shh"; return;
    640       case ARM64vecb_SMULL8HBB:    *nm = "smull ";    *ar = "8hbb"; return;
    641       case ARM64vecb_SQADD64x2:    *nm = "sqadd ";    *ar = "2d";   return;
    642       case ARM64vecb_SQADD32x4:    *nm = "sqadd ";    *ar = "4s";   return;
    643       case ARM64vecb_SQADD16x8:    *nm = "sqadd ";    *ar = "8h";   return;
    644       case ARM64vecb_SQADD8x16:    *nm = "sqadd ";    *ar = "16b";  return;
    645       case ARM64vecb_UQADD64x2:    *nm = "uqadd ";    *ar = "2d";   return;
    646       case ARM64vecb_UQADD32x4:    *nm = "uqadd ";    *ar = "4s";   return;
    647       case ARM64vecb_UQADD16x8:    *nm = "uqadd ";    *ar = "8h";   return;
    648       case ARM64vecb_UQADD8x16:    *nm = "uqadd ";    *ar = "16b";  return;
    649       case ARM64vecb_SQSUB64x2:    *nm = "sqsub ";    *ar = "2d";   return;
    650       case ARM64vecb_SQSUB32x4:    *nm = "sqsub ";    *ar = "4s";   return;
    651       case ARM64vecb_SQSUB16x8:    *nm = "sqsub ";    *ar = "8h";   return;
    652       case ARM64vecb_SQSUB8x16:    *nm = "sqsub ";    *ar = "16b";  return;
    653       case ARM64vecb_UQSUB64x2:    *nm = "uqsub ";    *ar = "2d";   return;
    654       case ARM64vecb_UQSUB32x4:    *nm = "uqsub ";    *ar = "4s";   return;
    655       case ARM64vecb_UQSUB16x8:    *nm = "uqsub ";    *ar = "8h";   return;
    656       case ARM64vecb_UQSUB8x16:    *nm = "uqsub ";    *ar = "16b";  return;
    657       case ARM64vecb_SQDMULL2DSS:  *nm = "sqdmull";   *ar = "2dss"; return;
    658       case ARM64vecb_SQDMULL4SHH:  *nm = "sqdmull";   *ar = "4shh"; return;
    659       case ARM64vecb_SQDMULH32x4:  *nm = "sqdmulh";   *ar = "4s";   return;
    660       case ARM64vecb_SQDMULH16x8:  *nm = "sqdmulh";   *ar = "8h";   return;
    661       case ARM64vecb_SQRDMULH32x4: *nm = "sqrdmulh";  *ar = "4s";   return;
    662       case ARM64vecb_SQRDMULH16x8: *nm = "sqrdmulh";  *ar = "8h";   return;
    663       case ARM64vecb_SQSHL64x2:    *nm = "sqshl ";    *ar = "2d";   return;
    664       case ARM64vecb_SQSHL32x4:    *nm = "sqshl ";    *ar = "4s";   return;
    665       case ARM64vecb_SQSHL16x8:    *nm = "sqshl ";    *ar = "8h";   return;
    666       case ARM64vecb_SQSHL8x16:    *nm = "sqshl ";    *ar = "16b";  return;
    667       case ARM64vecb_UQSHL64x2:    *nm = "uqshl ";    *ar = "2d";   return;
    668       case ARM64vecb_UQSHL32x4:    *nm = "uqshl ";    *ar = "4s";   return;
    669       case ARM64vecb_UQSHL16x8:    *nm = "uqshl ";    *ar = "8h";   return;
    670       case ARM64vecb_UQSHL8x16:    *nm = "uqshl ";    *ar = "16b";  return;
    671       case ARM64vecb_SQRSHL64x2:   *nm = "sqrshl";    *ar = "2d";   return;
    672       case ARM64vecb_SQRSHL32x4:   *nm = "sqrshl";    *ar = "4s";   return;
    673       case ARM64vecb_SQRSHL16x8:   *nm = "sqrshl";    *ar = "8h";   return;
    674       case ARM64vecb_SQRSHL8x16:   *nm = "sqrshl";    *ar = "16b";  return;
    675       case ARM64vecb_UQRSHL64x2:   *nm = "uqrshl";    *ar = "2d";   return;
    676       case ARM64vecb_UQRSHL32x4:   *nm = "uqrshl";    *ar = "4s";   return;
    677       case ARM64vecb_UQRSHL16x8:   *nm = "uqrshl";    *ar = "8h";   return;
    678       case ARM64vecb_UQRSHL8x16:   *nm = "uqrshl";    *ar = "16b";  return;
    679       case ARM64vecb_SSHL64x2:     *nm = "sshl  ";    *ar = "2d";   return;
    680       case ARM64vecb_SSHL32x4:     *nm = "sshl  ";    *ar = "4s";   return;
    681       case ARM64vecb_SSHL16x8:     *nm = "sshl  ";    *ar = "8h";   return;
    682       case ARM64vecb_SSHL8x16:     *nm = "sshl  ";    *ar = "16b";  return;
    683       case ARM64vecb_USHL64x2:     *nm = "ushl  ";    *ar = "2d";   return;
    684       case ARM64vecb_USHL32x4:     *nm = "ushl  ";    *ar = "4s";   return;
    685       case ARM64vecb_USHL16x8:     *nm = "ushl  ";    *ar = "8h";   return;
    686       case ARM64vecb_USHL8x16:     *nm = "ushl  ";    *ar = "16b";  return;
    687       case ARM64vecb_SRSHL64x2:    *nm = "srshl ";    *ar = "2d";   return;
    688       case ARM64vecb_SRSHL32x4:    *nm = "srshl ";    *ar = "4s";   return;
    689       case ARM64vecb_SRSHL16x8:    *nm = "srshl ";    *ar = "8h";   return;
    690       case ARM64vecb_SRSHL8x16:    *nm = "srshl ";    *ar = "16b";  return;
    691       case ARM64vecb_URSHL64x2:    *nm = "urshl ";    *ar = "2d";   return;
    692       case ARM64vecb_URSHL32x4:    *nm = "urshl ";    *ar = "4s";   return;
    693       case ARM64vecb_URSHL16x8:    *nm = "urshl ";    *ar = "8h";   return;
    694       case ARM64vecb_URSHL8x16:    *nm = "urshl ";    *ar = "16b";  return;
    695       case ARM64vecb_FRECPS64x2:   *nm = "frecps";    *ar = "2d";   return;
    696       case ARM64vecb_FRECPS32x4:   *nm = "frecps";    *ar = "4s";   return;
    697       case ARM64vecb_FRSQRTS64x2:  *nm = "frsqrts";   *ar = "2d";   return;
    698       case ARM64vecb_FRSQRTS32x4:  *nm = "frsqrts";   *ar = "4s";   return;
    699       default: vpanic("showARM64VecBinOp");
    700    }
    701 }
    702 
    703 static void showARM64VecModifyOp(/*OUT*/const HChar** nm,
    704                                  /*OUT*/const HChar** ar,
    705                                  ARM64VecModifyOp op ) {
    706    switch (op) {
    707       case ARM64vecmo_SUQADD64x2:   *nm = "suqadd";    *ar = "2d";   return;
    708       case ARM64vecmo_SUQADD32x4:   *nm = "suqadd";    *ar = "4s";   return;
    709       case ARM64vecmo_SUQADD16x8:   *nm = "suqadd";    *ar = "8h";   return;
    710       case ARM64vecmo_SUQADD8x16:   *nm = "suqadd";    *ar = "16b";  return;
    711       case ARM64vecmo_USQADD64x2:   *nm = "usqadd";    *ar = "2d";   return;
    712       case ARM64vecmo_USQADD32x4:   *nm = "usqadd";    *ar = "4s";   return;
    713       case ARM64vecmo_USQADD16x8:   *nm = "usqadd";    *ar = "8h";   return;
    714       case ARM64vecmo_USQADD8x16:   *nm = "usqadd";    *ar = "16b";  return;
    715       default: vpanic("showARM64VecModifyOp");
    716    }
    717 }
    718 
    719 static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
    720                                 /*OUT*/const HChar** ar, ARM64VecUnaryOp op )
    721 {
    722    switch (op) {
    723       case ARM64vecu_FNEG64x2:    *nm = "fneg ";   *ar = "2d";  return;
    724       case ARM64vecu_FNEG32x4:    *nm = "fneg ";   *ar = "4s";  return;
    725       case ARM64vecu_FABS64x2:    *nm = "fabs ";   *ar = "2d";  return;
    726       case ARM64vecu_FABS32x4:    *nm = "fabs ";   *ar = "4s";  return;
    727       case ARM64vecu_NOT:         *nm = "not  ";   *ar = "all"; return;
    728       case ARM64vecu_ABS64x2:     *nm = "abs  ";   *ar = "2d";  return;
    729       case ARM64vecu_ABS32x4:     *nm = "abs  ";   *ar = "4s";  return;
    730       case ARM64vecu_ABS16x8:     *nm = "abs  ";   *ar = "8h";  return;
    731       case ARM64vecu_ABS8x16:     *nm = "abs  ";   *ar = "16b"; return;
    732       case ARM64vecu_CLS32x4:     *nm = "cls  ";   *ar = "4s";  return;
    733       case ARM64vecu_CLS16x8:     *nm = "cls  ";   *ar = "8h";  return;
    734       case ARM64vecu_CLS8x16:     *nm = "cls  ";   *ar = "16b"; return;
    735       case ARM64vecu_CLZ32x4:     *nm = "clz  ";   *ar = "4s";  return;
    736       case ARM64vecu_CLZ16x8:     *nm = "clz  ";   *ar = "8h";  return;
    737       case ARM64vecu_CLZ8x16:     *nm = "clz  ";   *ar = "16b"; return;
    738       case ARM64vecu_CNT8x16:     *nm = "cnt  ";   *ar = "16b"; return;
    739       case ARM64vecu_RBIT:        *nm = "rbit ";   *ar = "16b"; return;
    740       case ARM64vecu_REV1616B:    *nm = "rev16";   *ar = "16b"; return;
    741       case ARM64vecu_REV3216B:    *nm = "rev32";   *ar = "16b"; return;
    742       case ARM64vecu_REV328H:     *nm = "rev32";   *ar = "8h";  return;
    743       case ARM64vecu_REV6416B:    *nm = "rev64";   *ar = "16b"; return;
    744       case ARM64vecu_REV648H:     *nm = "rev64";   *ar = "8h";  return;
    745       case ARM64vecu_REV644S:     *nm = "rev64";   *ar = "4s";  return;
    746       case ARM64vecu_URECPE32x4:  *nm = "urecpe";  *ar = "4s";  return;
    747       case ARM64vecu_URSQRTE32x4: *nm = "ursqrte"; *ar = "4s";  return;
    748       case ARM64vecu_FRECPE64x2:  *nm = "frecpe";  *ar = "2d";  return;
    749       case ARM64vecu_FRECPE32x4:  *nm = "frecpe";  *ar = "4s";  return;
    750       case ARM64vecu_FRSQRTE64x2: *nm = "frsqrte"; *ar = "2d";  return;
    751       case ARM64vecu_FRSQRTE32x4: *nm = "frsqrte"; *ar = "4s";  return;
    752       case ARM64vecu_FSQRT64x2:   *nm = "fsqrt";   *ar = "2d";  return;
    753       case ARM64vecu_FSQRT32x4:   *nm = "fsqrt";   *ar = "4s";  return;
    754       default: vpanic("showARM64VecUnaryOp");
    755    }
    756 }
    757 
    758 static void showARM64VecShiftImmOp(/*OUT*/const HChar** nm,
    759                                    /*OUT*/const HChar** ar,
    760                                    ARM64VecShiftImmOp op )
    761 {
    762    switch (op) {
    763       case ARM64vecshi_USHR64x2:    *nm = "ushr  ";   *ar = "2d";  return;
    764       case ARM64vecshi_USHR32x4:    *nm = "ushr  ";   *ar = "4s";  return;
    765       case ARM64vecshi_USHR16x8:    *nm = "ushr  ";   *ar = "8h";  return;
    766       case ARM64vecshi_USHR8x16:    *nm = "ushr  ";   *ar = "16b"; return;
    767       case ARM64vecshi_SSHR64x2:    *nm = "sshr  ";   *ar = "2d";  return;
    768       case ARM64vecshi_SSHR32x4:    *nm = "sshr  ";   *ar = "4s";  return;
    769       case ARM64vecshi_SSHR16x8:    *nm = "sshr  ";   *ar = "8h";  return;
    770       case ARM64vecshi_SSHR8x16:    *nm = "sshr  ";   *ar = "16b"; return;
    771       case ARM64vecshi_SHL64x2:     *nm = "shl   ";   *ar = "2d";  return;
    772       case ARM64vecshi_SHL32x4:     *nm = "shl   ";   *ar = "4s";  return;
    773       case ARM64vecshi_SHL16x8:     *nm = "shl   ";   *ar = "8h";  return;
    774       case ARM64vecshi_SHL8x16:     *nm = "shl   ";   *ar = "16b"; return;
    775       case ARM64vecshi_SQSHRN2SD:   *nm = "sqshrn";   *ar = "2sd"; return;
    776       case ARM64vecshi_SQSHRN4HS:   *nm = "sqshrn";   *ar = "4hs"; return;
    777       case ARM64vecshi_SQSHRN8BH:   *nm = "sqshrn";   *ar = "8bh"; return;
    778       case ARM64vecshi_UQSHRN2SD:   *nm = "uqshrn";   *ar = "2sd"; return;
    779       case ARM64vecshi_UQSHRN4HS:   *nm = "uqshrn";   *ar = "4hs"; return;
    780       case ARM64vecshi_UQSHRN8BH:   *nm = "uqshrn";   *ar = "8bh"; return;
    781       case ARM64vecshi_SQSHRUN2SD:  *nm = "sqshrun";  *ar = "2sd"; return;
    782       case ARM64vecshi_SQSHRUN4HS:  *nm = "sqshrun";  *ar = "4hs"; return;
    783       case ARM64vecshi_SQSHRUN8BH:  *nm = "sqshrun";  *ar = "8bh"; return;
    784       case ARM64vecshi_SQRSHRN2SD:  *nm = "sqrshrn";  *ar = "2sd"; return;
    785       case ARM64vecshi_SQRSHRN4HS:  *nm = "sqrshrn";  *ar = "4hs"; return;
    786       case ARM64vecshi_SQRSHRN8BH:  *nm = "sqrshrn";  *ar = "8bh"; return;
    787       case ARM64vecshi_UQRSHRN2SD:  *nm = "uqrshrn";  *ar = "2sd"; return;
    788       case ARM64vecshi_UQRSHRN4HS:  *nm = "uqrshrn";  *ar = "4hs"; return;
    789       case ARM64vecshi_UQRSHRN8BH:  *nm = "uqrshrn";  *ar = "8bh"; return;
    790       case ARM64vecshi_SQRSHRUN2SD: *nm = "sqrshrun"; *ar = "2sd"; return;
    791       case ARM64vecshi_SQRSHRUN4HS: *nm = "sqrshrun"; *ar = "4hs"; return;
    792       case ARM64vecshi_SQRSHRUN8BH: *nm = "sqrshrun"; *ar = "8bh"; return;
    793       case ARM64vecshi_UQSHL64x2:   *nm = "uqshl ";   *ar = "2d";  return;
    794       case ARM64vecshi_UQSHL32x4:   *nm = "uqshl ";   *ar = "4s";  return;
    795       case ARM64vecshi_UQSHL16x8:   *nm = "uqshl ";   *ar = "8h";  return;
    796       case ARM64vecshi_UQSHL8x16:   *nm = "uqshl ";   *ar = "16b"; return;
    797       case ARM64vecshi_SQSHL64x2:   *nm = "sqshl ";   *ar = "2d";  return;
    798       case ARM64vecshi_SQSHL32x4:   *nm = "sqshl ";   *ar = "4s";  return;
    799       case ARM64vecshi_SQSHL16x8:   *nm = "sqshl ";   *ar = "8h";  return;
    800       case ARM64vecshi_SQSHL8x16:   *nm = "sqshl ";   *ar = "16b"; return;
    801       case ARM64vecshi_SQSHLU64x2:  *nm = "sqshlu";   *ar = "2d";  return;
    802       case ARM64vecshi_SQSHLU32x4:  *nm = "sqshlu";   *ar = "4s";  return;
    803       case ARM64vecshi_SQSHLU16x8:  *nm = "sqshlu";   *ar = "8h";  return;
    804       case ARM64vecshi_SQSHLU8x16:  *nm = "sqshlu";   *ar = "16b"; return;
    805       default: vpanic("showARM64VecShiftImmOp");
    806    }
    807 }
    808 
    809 static const HChar* showARM64VecNarrowOp(ARM64VecNarrowOp op) {
    810    switch (op) {
    811       case ARM64vecna_XTN:    return "xtn   ";
    812       case ARM64vecna_SQXTN:  return "sqxtn ";
    813       case ARM64vecna_UQXTN:  return "uqxtn ";
    814       case ARM64vecna_SQXTUN: return "sqxtun";
    815       default: vpanic("showARM64VecNarrowOp");
    816    }
    817 }
    818 
    819 ARM64Instr* ARM64Instr_Arith ( HReg dst,
    820                                HReg argL, ARM64RIA* argR, Bool isAdd ) {
    821    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    822    i->tag                 = ARM64in_Arith;
    823    i->ARM64in.Arith.dst   = dst;
    824    i->ARM64in.Arith.argL  = argL;
    825    i->ARM64in.Arith.argR  = argR;
    826    i->ARM64in.Arith.isAdd = isAdd;
    827    return i;
    828 }
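         /* Illustrative only: a 64-bit "add dst, argL, #32" would be built as
            ARM64Instr_Arith(dst, argL, ARM64RIA_I12(32, 0), True), where the
            final argument selects add rather than sub, and dst/argL are
            HRcInt64 registers. */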
    829 ARM64Instr* ARM64Instr_Cmp ( HReg argL, ARM64RIA* argR, Bool is64 ) {
    830    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    831    i->tag              = ARM64in_Cmp;
    832    i->ARM64in.Cmp.argL = argL;
    833    i->ARM64in.Cmp.argR = argR;
    834    i->ARM64in.Cmp.is64 = is64;
    835    return i;
    836 }
    837 ARM64Instr* ARM64Instr_Logic ( HReg dst,
    838                                HReg argL, ARM64RIL* argR, ARM64LogicOp op ) {
    839    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    840    i->tag                 = ARM64in_Logic;
    841    i->ARM64in.Logic.dst   = dst;
    842    i->ARM64in.Logic.argL  = argL;
    843    i->ARM64in.Logic.argR  = argR;
    844    i->ARM64in.Logic.op    = op;
    845    return i;
    846 }
    847 ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR ) {
    848    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    849    i->tag               = ARM64in_Test;
    850    i->ARM64in.Test.argL = argL;
    851    i->ARM64in.Test.argR = argR;
    852    return i;
    853 }
    854 ARM64Instr* ARM64Instr_Shift ( HReg dst,
    855                                HReg argL, ARM64RI6* argR, ARM64ShiftOp op ) {
    856    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    857    i->tag                = ARM64in_Shift;
    858    i->ARM64in.Shift.dst  = dst;
    859    i->ARM64in.Shift.argL = argL;
    860    i->ARM64in.Shift.argR = argR;
    861    i->ARM64in.Shift.op   = op;
    862    return i;
    863 }
    864 ARM64Instr* ARM64Instr_Unary ( HReg dst, HReg src, ARM64UnaryOp op ) {
    865    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    866    i->tag               = ARM64in_Unary;
    867    i->ARM64in.Unary.dst = dst;
    868    i->ARM64in.Unary.src = src;
    869    i->ARM64in.Unary.op  = op;
    870    return i;
    871 }
    872 ARM64Instr* ARM64Instr_MovI ( HReg dst, HReg src ) {
    873    ARM64Instr* i      = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    874    i->tag             = ARM64in_MovI;
    875    i->ARM64in.MovI.dst = dst;
    876    i->ARM64in.MovI.src = src;
    877    vassert(hregClass(src) == HRcInt64);
    878    vassert(hregClass(dst) == HRcInt64);
    879    return i;
    880 }
    881 ARM64Instr* ARM64Instr_Imm64 ( HReg dst, ULong imm64 ) {
    882    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    883    i->tag                 = ARM64in_Imm64;
    884    i->ARM64in.Imm64.dst   = dst;
    885    i->ARM64in.Imm64.imm64 = imm64;
    886    return i;
    887 }
    888 ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
    889    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    890    i->tag                   = ARM64in_LdSt64;
    891    i->ARM64in.LdSt64.isLoad = isLoad;
    892    i->ARM64in.LdSt64.rD     = rD;
    893    i->ARM64in.LdSt64.amode  = amode;
    894    return i;
    895 }
    896 ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
    897    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    898    i->tag                   = ARM64in_LdSt32;
    899    i->ARM64in.LdSt32.isLoad = isLoad;
    900    i->ARM64in.LdSt32.rD     = rD;
    901    i->ARM64in.LdSt32.amode  = amode;
    902    return i;
    903 }
    904 ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
    905    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    906    i->tag                   = ARM64in_LdSt16;
    907    i->ARM64in.LdSt16.isLoad = isLoad;
    908    i->ARM64in.LdSt16.rD     = rD;
    909    i->ARM64in.LdSt16.amode  = amode;
    910    return i;
    911 }
    912 ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
    913    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    914    i->tag                  = ARM64in_LdSt8;
    915    i->ARM64in.LdSt8.isLoad = isLoad;
    916    i->ARM64in.LdSt8.rD     = rD;
    917    i->ARM64in.LdSt8.amode  = amode;
    918    return i;
    919 }
    920 ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC,
    921                                  ARM64CondCode cond, Bool toFastEP ) {
    922    ARM64Instr* i               = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    923    i->tag                      = ARM64in_XDirect;
    924    i->ARM64in.XDirect.dstGA    = dstGA;
    925    i->ARM64in.XDirect.amPC     = amPC;
    926    i->ARM64in.XDirect.cond     = cond;
    927    i->ARM64in.XDirect.toFastEP = toFastEP;
    928    return i;
    929 }
    930 ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC,
    931                                 ARM64CondCode cond ) {
    932    ARM64Instr* i           = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    933    i->tag                  = ARM64in_XIndir;
    934    i->ARM64in.XIndir.dstGA = dstGA;
    935    i->ARM64in.XIndir.amPC  = amPC;
    936    i->ARM64in.XIndir.cond  = cond;
    937    return i;
    938 }
    939 ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC,
    940                                    ARM64CondCode cond, IRJumpKind jk ) {
    941    ARM64Instr* i              = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    942    i->tag                     = ARM64in_XAssisted;
    943    i->ARM64in.XAssisted.dstGA = dstGA;
    944    i->ARM64in.XAssisted.amPC  = amPC;
    945    i->ARM64in.XAssisted.cond  = cond;
    946    i->ARM64in.XAssisted.jk    = jk;
    947    return i;
    948 }
    949 ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR,
    950                               ARM64CondCode cond ) {
    951    ARM64Instr* i        = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    952    i->tag               = ARM64in_CSel;
    953    i->ARM64in.CSel.dst  = dst;
    954    i->ARM64in.CSel.argL = argL;
    955    i->ARM64in.CSel.argR = argR;
    956    i->ARM64in.CSel.cond = cond;
    957    return i;
    958 }
    959 ARM64Instr* ARM64Instr_Call ( ARM64CondCode cond, Addr64 target, Int nArgRegs,
    960                               RetLoc rloc ) {
    961    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    962    i->tag                   = ARM64in_Call;
    963    i->ARM64in.Call.cond     = cond;
    964    i->ARM64in.Call.target   = target;
    965    i->ARM64in.Call.nArgRegs = nArgRegs;
    966    i->ARM64in.Call.rloc     = rloc;
    967    vassert(is_sane_RetLoc(rloc));
    968    return i;
    969 }
    970 extern ARM64Instr* ARM64Instr_AddToSP ( Int simm ) {
    971    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    972    i->tag                  = ARM64in_AddToSP;
    973    i->ARM64in.AddToSP.simm = simm;
    974    vassert(-4096 < simm && simm < 4096);
    975    vassert(0 == (simm & 0xF));
    976    return i;
    977 }
    978 extern ARM64Instr* ARM64Instr_FromSP  ( HReg dst ) {
    979    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    980    i->tag                = ARM64in_FromSP;
    981    i->ARM64in.FromSP.dst = dst;
    982    return i;
    983 }
    984 ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR,
    985                              ARM64MulOp op ) {
    986    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    987    i->tag              = ARM64in_Mul;
    988    i->ARM64in.Mul.dst  = dst;
    989    i->ARM64in.Mul.argL = argL;
    990    i->ARM64in.Mul.argR = argR;
    991    i->ARM64in.Mul.op   = op;
    992    return i;
    993 }
    994 ARM64Instr* ARM64Instr_LdrEX ( Int szB ) {
    995    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    996    i->tag               = ARM64in_LdrEX;
    997    i->ARM64in.LdrEX.szB = szB;
    998    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
    999    return i;
   1000 }
   1001 ARM64Instr* ARM64Instr_StrEX ( Int szB ) {
   1002    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1003    i->tag               = ARM64in_StrEX;
   1004    i->ARM64in.StrEX.szB = szB;
   1005    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   1006    return i;
   1007 }
   1008 ARM64Instr* ARM64Instr_MFence ( void ) {
   1009    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1010    i->tag        = ARM64in_MFence;
   1011    return i;
   1012 }
    1013 ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg hD, HReg rN, UInt uimm12 ) {
    1014    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
    1015    i->tag                   = ARM64in_VLdStH;
    1016    i->ARM64in.VLdStH.isLoad = isLoad;
    1017    i->ARM64in.VLdStH.hD     = hD;
   1018    i->ARM64in.VLdStH.rN     = rN;
   1019    i->ARM64in.VLdStH.uimm12 = uimm12;
   1020    vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
   1021    return i;
   1022 }
   1023 ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
   1024    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1025    i->tag                   = ARM64in_VLdStS;
   1026    i->ARM64in.VLdStS.isLoad = isLoad;
   1027    i->ARM64in.VLdStS.sD     = sD;
   1028    i->ARM64in.VLdStS.rN     = rN;
   1029    i->ARM64in.VLdStS.uimm12 = uimm12;
   1030    vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
   1031    return i;
   1032 }
   1033 ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) {
   1034    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1035    i->tag                   = ARM64in_VLdStD;
   1036    i->ARM64in.VLdStD.isLoad = isLoad;
   1037    i->ARM64in.VLdStD.dD     = dD;
   1038    i->ARM64in.VLdStD.rN     = rN;
   1039    i->ARM64in.VLdStD.uimm12 = uimm12;
   1040    vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
   1041    return i;
   1042 }
   1043 ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN ) {
   1044    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1045    i->tag                   = ARM64in_VLdStQ;
   1046    i->ARM64in.VLdStQ.isLoad = isLoad;
   1047    i->ARM64in.VLdStQ.rQ     = rQ;
   1048    i->ARM64in.VLdStQ.rN     = rN;
   1049    return i;
   1050 }
   1051 ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ) {
   1052    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1053    i->tag                 = ARM64in_VCvtI2F;
   1054    i->ARM64in.VCvtI2F.how = how;
   1055    i->ARM64in.VCvtI2F.rD  = rD;
   1056    i->ARM64in.VCvtI2F.rS  = rS;
   1057    return i;
   1058 }
   1059 ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
   1060                                  UChar armRM ) {
   1061    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1062    i->tag                   = ARM64in_VCvtF2I;
   1063    i->ARM64in.VCvtF2I.how   = how;
   1064    i->ARM64in.VCvtF2I.rD    = rD;
   1065    i->ARM64in.VCvtF2I.rS    = rS;
   1066    i->ARM64in.VCvtF2I.armRM = armRM;
   1067    vassert(armRM <= 3);
   1068    return i;
   1069 }
   1070 ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
   1071    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1072    i->tag                 = ARM64in_VCvtSD;
   1073    i->ARM64in.VCvtSD.sToD = sToD;
   1074    i->ARM64in.VCvtSD.dst  = dst;
   1075    i->ARM64in.VCvtSD.src  = src;
   1076    return i;
   1077 }
   1078 ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src ) {
   1079    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1080    i->tag                 = ARM64in_VCvtHS;
   1081    i->ARM64in.VCvtHS.hToS = hToS;
   1082    i->ARM64in.VCvtHS.dst  = dst;
   1083    i->ARM64in.VCvtHS.src  = src;
   1084    return i;
   1085 }
   1086 ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src ) {
   1087    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1088    i->tag                 = ARM64in_VCvtHD;
   1089    i->ARM64in.VCvtHD.hToD = hToD;
   1090    i->ARM64in.VCvtHD.dst  = dst;
   1091    i->ARM64in.VCvtHD.src  = src;
   1092    return i;
   1093 }
   1094 ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
   1095    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1096    i->tag                 = ARM64in_VUnaryD;
   1097    i->ARM64in.VUnaryD.op  = op;
   1098    i->ARM64in.VUnaryD.dst = dst;
   1099    i->ARM64in.VUnaryD.src = src;
   1100    return i;
   1101 }
   1102 ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
   1103    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1104    i->tag                 = ARM64in_VUnaryS;
   1105    i->ARM64in.VUnaryS.op  = op;
   1106    i->ARM64in.VUnaryS.dst = dst;
   1107    i->ARM64in.VUnaryS.src = src;
   1108    return i;
   1109 }
   1110 ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op,
   1111                                HReg dst, HReg argL, HReg argR ) {
   1112    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1113    i->tag                = ARM64in_VBinD;
   1114    i->ARM64in.VBinD.op   = op;
   1115    i->ARM64in.VBinD.dst  = dst;
   1116    i->ARM64in.VBinD.argL = argL;
   1117    i->ARM64in.VBinD.argR = argR;
   1118    return i;
   1119 }
   1120 ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op,
   1121                                HReg dst, HReg argL, HReg argR ) {
   1122    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1123    i->tag                = ARM64in_VBinS;
   1124    i->ARM64in.VBinS.op   = op;
   1125    i->ARM64in.VBinS.dst  = dst;
   1126    i->ARM64in.VBinS.argL = argL;
   1127    i->ARM64in.VBinS.argR = argR;
   1128    return i;
   1129 }
   1130 ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) {
   1131    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1132    i->tag                = ARM64in_VCmpD;
   1133    i->ARM64in.VCmpD.argL = argL;
   1134    i->ARM64in.VCmpD.argR = argR;
   1135    return i;
   1136 }
   1137 ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ) {
   1138    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1139    i->tag                = ARM64in_VCmpS;
   1140    i->ARM64in.VCmpS.argL = argL;
   1141    i->ARM64in.VCmpS.argR = argR;
   1142    return i;
   1143 }
   1144 ARM64Instr* ARM64Instr_VFCSel ( HReg dst, HReg argL, HReg argR,
   1145                                 ARM64CondCode cond, Bool isD ) {
   1146    ARM64Instr* i          = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1147    i->tag                 = ARM64in_VFCSel;
   1148    i->ARM64in.VFCSel.dst  = dst;
   1149    i->ARM64in.VFCSel.argL = argL;
   1150    i->ARM64in.VFCSel.argR = argR;
   1151    i->ARM64in.VFCSel.cond = cond;
   1152    i->ARM64in.VFCSel.isD  = isD;
   1153    return i;
   1154 }
   1155 ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg ) {
   1156    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1157    i->tag                 = ARM64in_FPCR;
   1158    i->ARM64in.FPCR.toFPCR = toFPCR;
   1159    i->ARM64in.FPCR.iReg   = iReg;
   1160    return i;
   1161 }
   1162 ARM64Instr* ARM64Instr_FPSR ( Bool toFPSR, HReg iReg ) {
   1163    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1164    i->tag                 = ARM64in_FPSR;
   1165    i->ARM64in.FPSR.toFPSR = toFPSR;
   1166    i->ARM64in.FPSR.iReg   = iReg;
   1167    return i;
   1168 }
   1169 ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op,
   1170                                HReg dst, HReg argL, HReg argR ) {
   1171    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1172    i->tag                = ARM64in_VBinV;
   1173    i->ARM64in.VBinV.op   = op;
   1174    i->ARM64in.VBinV.dst  = dst;
   1175    i->ARM64in.VBinV.argL = argL;
   1176    i->ARM64in.VBinV.argR = argR;
   1177    return i;
   1178 }
   1179 ARM64Instr* ARM64Instr_VModifyV ( ARM64VecModifyOp op, HReg mod, HReg arg ) {
   1180    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1181    i->tag                  = ARM64in_VModifyV;
   1182    i->ARM64in.VModifyV.op  = op;
   1183    i->ARM64in.VModifyV.mod = mod;
   1184    i->ARM64in.VModifyV.arg = arg;
   1185    return i;
   1186 }
   1187 ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg dst, HReg arg ) {
   1188    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1189    i->tag                 = ARM64in_VUnaryV;
   1190    i->ARM64in.VUnaryV.op  = op;
   1191    i->ARM64in.VUnaryV.dst = dst;
   1192    i->ARM64in.VUnaryV.arg = arg;
   1193    return i;
   1194 }
   1195 ARM64Instr* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op,
   1196                                   UInt dszBlg2, HReg dst, HReg src ) {
   1197    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1198    i->tag                      = ARM64in_VNarrowV;
   1199    i->ARM64in.VNarrowV.op      = op;
   1200    i->ARM64in.VNarrowV.dszBlg2 = dszBlg2;
   1201    i->ARM64in.VNarrowV.dst     = dst;
   1202    i->ARM64in.VNarrowV.src     = src;
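            /* dszBlg2 is log2 of the destination lane size in bytes:
               0 => .8b from .8h, 1 => .4h from .4s, 2 => .2s from .2d,
               as rendered by the pretty-printer below. */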
   1203    vassert(dszBlg2 == 0 || dszBlg2 == 1 || dszBlg2 == 2);
   1204    return i;
   1205 }
   1206 ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftImmOp op,
   1207                                     HReg dst, HReg src, UInt amt ) {
   1208    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1209    i->tag                    = ARM64in_VShiftImmV;
   1210    i->ARM64in.VShiftImmV.op  = op;
   1211    i->ARM64in.VShiftImmV.dst = dst;
   1212    i->ARM64in.VShiftImmV.src = src;
   1213    i->ARM64in.VShiftImmV.amt = amt;
   1214    UInt minSh = 0;
   1215    UInt maxSh = 0;
   1216    switch (op) {
   1217       /* For right shifts, the allowed shift amounts are 1 .. lane_size.
   1218          For left shifts,  the allowed shift amounts are 0 .. lane_size-1.
   1219       */
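               /* For example, USHR Vd.2d, Vn.2d, #n requires 1 <= n <= 64,
                  whereas SHL Vd.2d, Vn.2d, #n requires 0 <= n <= 63. */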
   1220       case ARM64vecshi_USHR64x2: case ARM64vecshi_SSHR64x2:
   1221       case ARM64vecshi_UQSHRN2SD: case ARM64vecshi_SQSHRN2SD:
   1222       case ARM64vecshi_SQSHRUN2SD:
   1223       case ARM64vecshi_UQRSHRN2SD: case ARM64vecshi_SQRSHRN2SD:
   1224       case ARM64vecshi_SQRSHRUN2SD:
   1225          minSh = 1; maxSh = 64; break;
   1226       case ARM64vecshi_SHL64x2:
   1227       case ARM64vecshi_UQSHL64x2: case ARM64vecshi_SQSHL64x2:
   1228       case ARM64vecshi_SQSHLU64x2:
   1229          minSh = 0; maxSh = 63; break;
   1230       case ARM64vecshi_USHR32x4: case ARM64vecshi_SSHR32x4:
   1231       case ARM64vecshi_UQSHRN4HS: case ARM64vecshi_SQSHRN4HS:
   1232       case ARM64vecshi_SQSHRUN4HS:
   1233       case ARM64vecshi_UQRSHRN4HS: case ARM64vecshi_SQRSHRN4HS:
   1234       case ARM64vecshi_SQRSHRUN4HS:
   1235          minSh = 1; maxSh = 32; break;
   1236       case ARM64vecshi_SHL32x4:
   1237       case ARM64vecshi_UQSHL32x4: case ARM64vecshi_SQSHL32x4:
   1238       case ARM64vecshi_SQSHLU32x4:
   1239          minSh = 0; maxSh = 31; break;
   1240       case ARM64vecshi_USHR16x8: case ARM64vecshi_SSHR16x8:
   1241       case ARM64vecshi_UQSHRN8BH: case ARM64vecshi_SQSHRN8BH:
   1242       case ARM64vecshi_SQSHRUN8BH:
   1243       case ARM64vecshi_UQRSHRN8BH: case ARM64vecshi_SQRSHRN8BH:
   1244       case ARM64vecshi_SQRSHRUN8BH:
   1245          minSh = 1; maxSh = 16; break;
   1246       case ARM64vecshi_SHL16x8:
   1247       case ARM64vecshi_UQSHL16x8: case ARM64vecshi_SQSHL16x8:
   1248       case ARM64vecshi_SQSHLU16x8:
   1249          minSh = 0; maxSh = 15; break;
   1250       case ARM64vecshi_USHR8x16: case ARM64vecshi_SSHR8x16:
   1251          minSh = 1; maxSh = 8; break;
   1252       case ARM64vecshi_SHL8x16:
   1253       case ARM64vecshi_UQSHL8x16: case ARM64vecshi_SQSHL8x16:
   1254       case ARM64vecshi_SQSHLU8x16:
   1255          minSh = 0; maxSh = 7; break;
   1256       default:
   1257          vassert(0);
   1258    }
   1259    vassert(maxSh > 0);
   1260    vassert(amt >= minSh && amt <= maxSh);
   1261    return i;
   1262 }
   1263 ARM64Instr* ARM64Instr_VExtV ( HReg dst, HReg srcLo, HReg srcHi, UInt amtB ) {
   1264    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1265    i->tag                 = ARM64in_VExtV;
   1266    i->ARM64in.VExtV.dst   = dst;
   1267    i->ARM64in.VExtV.srcLo = srcLo;
   1268    i->ARM64in.VExtV.srcHi = srcHi;
   1269    i->ARM64in.VExtV.amtB  = amtB;
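            /* amtB is the EXT #index byte immediate: the result is bytes
               amtB .. amtB+15 of the pair srcHi:srcLo (srcLo low).  An
               index of 0, which would simply copy srcLo, is not accepted
               here. */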
   1270    vassert(amtB >= 1 && amtB <= 15);
   1271    return i;
   1272 }
   1273 ARM64Instr* ARM64Instr_VImmQ (HReg rQ, UShort imm) {
   1274    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1275    i->tag               = ARM64in_VImmQ;
   1276    i->ARM64in.VImmQ.rQ  = rQ;
   1277    i->ARM64in.VImmQ.imm = imm;
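            /* Judging by the "Bits16toBytes16" rendering in the
               pretty-printer below, each bit of imm selects the value of
               the corresponding byte of rQ: set => 0xFF, clear => 0x00. */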
   1278    /* Check that this is something that can actually be emitted. */
   1279    switch (imm) {
   1280       case 0x0000: case 0x0001: case 0x0003:
   1281       case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
   1282          break;
   1283       default:
   1284          vassert(0);
   1285    }
   1286    return i;
   1287 }
   1288 ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX ) {
   1289    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1290    i->tag                = ARM64in_VDfromX;
   1291    i->ARM64in.VDfromX.rD = rD;
   1292    i->ARM64in.VDfromX.rX = rX;
   1293    return i;
   1294 }
   1295 ARM64Instr* ARM64Instr_VQfromX ( HReg rQ, HReg rXlo ) {
   1296    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1297    i->tag                  = ARM64in_VQfromX;
   1298    i->ARM64in.VQfromX.rQ   = rQ;
   1299    i->ARM64in.VQfromX.rXlo = rXlo;
   1300    return i;
   1301 }
   1302 ARM64Instr* ARM64Instr_VQfromXX ( HReg rQ, HReg rXhi, HReg rXlo ) {
   1303    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1304    i->tag                   = ARM64in_VQfromXX;
   1305    i->ARM64in.VQfromXX.rQ   = rQ;
   1306    i->ARM64in.VQfromXX.rXhi = rXhi;
   1307    i->ARM64in.VQfromXX.rXlo = rXlo;
   1308    return i;
   1309 }
   1310 ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ) {
   1311    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1312    i->tag                    = ARM64in_VXfromQ;
   1313    i->ARM64in.VXfromQ.rX     = rX;
   1314    i->ARM64in.VXfromQ.rQ     = rQ;
   1315    i->ARM64in.VXfromQ.laneNo = laneNo;
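            /* rQ is viewed as two 64-bit lanes (.d[0] and .d[1]); laneNo
               selects which lane is copied to rX. */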
   1316    vassert(laneNo <= 1);
   1317    return i;
   1318 }
   1319 ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD ) {
   1320    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1321    i->tag                      = ARM64in_VXfromDorS;
   1322    i->ARM64in.VXfromDorS.rX    = rX;
   1323    i->ARM64in.VXfromDorS.rDorS = rDorS;
   1324    i->ARM64in.VXfromDorS.fromD = fromD;
   1325    return i;
   1326 }
   1327 ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) {
   1328    ARM64Instr* i       = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1329    i->tag              = ARM64in_VMov;
   1330    i->ARM64in.VMov.szB = szB;
   1331    i->ARM64in.VMov.dst = dst;
   1332    i->ARM64in.VMov.src = src;
   1333    switch (szB) {
   1334       case 16:
   1335         vassert(hregClass(src) == HRcVec128);
   1336         vassert(hregClass(dst) == HRcVec128);
   1337         break;
   1338       case 8:
   1339         vassert(hregClass(src) == HRcFlt64);
   1340         vassert(hregClass(dst) == HRcFlt64);
   1341         break;
   1342       default:
   1343         vpanic("ARM64Instr_VMov");
   1344    }
   1345    return i;
   1346 }
   1347 ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
   1348                                  ARM64AMode* amFailAddr ) {
   1349    ARM64Instr* i                 = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1350    i->tag                        = ARM64in_EvCheck;
   1351    i->ARM64in.EvCheck.amCounter  = amCounter;
   1352    i->ARM64in.EvCheck.amFailAddr = amFailAddr;
   1353    return i;
   1354 }
   1355 ARM64Instr* ARM64Instr_ProfInc ( void ) {
   1356    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   1357    i->tag        = ARM64in_ProfInc;
   1358    return i;
   1359 }
   1360 
   1361 /* ... */
   1362 
   1363 void ppARM64Instr ( const ARM64Instr* i ) {
   1364    switch (i->tag) {
   1365       case ARM64in_Arith:
   1366          vex_printf("%s    ", i->ARM64in.Arith.isAdd ? "add" : "sub");
   1367          ppHRegARM64(i->ARM64in.Arith.dst);
   1368          vex_printf(", ");
   1369          ppHRegARM64(i->ARM64in.Arith.argL);
   1370          vex_printf(", ");
   1371          ppARM64RIA(i->ARM64in.Arith.argR);
   1372          return;
   1373       case ARM64in_Cmp:
   1374          vex_printf("cmp%s ", i->ARM64in.Cmp.is64 ? "   " : "(w)" );
   1375          ppHRegARM64(i->ARM64in.Cmp.argL);
   1376          vex_printf(", ");
   1377          ppARM64RIA(i->ARM64in.Cmp.argR);
   1378          return;
   1379       case ARM64in_Logic:
   1380          vex_printf("%s    ", showARM64LogicOp(i->ARM64in.Logic.op));
   1381          ppHRegARM64(i->ARM64in.Logic.dst);
   1382          vex_printf(", ");
   1383          ppHRegARM64(i->ARM64in.Logic.argL);
   1384          vex_printf(", ");
   1385          ppARM64RIL(i->ARM64in.Logic.argR);
   1386          return;
   1387       case ARM64in_Test:
   1388          vex_printf("tst    ");
   1389          ppHRegARM64(i->ARM64in.Test.argL);
   1390          vex_printf(", ");
   1391          ppARM64RIL(i->ARM64in.Test.argR);
   1392          return;
   1393       case ARM64in_Shift:
   1394          vex_printf("%s    ", showARM64ShiftOp(i->ARM64in.Shift.op));
   1395          ppHRegARM64(i->ARM64in.Shift.dst);
   1396          vex_printf(", ");
   1397          ppHRegARM64(i->ARM64in.Shift.argL);
   1398          vex_printf(", ");
   1399          ppARM64RI6(i->ARM64in.Shift.argR);
   1400          return;
   1401       case ARM64in_Unary:
   1402          vex_printf("%s    ", showARM64UnaryOp(i->ARM64in.Unary.op));
   1403          ppHRegARM64(i->ARM64in.Unary.dst);
   1404          vex_printf(", ");
   1405          ppHRegARM64(i->ARM64in.Unary.src);
   1406          return;
   1407       case ARM64in_MovI:
   1408          vex_printf("mov    ");
   1409          ppHRegARM64(i->ARM64in.MovI.dst);
   1410          vex_printf(", ");
   1411          ppHRegARM64(i->ARM64in.MovI.src);
   1412          return;
   1413       case ARM64in_Imm64:
   1414          vex_printf("imm64  ");
   1415          ppHRegARM64(i->ARM64in.Imm64.dst);
   1416          vex_printf(", 0x%llx", i->ARM64in.Imm64.imm64);
   1417          return;
   1418       case ARM64in_LdSt64:
   1419          if (i->ARM64in.LdSt64.isLoad) {
   1420             vex_printf("ldr    ");
   1421             ppHRegARM64(i->ARM64in.LdSt64.rD);
   1422             vex_printf(", ");
   1423             ppARM64AMode(i->ARM64in.LdSt64.amode);
   1424          } else {
   1425             vex_printf("str    ");
   1426             ppARM64AMode(i->ARM64in.LdSt64.amode);
   1427             vex_printf(", ");
   1428             ppHRegARM64(i->ARM64in.LdSt64.rD);
   1429          }
   1430          return;
   1431       case ARM64in_LdSt32:
   1432          if (i->ARM64in.LdSt32.isLoad) {
   1433             vex_printf("ldruw  ");
   1434             ppHRegARM64(i->ARM64in.LdSt32.rD);
   1435             vex_printf(", ");
   1436             ppARM64AMode(i->ARM64in.LdSt32.amode);
   1437          } else {
   1438             vex_printf("strw   ");
   1439             ppARM64AMode(i->ARM64in.LdSt32.amode);
   1440             vex_printf(", ");
   1441             ppHRegARM64(i->ARM64in.LdSt32.rD);
   1442          }
   1443          return;
   1444       case ARM64in_LdSt16:
   1445          if (i->ARM64in.LdSt16.isLoad) {
   1446             vex_printf("ldruh  ");
   1447             ppHRegARM64(i->ARM64in.LdSt16.rD);
   1448             vex_printf(", ");
   1449             ppARM64AMode(i->ARM64in.LdSt16.amode);
   1450          } else {
   1451             vex_printf("strh   ");
   1452             ppARM64AMode(i->ARM64in.LdSt16.amode);
   1453             vex_printf(", ");
   1454             ppHRegARM64(i->ARM64in.LdSt16.rD);
   1455          }
   1456          return;
   1457       case ARM64in_LdSt8:
   1458          if (i->ARM64in.LdSt8.isLoad) {
   1459             vex_printf("ldrub  ");
   1460             ppHRegARM64(i->ARM64in.LdSt8.rD);
   1461             vex_printf(", ");
   1462             ppARM64AMode(i->ARM64in.LdSt8.amode);
   1463          } else {
   1464             vex_printf("strb   ");
   1465             ppARM64AMode(i->ARM64in.LdSt8.amode);
   1466             vex_printf(", ");
   1467             ppHRegARM64(i->ARM64in.LdSt8.rD);
   1468          }
   1469          return;
   1470       case ARM64in_XDirect:
   1471          vex_printf("(xDirect) ");
   1472          vex_printf("if (%%pstate.%s) { ",
   1473                     showARM64CondCode(i->ARM64in.XDirect.cond));
   1474          vex_printf("imm64 x9,0x%llx; ", i->ARM64in.XDirect.dstGA);
   1475          vex_printf("str x9,");
   1476          ppARM64AMode(i->ARM64in.XDirect.amPC);
   1477          vex_printf("; imm64-exactly4 x9,$disp_cp_chain_me_to_%sEP; ",
   1478                     i->ARM64in.XDirect.toFastEP ? "fast" : "slow");
   1479          vex_printf("blr x9 }");
   1480          return;
   1481       case ARM64in_XIndir:
   1482          vex_printf("(xIndir) ");
   1483          vex_printf("if (%%pstate.%s) { ",
   1484                     showARM64CondCode(i->ARM64in.XIndir.cond));
   1485          vex_printf("str ");
   1486          ppHRegARM64(i->ARM64in.XIndir.dstGA);
   1487          vex_printf(",");
   1488          ppARM64AMode(i->ARM64in.XIndir.amPC);
   1489          vex_printf("; imm64 x9,$disp_cp_xindir; ");
   1490          vex_printf("br x9 }");
   1491          return;
   1492       case ARM64in_XAssisted:
   1493          vex_printf("(xAssisted) ");
   1494          vex_printf("if (%%pstate.%s) { ",
   1495                     showARM64CondCode(i->ARM64in.XAssisted.cond));
   1496          vex_printf("str ");
   1497          ppHRegARM64(i->ARM64in.XAssisted.dstGA);
   1498          vex_printf(",");
   1499          ppARM64AMode(i->ARM64in.XAssisted.amPC);
   1500          vex_printf("; movw x21,$IRJumpKind_to_TRCVAL(%d); ",
   1501                     (Int)i->ARM64in.XAssisted.jk);
   1502          vex_printf("imm64 x9,$disp_cp_xassisted; ");
   1503          vex_printf("br x9 }");
   1504          return;
   1505       case ARM64in_CSel:
   1506          vex_printf("csel   ");
   1507          ppHRegARM64(i->ARM64in.CSel.dst);
   1508          vex_printf(", ");
   1509          ppHRegARM64(i->ARM64in.CSel.argL);
   1510          vex_printf(", ");
   1511          ppHRegARM64(i->ARM64in.CSel.argR);
   1512          vex_printf(", %s", showARM64CondCode(i->ARM64in.CSel.cond));
   1513          return;
   1514       case ARM64in_Call:
   1515          vex_printf("call%s ",
   1516                     i->ARM64in.Call.cond==ARM64cc_AL
   1517                        ? "  " : showARM64CondCode(i->ARM64in.Call.cond));
   1518          vex_printf("0x%llx [nArgRegs=%d, ",
   1519                     i->ARM64in.Call.target, i->ARM64in.Call.nArgRegs);
   1520          ppRetLoc(i->ARM64in.Call.rloc);
   1521          vex_printf("]");
   1522          return;
   1523       case ARM64in_AddToSP: {
   1524          Int simm = i->ARM64in.AddToSP.simm;
   1525          vex_printf("%s    xsp, xsp, #%d", simm < 0 ? "sub" : "add",
   1526                                            simm < 0 ? -simm : simm);
   1527          return;
   1528       }
   1529       case ARM64in_FromSP:
   1530          vex_printf("mov    ");
   1531          ppHRegARM64(i->ARM64in.FromSP.dst);
   1532          vex_printf(", xsp");
   1533          return;
   1534       case ARM64in_Mul:
   1535          vex_printf("%s  ", showARM64MulOp(i->ARM64in.Mul.op));
   1536          ppHRegARM64(i->ARM64in.Mul.dst);
   1537          vex_printf(", ");
   1538          ppHRegARM64(i->ARM64in.Mul.argL);
   1539          vex_printf(", ");
   1540          ppHRegARM64(i->ARM64in.Mul.argR);
   1541          return;
   1542 
   1543       case ARM64in_LdrEX: {
   1544          const HChar* sz = " ";
   1545          switch (i->ARM64in.LdrEX.szB) {
   1546             case 1: sz = "b"; break;
   1547             case 2: sz = "h"; break;
   1548             case 4: case 8: break;
   1549             default: vassert(0);
   1550          }
   1551          vex_printf("ldxr%s  %c2, [x4]",
   1552                     sz, i->ARM64in.LdrEX.szB == 8 ? 'x' : 'w');
   1553          return;
   1554       }
   1555       case ARM64in_StrEX: {
   1556          const HChar* sz = " ";
   1557          switch (i->ARM64in.StrEX.szB) {
   1558             case 1: sz = "b"; break;
   1559             case 2: sz = "h"; break;
   1560             case 4: case 8: break;
   1561             default: vassert(0);
   1562          }
   1563          vex_printf("stxr%s  w0, %c2, [x4]",
   1564                     sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
   1565          return;
   1566       }
   1567       case ARM64in_MFence:
   1568          vex_printf("(mfence) dsb sy; dmb sy; isb");
   1569          return;
   1570       case ARM64in_VLdStH:
   1571          if (i->ARM64in.VLdStH.isLoad) {
   1572             vex_printf("ldr    ");
   1573             ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
   1574             vex_printf(", %u(", i->ARM64in.VLdStH.uimm12);
   1575             ppHRegARM64(i->ARM64in.VLdStH.rN);
   1576             vex_printf(")");
   1577          } else {
   1578             vex_printf("str    ");
   1579             vex_printf("%u(", i->ARM64in.VLdStH.uimm12);
   1580             ppHRegARM64(i->ARM64in.VLdStH.rN);
   1581             vex_printf("), ");
   1582             ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
   1583          }
   1584          return;
   1585       case ARM64in_VLdStS:
   1586          if (i->ARM64in.VLdStS.isLoad) {
   1587             vex_printf("ldr    ");
   1588             ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
   1589             vex_printf(", %u(", i->ARM64in.VLdStS.uimm12);
   1590             ppHRegARM64(i->ARM64in.VLdStS.rN);
   1591             vex_printf(")");
   1592          } else {
   1593             vex_printf("str    ");
   1594             vex_printf("%u(", i->ARM64in.VLdStS.uimm12);
   1595             ppHRegARM64(i->ARM64in.VLdStS.rN);
   1596             vex_printf("), ");
   1597             ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
   1598          }
   1599          return;
   1600       case ARM64in_VLdStD:
   1601          if (i->ARM64in.VLdStD.isLoad) {
   1602             vex_printf("ldr    ");
   1603             ppHRegARM64(i->ARM64in.VLdStD.dD);
   1604             vex_printf(", %u(", i->ARM64in.VLdStD.uimm12);
   1605             ppHRegARM64(i->ARM64in.VLdStD.rN);
   1606             vex_printf(")");
   1607          } else {
   1608             vex_printf("str    ");
   1609             vex_printf("%u(", i->ARM64in.VLdStD.uimm12);
   1610             ppHRegARM64(i->ARM64in.VLdStD.rN);
   1611             vex_printf("), ");
   1612             ppHRegARM64(i->ARM64in.VLdStD.dD);
   1613          }
   1614          return;
   1615       case ARM64in_VLdStQ:
   1616          if (i->ARM64in.VLdStQ.isLoad)
   1617             vex_printf("ld1.2d {");
   1618          else
   1619             vex_printf("st1.2d {");
   1620          ppHRegARM64(i->ARM64in.VLdStQ.rQ);
   1621          vex_printf("}, [");
   1622          ppHRegARM64(i->ARM64in.VLdStQ.rN);
   1623          vex_printf("]");
   1624          return;
   1625       case ARM64in_VCvtI2F: {
   1626          HChar syn  = '?';
   1627          UInt  fszB = 0;
   1628          UInt  iszB = 0;
   1629          characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtI2F.how);
   1630          vex_printf("%ccvtf  ", syn);
   1631          ppHRegARM64(i->ARM64in.VCvtI2F.rD);
   1632          vex_printf("(%c-reg), ", fszB == 4 ? 'S' : 'D');
   1633          ppHRegARM64(i->ARM64in.VCvtI2F.rS);
   1634          vex_printf("(%c-reg)", iszB == 4 ? 'W' : 'X');
   1635          return;
   1636       }
   1637       case ARM64in_VCvtF2I: {
   1638          HChar syn  = '?';
   1639          UInt  fszB = 0;
   1640          UInt  iszB = 0;
   1641          HChar rmo  = '?';
   1642          characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtF2I.how);
   1643          UChar armRM = i->ARM64in.VCvtF2I.armRM;
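                  /* armRM encodes the rounding mode: 0 = N (to nearest),
                     1 = P (towards +inf), 2 = M (towards -inf),
                     3 = Z (towards zero), matching FCVT{N,P,M,Z}. */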
   1644          if (armRM < 4) rmo = "npmz"[armRM];
   1645          vex_printf("fcvt%c%c ", rmo, syn);
   1646          ppHRegARM64(i->ARM64in.VCvtF2I.rD);
   1647          vex_printf("(%c-reg), ", iszB == 4 ? 'W' : 'X');
   1648          ppHRegARM64(i->ARM64in.VCvtF2I.rS);
   1649          vex_printf("(%c-reg)", fszB == 4 ? 'S' : 'D');
   1650          return;
   1651       }
   1652       case ARM64in_VCvtSD:
   1653          vex_printf("fcvt%s ", i->ARM64in.VCvtSD.sToD ? "s2d" : "d2s");
   1654          if (i->ARM64in.VCvtSD.sToD) {
   1655             ppHRegARM64(i->ARM64in.VCvtSD.dst);
   1656             vex_printf(", ");
   1657             ppHRegARM64asSreg(i->ARM64in.VCvtSD.src);
   1658          } else {
   1659             ppHRegARM64asSreg(i->ARM64in.VCvtSD.dst);
   1660             vex_printf(", ");
   1661             ppHRegARM64(i->ARM64in.VCvtSD.src);
   1662          }
   1663          return;
   1664       case ARM64in_VCvtHS:
   1665          vex_printf("fcvt%s ", i->ARM64in.VCvtHS.hToS ? "h2s" : "s2h");
   1666          if (i->ARM64in.VCvtHS.hToS) {
   1667             ppHRegARM64asSreg(i->ARM64in.VCvtHS.dst);
   1668             vex_printf(", ");
   1669             ppHRegARM64asHreg(i->ARM64in.VCvtHS.src);
   1670          } else {
   1671             ppHRegARM64asHreg(i->ARM64in.VCvtHS.dst);
   1672             vex_printf(", ");
   1673             ppHRegARM64asSreg(i->ARM64in.VCvtHS.src);
   1674          }
   1675          return;
   1676       case ARM64in_VCvtHD:
   1677          vex_printf("fcvt%s ", i->ARM64in.VCvtHD.hToD ? "h2d" : "d2h");
   1678          if (i->ARM64in.VCvtHD.hToD) {
   1679             ppHRegARM64(i->ARM64in.VCvtHD.dst);
   1680             vex_printf(", ");
   1681             ppHRegARM64asHreg(i->ARM64in.VCvtHD.src);
   1682          } else {
   1683             ppHRegARM64asHreg(i->ARM64in.VCvtHD.dst);
   1684             vex_printf(", ");
   1685             ppHRegARM64(i->ARM64in.VCvtHD.src);
   1686          }
   1687          return;
   1688       case ARM64in_VUnaryD:
   1689          vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
   1690          ppHRegARM64(i->ARM64in.VUnaryD.dst);
   1691          vex_printf(", ");
   1692          ppHRegARM64(i->ARM64in.VUnaryD.src);
   1693          return;
   1694       case ARM64in_VUnaryS:
   1695          vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryS.op));
   1696          ppHRegARM64asSreg(i->ARM64in.VUnaryS.dst);
   1697          vex_printf(", ");
   1698          ppHRegARM64asSreg(i->ARM64in.VUnaryS.src);
   1699          return;
   1700       case ARM64in_VBinD:
   1701          vex_printf("f%s   ", showARM64FpBinOp(i->ARM64in.VBinD.op));
   1702          ppHRegARM64(i->ARM64in.VBinD.dst);
   1703          vex_printf(", ");
   1704          ppHRegARM64(i->ARM64in.VBinD.argL);
   1705          vex_printf(", ");
   1706          ppHRegARM64(i->ARM64in.VBinD.argR);
   1707          return;
   1708       case ARM64in_VBinS:
   1709          vex_printf("f%s   ", showARM64FpBinOp(i->ARM64in.VBinS.op));
   1710          ppHRegARM64asSreg(i->ARM64in.VBinS.dst);
   1711          vex_printf(", ");
   1712          ppHRegARM64asSreg(i->ARM64in.VBinS.argL);
   1713          vex_printf(", ");
   1714          ppHRegARM64asSreg(i->ARM64in.VBinS.argR);
   1715          return;
   1716       case ARM64in_VCmpD:
   1717          vex_printf("fcmp   ");
   1718          ppHRegARM64(i->ARM64in.VCmpD.argL);
   1719          vex_printf(", ");
   1720          ppHRegARM64(i->ARM64in.VCmpD.argR);
   1721          return;
   1722       case ARM64in_VCmpS:
   1723          vex_printf("fcmp   ");
   1724          ppHRegARM64asSreg(i->ARM64in.VCmpS.argL);
   1725          vex_printf(", ");
   1726          ppHRegARM64asSreg(i->ARM64in.VCmpS.argR);
   1727          return;
   1728       case ARM64in_VFCSel: {
   1729          void (*ppHRegARM64fp)(HReg)
   1730             = (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg);
   1731          vex_printf("fcsel  ");
   1732          ppHRegARM64fp(i->ARM64in.VFCSel.dst);
   1733          vex_printf(", ");
   1734          ppHRegARM64fp(i->ARM64in.VFCSel.argL);
   1735          vex_printf(", ");
   1736          ppHRegARM64fp(i->ARM64in.VFCSel.argR);
   1737          vex_printf(", %s", showARM64CondCode(i->ARM64in.VFCSel.cond));
   1738          return;
   1739       }
   1740       case ARM64in_FPCR:
   1741          if (i->ARM64in.FPCR.toFPCR) {
   1742             vex_printf("msr    fpcr, ");
   1743             ppHRegARM64(i->ARM64in.FPCR.iReg);
   1744          } else {
   1745             vex_printf("mrs    ");
   1746             ppHRegARM64(i->ARM64in.FPCR.iReg);
   1747             vex_printf(", fpcr");
   1748          }
   1749          return;
   1750       case ARM64in_FPSR:
   1751          if (i->ARM64in.FPSR.toFPSR) {
   1752             vex_printf("msr    fpsr, ");
   1753             ppHRegARM64(i->ARM64in.FPSR.iReg);
   1754          } else {
   1755             vex_printf("mrs    ");
   1756             ppHRegARM64(i->ARM64in.FPSR.iReg);
   1757             vex_printf(", fpsr");
   1758          }
   1759          return;
   1760       case ARM64in_VBinV: {
   1761          const HChar* nm = "??";
   1762          const HChar* ar = "??";
   1763          showARM64VecBinOp(&nm, &ar, i->ARM64in.VBinV.op);
   1764          vex_printf("%s ", nm);
   1765          ppHRegARM64(i->ARM64in.VBinV.dst);
   1766          vex_printf(".%s, ", ar);
   1767          ppHRegARM64(i->ARM64in.VBinV.argL);
   1768          vex_printf(".%s, ", ar);
   1769          ppHRegARM64(i->ARM64in.VBinV.argR);
   1770          vex_printf(".%s", ar);
   1771          return;
   1772       }
   1773       case ARM64in_VModifyV: {
   1774          const HChar* nm = "??";
   1775          const HChar* ar = "??";
   1776          showARM64VecModifyOp(&nm, &ar, i->ARM64in.VModifyV.op);
   1777          vex_printf("%s ", nm);
   1778          ppHRegARM64(i->ARM64in.VModifyV.mod);
   1779          vex_printf(".%s, ", ar);
   1780          ppHRegARM64(i->ARM64in.VModifyV.arg);
   1781          vex_printf(".%s", ar);
   1782          return;
   1783       }
   1784       case ARM64in_VUnaryV: {
   1785          const HChar* nm = "??";
   1786          const HChar* ar = "??";
   1787          showARM64VecUnaryOp(&nm, &ar, i->ARM64in.VUnaryV.op);
   1788          vex_printf("%s  ", nm);
   1789          ppHRegARM64(i->ARM64in.VUnaryV.dst);
   1790          vex_printf(".%s, ", ar);
   1791          ppHRegARM64(i->ARM64in.VUnaryV.arg);
   1792          vex_printf(".%s", ar);
   1793          return;
   1794       }
   1795       case ARM64in_VNarrowV: {
   1796          UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
   1797          const HChar* darr[3] = { "8b", "4h", "2s" };
   1798          const HChar* sarr[3] = { "8h", "4s", "2d" };
   1799          const HChar* nm = showARM64VecNarrowOp(i->ARM64in.VNarrowV.op);
   1800          vex_printf("%s ", nm);
   1801          ppHRegARM64(i->ARM64in.VNarrowV.dst);
   1802          vex_printf(".%s, ", dszBlg2 < 3 ? darr[dszBlg2] : "??");
   1803          ppHRegARM64(i->ARM64in.VNarrowV.src);
   1804          vex_printf(".%s", dszBlg2 < 3 ? sarr[dszBlg2] : "??");
   1805          return;
   1806       }
   1807       case ARM64in_VShiftImmV: {
   1808          const HChar* nm = "??";
   1809          const HChar* ar = "??";
   1810          showARM64VecShiftImmOp(&nm, &ar, i->ARM64in.VShiftImmV.op);
   1811          vex_printf("%s ", nm);
   1812          ppHRegARM64(i->ARM64in.VShiftImmV.dst);
   1813          vex_printf(".%s, ", ar);
   1814          ppHRegARM64(i->ARM64in.VShiftImmV.src);
   1815          vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt);
   1816          return;
   1817       }
   1818       case ARM64in_VExtV: {
   1819          vex_printf("ext    ");
   1820          ppHRegARM64(i->ARM64in.VExtV.dst);
   1821          vex_printf(".16b, ");
   1822          ppHRegARM64(i->ARM64in.VExtV.srcLo);
   1823          vex_printf(".16b, ");
   1824          ppHRegARM64(i->ARM64in.VExtV.srcHi);
   1825          vex_printf(".16b, #%u", i->ARM64in.VExtV.amtB);
   1826          return;
   1827       }
   1828       case ARM64in_VImmQ:
   1829          vex_printf("qimm   ");
   1830          ppHRegARM64(i->ARM64in.VImmQ.rQ);
   1831          vex_printf(", Bits16toBytes16(0x%x)", (UInt)i->ARM64in.VImmQ.imm);
   1832          return;
   1833       case ARM64in_VDfromX:
   1834          vex_printf("fmov   ");
   1835          ppHRegARM64(i->ARM64in.VDfromX.rD);
   1836          vex_printf(", ");
   1837          ppHRegARM64(i->ARM64in.VDfromX.rX);
   1838          return;
   1839       case ARM64in_VQfromX:
   1840          vex_printf("fmov   ");
   1841          ppHRegARM64(i->ARM64in.VQfromX.rQ);
   1842          vex_printf(".d[0], ");
   1843          ppHRegARM64(i->ARM64in.VQfromX.rXlo);
   1844          return;
   1845       case ARM64in_VQfromXX:
   1846          vex_printf("qFromXX ");
   1847          ppHRegARM64(i->ARM64in.VQfromXX.rQ);
   1848          vex_printf(", ");
   1849          ppHRegARM64(i->ARM64in.VQfromXX.rXhi);
   1850          vex_printf(", ");
   1851          ppHRegARM64(i->ARM64in.VQfromXX.rXlo);
   1852          return;
   1853       case ARM64in_VXfromQ:
   1854          vex_printf("fmov   ");
   1855          ppHRegARM64(i->ARM64in.VXfromQ.rX);
   1856          vex_printf(", ");
   1857          ppHRegARM64(i->ARM64in.VXfromQ.rQ);
   1858          vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo);
   1859          return;
   1860       case ARM64in_VXfromDorS:
   1861          vex_printf("fmov   ");
   1862          ppHRegARM64(i->ARM64in.VXfromDorS.rX);
   1863          vex_printf("(%c-reg), ", i->ARM64in.VXfromDorS.fromD ? 'X':'W');
   1864          ppHRegARM64(i->ARM64in.VXfromDorS.rDorS);
   1865          vex_printf("(%c-reg)", i->ARM64in.VXfromDorS.fromD ? 'D' : 'S');
   1866          return;
   1867       case ARM64in_VMov: {
   1868          UChar aux = '?';
   1869          switch (i->ARM64in.VMov.szB) {
   1870             case 16: aux = 'q'; break;
   1871             case 8:  aux = 'd'; break;
   1872             case 4:  aux = 's'; break;
   1873             default: break;
   1874          }
   1875          vex_printf("mov(%c) ", aux);
   1876          ppHRegARM64(i->ARM64in.VMov.dst);
   1877          vex_printf(", ");
   1878          ppHRegARM64(i->ARM64in.VMov.src);
   1879          return;
   1880       }
   1881       case ARM64in_EvCheck:
   1882          vex_printf("(evCheck) ldr w9,");
   1883          ppARM64AMode(i->ARM64in.EvCheck.amCounter);
   1884          vex_printf("; subs w9,w9,$1; str w9,");
   1885          ppARM64AMode(i->ARM64in.EvCheck.amCounter);
   1886          vex_printf("; bpl nofail; ldr x9,");
   1887          ppARM64AMode(i->ARM64in.EvCheck.amFailAddr);
   1888          vex_printf("; br x9; nofail:");
   1889          return;
   1890       case ARM64in_ProfInc:
   1891          vex_printf("(profInc) imm64-fixed4 x9,$NotKnownYet; "
    1892                     "ldr x8,[x9]; add x8,x8,#1; str x8,[x9]");
   1893          return;
   1894       default:
   1895          vex_printf("ppARM64Instr: unhandled case (tag %d)", (Int)i->tag);
   1896          vpanic("ppARM64Instr(1)");
   1897          return;
   1898    }
   1899 }
   1900 
   1901 
   1902 /* --------- Helpers for register allocation. --------- */
   1903 
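         /* Record in *u the registers read and written by *i, for the
            benefit of the register allocator. */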
   1904 void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
   1905 {
   1906    vassert(mode64 == True);
   1907    initHRegUsage(u);
   1908    switch (i->tag) {
   1909       case ARM64in_Arith:
   1910          addHRegUse(u, HRmWrite, i->ARM64in.Arith.dst);
   1911          addHRegUse(u, HRmRead, i->ARM64in.Arith.argL);
   1912          addRegUsage_ARM64RIA(u, i->ARM64in.Arith.argR);
   1913          return;
   1914       case ARM64in_Cmp:
   1915          addHRegUse(u, HRmRead, i->ARM64in.Cmp.argL);
   1916          addRegUsage_ARM64RIA(u, i->ARM64in.Cmp.argR);
   1917          return;
   1918       case ARM64in_Logic:
   1919          addHRegUse(u, HRmWrite, i->ARM64in.Logic.dst);
   1920          addHRegUse(u, HRmRead, i->ARM64in.Logic.argL);
   1921          addRegUsage_ARM64RIL(u, i->ARM64in.Logic.argR);
   1922          return;
   1923       case ARM64in_Test:
   1924          addHRegUse(u, HRmRead, i->ARM64in.Test.argL);
   1925          addRegUsage_ARM64RIL(u, i->ARM64in.Test.argR);
   1926          return;
   1927       case ARM64in_Shift:
   1928          addHRegUse(u, HRmWrite, i->ARM64in.Shift.dst);
   1929          addHRegUse(u, HRmRead, i->ARM64in.Shift.argL);
   1930          addRegUsage_ARM64RI6(u, i->ARM64in.Shift.argR);
   1931          return;
   1932       case ARM64in_Unary:
   1933          addHRegUse(u, HRmWrite, i->ARM64in.Unary.dst);
   1934          addHRegUse(u, HRmRead, i->ARM64in.Unary.src);
   1935          return;
   1936       case ARM64in_MovI:
   1937          addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
   1938          addHRegUse(u, HRmRead,  i->ARM64in.MovI.src);
   1939          return;
   1940       case ARM64in_Imm64:
   1941          addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
   1942          return;
   1943       case ARM64in_LdSt64:
   1944          addRegUsage_ARM64AMode(u, i->ARM64in.LdSt64.amode);
   1945          if (i->ARM64in.LdSt64.isLoad) {
   1946             addHRegUse(u, HRmWrite, i->ARM64in.LdSt64.rD);
   1947          } else {
   1948             addHRegUse(u, HRmRead, i->ARM64in.LdSt64.rD);
   1949          }
   1950          return;
   1951       case ARM64in_LdSt32:
   1952          addRegUsage_ARM64AMode(u, i->ARM64in.LdSt32.amode);
   1953          if (i->ARM64in.LdSt32.isLoad) {
   1954             addHRegUse(u, HRmWrite, i->ARM64in.LdSt32.rD);
   1955          } else {
   1956             addHRegUse(u, HRmRead, i->ARM64in.LdSt32.rD);
   1957          }
   1958          return;
   1959       case ARM64in_LdSt16:
   1960          addRegUsage_ARM64AMode(u, i->ARM64in.LdSt16.amode);
   1961          if (i->ARM64in.LdSt16.isLoad) {
   1962             addHRegUse(u, HRmWrite, i->ARM64in.LdSt16.rD);
   1963          } else {
   1964             addHRegUse(u, HRmRead, i->ARM64in.LdSt16.rD);
   1965          }
   1966          return;
   1967       case ARM64in_LdSt8:
   1968          addRegUsage_ARM64AMode(u, i->ARM64in.LdSt8.amode);
   1969          if (i->ARM64in.LdSt8.isLoad) {
   1970             addHRegUse(u, HRmWrite, i->ARM64in.LdSt8.rD);
   1971          } else {
   1972             addHRegUse(u, HRmRead, i->ARM64in.LdSt8.rD);
   1973          }
   1974          return;
   1975       /* XDirect/XIndir/XAssisted are also a bit subtle.  They
   1976          conditionally exit the block.  Hence we only need to list (1)
   1977          the registers that they read, and (2) the registers that they
   1978          write in the case where the block is not exited.  (2) is
   1979          empty, hence only (1) is relevant here. */
   1980       case ARM64in_XDirect:
   1981          addRegUsage_ARM64AMode(u, i->ARM64in.XDirect.amPC);
   1982          return;
   1983       case ARM64in_XIndir:
   1984          addHRegUse(u, HRmRead, i->ARM64in.XIndir.dstGA);
   1985          addRegUsage_ARM64AMode(u, i->ARM64in.XIndir.amPC);
   1986          return;
   1987       case ARM64in_XAssisted:
   1988          addHRegUse(u, HRmRead, i->ARM64in.XAssisted.dstGA);
   1989          addRegUsage_ARM64AMode(u, i->ARM64in.XAssisted.amPC);
   1990          return;
   1991       case ARM64in_CSel:
   1992          addHRegUse(u, HRmWrite, i->ARM64in.CSel.dst);
   1993          addHRegUse(u, HRmRead,  i->ARM64in.CSel.argL);
   1994          addHRegUse(u, HRmRead,  i->ARM64in.CSel.argR);
   1995          return;
   1996       case ARM64in_Call:
   1997          /* logic and comments copied/modified from x86 back end */
   1998          /* This is a bit subtle. */
   1999          /* First off, claim it trashes all the caller-saved regs
   2000             which fall within the register allocator's jurisdiction.
   2001             These I believe to be x0 to x7 and the 128-bit vector
   2002             registers in use, q16 .. q20. */
   2003          addHRegUse(u, HRmWrite, hregARM64_X0());
   2004          addHRegUse(u, HRmWrite, hregARM64_X1());
   2005          addHRegUse(u, HRmWrite, hregARM64_X2());
   2006          addHRegUse(u, HRmWrite, hregARM64_X3());
   2007          addHRegUse(u, HRmWrite, hregARM64_X4());
   2008          addHRegUse(u, HRmWrite, hregARM64_X5());
   2009          addHRegUse(u, HRmWrite, hregARM64_X6());
   2010          addHRegUse(u, HRmWrite, hregARM64_X7());
   2011          addHRegUse(u, HRmWrite, hregARM64_Q16());
   2012          addHRegUse(u, HRmWrite, hregARM64_Q17());
   2013          addHRegUse(u, HRmWrite, hregARM64_Q18());
   2014          addHRegUse(u, HRmWrite, hregARM64_Q19());
   2015          addHRegUse(u, HRmWrite, hregARM64_Q20());
   2016          /* Now we have to state any parameter-carrying registers
   2017             which might be read.  This depends on nArgRegs. */
    2018          switch (i->ARM64in.Call.nArgRegs) {
   2019             case 8: addHRegUse(u, HRmRead, hregARM64_X7()); /*fallthru*/
   2020             case 7: addHRegUse(u, HRmRead, hregARM64_X6()); /*fallthru*/
   2021             case 6: addHRegUse(u, HRmRead, hregARM64_X5()); /*fallthru*/
   2022             case 5: addHRegUse(u, HRmRead, hregARM64_X4()); /*fallthru*/
   2023             case 4: addHRegUse(u, HRmRead, hregARM64_X3()); /*fallthru*/
   2024             case 3: addHRegUse(u, HRmRead, hregARM64_X2()); /*fallthru*/
   2025             case 2: addHRegUse(u, HRmRead, hregARM64_X1()); /*fallthru*/
   2026             case 1: addHRegUse(u, HRmRead, hregARM64_X0()); break;
   2027             case 0: break;
   2028             default: vpanic("getRegUsage_ARM64:Call:regparms");
   2029          }
   2030          /* Finally, there is the issue that the insn trashes a
   2031             register because the literal target address has to be
   2032             loaded into a register.  However, we reserve x9 for that
   2033             purpose so there's no further complexity here.  Stating x9
   2034             as trashed is pointless since it's not under the control
   2035             of the allocator, but what the hell. */
   2036          addHRegUse(u, HRmWrite, hregARM64_X9());
   2037          return;
   2038       case ARM64in_AddToSP:
   2039          /* Only changes SP, but regalloc doesn't control that, hence
   2040             we don't care. */
   2041          return;
   2042       case ARM64in_FromSP:
   2043          addHRegUse(u, HRmWrite, i->ARM64in.FromSP.dst);
   2044          return;
   2045       case ARM64in_Mul:
   2046          addHRegUse(u, HRmWrite, i->ARM64in.Mul.dst);
   2047          addHRegUse(u, HRmRead,  i->ARM64in.Mul.argL);
   2048          addHRegUse(u, HRmRead,  i->ARM64in.Mul.argR);
   2049          return;
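               /* LdrEX and StrEX use fixed registers: the address is in x4,
                  the data in x2, and (for StrEX) the success/failure result
                  in w0; see the pretty-printer above. */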
   2050       case ARM64in_LdrEX:
   2051          addHRegUse(u, HRmRead, hregARM64_X4());
   2052          addHRegUse(u, HRmWrite, hregARM64_X2());
   2053          return;
   2054       case ARM64in_StrEX:
   2055          addHRegUse(u, HRmRead, hregARM64_X4());
   2056          addHRegUse(u, HRmWrite, hregARM64_X0());
   2057          addHRegUse(u, HRmRead, hregARM64_X2());
   2058          return;
   2059       case ARM64in_MFence:
   2060          return;
   2061       case ARM64in_VLdStH:
   2062          addHRegUse(u, HRmRead, i->ARM64in.VLdStH.rN);
   2063          if (i->ARM64in.VLdStH.isLoad) {
   2064             addHRegUse(u, HRmWrite, i->ARM64in.VLdStH.hD);
   2065          } else {
   2066             addHRegUse(u, HRmRead, i->ARM64in.VLdStH.hD);
   2067          }
   2068          return;
   2069       case ARM64in_VLdStS:
   2070          addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
   2071          if (i->ARM64in.VLdStS.isLoad) {
   2072             addHRegUse(u, HRmWrite, i->ARM64in.VLdStS.sD);
   2073          } else {
   2074             addHRegUse(u, HRmRead, i->ARM64in.VLdStS.sD);
   2075          }
   2076          return;
   2077       case ARM64in_VLdStD:
   2078          addHRegUse(u, HRmRead, i->ARM64in.VLdStD.rN);
   2079          if (i->ARM64in.VLdStD.isLoad) {
   2080             addHRegUse(u, HRmWrite, i->ARM64in.VLdStD.dD);
   2081          } else {
   2082             addHRegUse(u, HRmRead, i->ARM64in.VLdStD.dD);
   2083          }
   2084          return;
   2085       case ARM64in_VLdStQ:
   2086          addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rN);
   2087          if (i->ARM64in.VLdStQ.isLoad)
   2088             addHRegUse(u, HRmWrite, i->ARM64in.VLdStQ.rQ);
   2089          else
   2090             addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rQ);
   2091          return;
   2092       case ARM64in_VCvtI2F:
   2093          addHRegUse(u, HRmRead, i->ARM64in.VCvtI2F.rS);
   2094          addHRegUse(u, HRmWrite, i->ARM64in.VCvtI2F.rD);
   2095          return;
   2096       case ARM64in_VCvtF2I:
   2097          addHRegUse(u, HRmRead, i->ARM64in.VCvtF2I.rS);
   2098          addHRegUse(u, HRmWrite, i->ARM64in.VCvtF2I.rD);
   2099          return;
   2100       case ARM64in_VCvtSD:
   2101          addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst);
   2102          addHRegUse(u, HRmRead,  i->ARM64in.VCvtSD.src);
   2103          return;
   2104       case ARM64in_VCvtHS:
   2105          addHRegUse(u, HRmWrite, i->ARM64in.VCvtHS.dst);
   2106          addHRegUse(u, HRmRead,  i->ARM64in.VCvtHS.src);
   2107          return;
   2108       case ARM64in_VCvtHD:
   2109          addHRegUse(u, HRmWrite, i->ARM64in.VCvtHD.dst);
   2110          addHRegUse(u, HRmRead,  i->ARM64in.VCvtHD.src);
   2111          return;
   2112       case ARM64in_VUnaryD:
   2113          addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst);
   2114          addHRegUse(u, HRmRead, i->ARM64in.VUnaryD.src);
   2115          return;
   2116       case ARM64in_VUnaryS:
   2117          addHRegUse(u, HRmWrite, i->ARM64in.VUnaryS.dst);
   2118          addHRegUse(u, HRmRead, i->ARM64in.VUnaryS.src);
   2119          return;
   2120       case ARM64in_VBinD:
   2121          addHRegUse(u, HRmWrite, i->ARM64in.VBinD.dst);
   2122          addHRegUse(u, HRmRead, i->ARM64in.VBinD.argL);
   2123          addHRegUse(u, HRmRead, i->ARM64in.VBinD.argR);
   2124          return;
   2125       case ARM64in_VBinS:
   2126          addHRegUse(u, HRmWrite, i->ARM64in.VBinS.dst);
   2127          addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL);
   2128          addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR);
   2129          return;
   2130       case ARM64in_VCmpD:
   2131          addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL);
   2132          addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR);
   2133          return;
   2134       case ARM64in_VCmpS:
   2135          addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argL);
   2136          addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argR);
   2137          return;
   2138       case ARM64in_VFCSel:
   2139          addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argL);
   2140          addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argR);
   2141          addHRegUse(u, HRmWrite, i->ARM64in.VFCSel.dst);
   2142          return;
   2143       case ARM64in_FPCR:
   2144          if (i->ARM64in.FPCR.toFPCR)
   2145             addHRegUse(u, HRmRead, i->ARM64in.FPCR.iReg);
   2146          else
   2147             addHRegUse(u, HRmWrite, i->ARM64in.FPCR.iReg);
   2148          return;
   2149       case ARM64in_FPSR:
   2150          if (i->ARM64in.FPSR.toFPSR)
   2151             addHRegUse(u, HRmRead, i->ARM64in.FPSR.iReg);
   2152          else
   2153             addHRegUse(u, HRmWrite, i->ARM64in.FPSR.iReg);
   2154          return;
   2155       case ARM64in_VBinV:
   2156          addHRegUse(u, HRmWrite, i->ARM64in.VBinV.dst);
   2157          addHRegUse(u, HRmRead, i->ARM64in.VBinV.argL);
   2158          addHRegUse(u, HRmRead, i->ARM64in.VBinV.argR);
   2159          return;
   2160       case ARM64in_VModifyV:
   2161          addHRegUse(u, HRmWrite, i->ARM64in.VModifyV.mod);
   2162          addHRegUse(u, HRmRead, i->ARM64in.VModifyV.mod);
   2163          addHRegUse(u, HRmRead, i->ARM64in.VModifyV.arg);
   2164          return;
   2165       case ARM64in_VUnaryV:
   2166          addHRegUse(u, HRmWrite, i->ARM64in.VUnaryV.dst);
   2167          addHRegUse(u, HRmRead, i->ARM64in.VUnaryV.arg);
   2168          return;
   2169       case ARM64in_VNarrowV:
   2170          addHRegUse(u, HRmWrite, i->ARM64in.VNarrowV.dst);
   2171          addHRegUse(u, HRmRead, i->ARM64in.VNarrowV.src);
   2172          return;
   2173       case ARM64in_VShiftImmV:
   2174          addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst);
   2175          addHRegUse(u, HRmRead, i->ARM64in.VShiftImmV.src);
   2176          return;
   2177       case ARM64in_VExtV:
   2178          addHRegUse(u, HRmWrite, i->ARM64in.VExtV.dst);
   2179          addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcLo);
   2180          addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcHi);
   2181          return;
   2182       case ARM64in_VImmQ:
   2183          addHRegUse(u, HRmWrite, i->ARM64in.VImmQ.rQ);
   2184          return;
   2185       case ARM64in_VDfromX:
   2186          addHRegUse(u, HRmWrite, i->ARM64in.VDfromX.rD);
   2187          addHRegUse(u, HRmRead,  i->ARM64in.VDfromX.rX);
   2188          return;
   2189       case ARM64in_VQfromX:
   2190          addHRegUse(u, HRmWrite, i->ARM64in.VQfromX.rQ);
   2191          addHRegUse(u, HRmRead,  i->ARM64in.VQfromX.rXlo);
   2192          return;
   2193       case ARM64in_VQfromXX:
   2194          addHRegUse(u, HRmWrite, i->ARM64in.VQfromXX.rQ);
   2195          addHRegUse(u, HRmRead,  i->ARM64in.VQfromXX.rXhi);
   2196          addHRegUse(u, HRmRead,  i->ARM64in.VQfromXX.rXlo);
   2197          return;
   2198       case ARM64in_VXfromQ:
   2199          addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX);
   2200          addHRegUse(u, HRmRead,  i->ARM64in.VXfromQ.rQ);
   2201          return;
   2202       case ARM64in_VXfromDorS:
   2203          addHRegUse(u, HRmWrite, i->ARM64in.VXfromDorS.rX);
   2204          addHRegUse(u, HRmRead,  i->ARM64in.VXfromDorS.rDorS);
   2205          return;
   2206       case ARM64in_VMov:
   2207          addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
   2208          addHRegUse(u, HRmRead,  i->ARM64in.VMov.src);
   2209          return;
   2210       case ARM64in_EvCheck:
   2211          /* We expect both amodes only to mention x21, so this is in
   2212             fact pointless, since x21 isn't allocatable, but
   2213             anyway.. */
   2214          addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amCounter);
   2215          addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amFailAddr);
   2216          addHRegUse(u, HRmWrite, hregARM64_X9()); /* also unavail to RA */
   2217          return;
   2218       case ARM64in_ProfInc:
   2219          /* Again, pointless to actually state these since neither
   2220             is available to RA. */
   2221          addHRegUse(u, HRmWrite, hregARM64_X9()); /* unavail to RA */
   2222          addHRegUse(u, HRmWrite, hregARM64_X8()); /* unavail to RA */
   2223          return;
   2224       default:
   2225          ppARM64Instr(i);
   2226          vpanic("getRegUsage_ARM64Instr");
   2227    }
   2228 }
   2229 
   2230 
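         /* Map the virtual registers in *i to the real registers chosen by
            the allocator, as given by *m.  The instruction is updated in
            place. */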
   2231 void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
   2232 {
   2233    vassert(mode64 == True);
   2234    switch (i->tag) {
   2235       case ARM64in_Arith:
   2236          i->ARM64in.Arith.dst = lookupHRegRemap(m, i->ARM64in.Arith.dst);
   2237          i->ARM64in.Arith.argL = lookupHRegRemap(m, i->ARM64in.Arith.argL);
   2238          mapRegs_ARM64RIA(m, i->ARM64in.Arith.argR);
   2239          return;
   2240       case ARM64in_Cmp:
   2241          i->ARM64in.Cmp.argL = lookupHRegRemap(m, i->ARM64in.Cmp.argL);
   2242          mapRegs_ARM64RIA(m, i->ARM64in.Cmp.argR);
   2243          return;
   2244       case ARM64in_Logic:
   2245          i->ARM64in.Logic.dst = lookupHRegRemap(m, i->ARM64in.Logic.dst);
   2246          i->ARM64in.Logic.argL = lookupHRegRemap(m, i->ARM64in.Logic.argL);
   2247          mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR);
   2248          return;
   2249       case ARM64in_Test:
   2250          i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL);
    2251          mapRegs_ARM64RIL(m, i->ARM64in.Test.argR);
   2252          return;
   2253       case ARM64in_Shift:
   2254          i->ARM64in.Shift.dst = lookupHRegRemap(m, i->ARM64in.Shift.dst);
   2255          i->ARM64in.Shift.argL = lookupHRegRemap(m, i->ARM64in.Shift.argL);
   2256          mapRegs_ARM64RI6(m, i->ARM64in.Shift.argR);
   2257          return;
   2258       case ARM64in_Unary:
   2259          i->ARM64in.Unary.dst = lookupHRegRemap(m, i->ARM64in.Unary.dst);
   2260          i->ARM64in.Unary.src = lookupHRegRemap(m, i->ARM64in.Unary.src);
   2261          return;
   2262       case ARM64in_MovI:
   2263          i->ARM64in.MovI.dst = lookupHRegRemap(m, i->ARM64in.MovI.dst);
   2264          i->ARM64in.MovI.src = lookupHRegRemap(m, i->ARM64in.MovI.src);
   2265          return;
   2266       case ARM64in_Imm64:
   2267          i->ARM64in.Imm64.dst = lookupHRegRemap(m, i->ARM64in.Imm64.dst);
   2268          return;
   2269       case ARM64in_LdSt64:
   2270          i->ARM64in.LdSt64.rD = lookupHRegRemap(m, i->ARM64in.LdSt64.rD);
   2271          mapRegs_ARM64AMode(m, i->ARM64in.LdSt64.amode);
   2272          return;
   2273       case ARM64in_LdSt32:
   2274          i->ARM64in.LdSt32.rD = lookupHRegRemap(m, i->ARM64in.LdSt32.rD);
   2275          mapRegs_ARM64AMode(m, i->ARM64in.LdSt32.amode);
   2276          return;
   2277       case ARM64in_LdSt16:
   2278          i->ARM64in.LdSt16.rD = lookupHRegRemap(m, i->ARM64in.LdSt16.rD);
   2279          mapRegs_ARM64AMode(m, i->ARM64in.LdSt16.amode);
   2280          return;
   2281       case ARM64in_LdSt8:
   2282          i->ARM64in.LdSt8.rD = lookupHRegRemap(m, i->ARM64in.LdSt8.rD);
   2283          mapRegs_ARM64AMode(m, i->ARM64in.LdSt8.amode);
   2284          return;
   2285       case ARM64in_XDirect:
   2286          mapRegs_ARM64AMode(m, i->ARM64in.XDirect.amPC);
   2287          return;
   2288       case ARM64in_XIndir:
   2289          i->ARM64in.XIndir.dstGA
   2290             = lookupHRegRemap(m, i->ARM64in.XIndir.dstGA);
   2291          mapRegs_ARM64AMode(m, i->ARM64in.XIndir.amPC);
   2292          return;
   2293       case ARM64in_XAssisted:
   2294          i->ARM64in.XAssisted.dstGA
   2295             = lookupHRegRemap(m, i->ARM64in.XAssisted.dstGA);
   2296          mapRegs_ARM64AMode(m, i->ARM64in.XAssisted.amPC);
   2297          return;
   2298       case ARM64in_CSel:
   2299          i->ARM64in.CSel.dst  = lookupHRegRemap(m, i->ARM64in.CSel.dst);
   2300          i->ARM64in.CSel.argL = lookupHRegRemap(m, i->ARM64in.CSel.argL);
   2301          i->ARM64in.CSel.argR = lookupHRegRemap(m, i->ARM64in.CSel.argR);
   2302          return;
   2303       case ARM64in_Call:
   2304          return;
   2305       case ARM64in_AddToSP:
   2306          return;
   2307       case ARM64in_FromSP:
   2308          i->ARM64in.FromSP.dst = lookupHRegRemap(m, i->ARM64in.FromSP.dst);
   2309          return;
   2310       case ARM64in_Mul:
   2311          i->ARM64in.Mul.dst  = lookupHRegRemap(m, i->ARM64in.Mul.dst);
   2312          i->ARM64in.Mul.argL = lookupHRegRemap(m, i->ARM64in.Mul.argL);
   2313          i->ARM64in.Mul.argR = lookupHRegRemap(m, i->ARM64in.Mul.argR);
    2314          return;
   2315       case ARM64in_LdrEX:
   2316          return;
   2317       case ARM64in_StrEX:
   2318          return;
   2319       case ARM64in_MFence:
   2320          return;
   2321       case ARM64in_VLdStH:
   2322          i->ARM64in.VLdStH.hD = lookupHRegRemap(m, i->ARM64in.VLdStH.hD);
   2323          i->ARM64in.VLdStH.rN = lookupHRegRemap(m, i->ARM64in.VLdStH.rN);
   2324          return;
   2325       case ARM64in_VLdStS:
   2326          i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
   2327          i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN);
   2328          return;
   2329       case ARM64in_VLdStD:
   2330          i->ARM64in.VLdStD.dD = lookupHRegRemap(m, i->ARM64in.VLdStD.dD);
   2331          i->ARM64in.VLdStD.rN = lookupHRegRemap(m, i->ARM64in.VLdStD.rN);
   2332          return;
   2333       case ARM64in_VLdStQ:
   2334          i->ARM64in.VLdStQ.rQ = lookupHRegRemap(m, i->ARM64in.VLdStQ.rQ);
   2335          i->ARM64in.VLdStQ.rN = lookupHRegRemap(m, i->ARM64in.VLdStQ.rN);
   2336          return;
   2337       case ARM64in_VCvtI2F:
   2338          i->ARM64in.VCvtI2F.rS = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rS);
   2339          i->ARM64in.VCvtI2F.rD = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rD);
   2340          return;
   2341       case ARM64in_VCvtF2I:
   2342          i->ARM64in.VCvtF2I.rS = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rS);
   2343          i->ARM64in.VCvtF2I.rD = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rD);
   2344          return;
   2345       case ARM64in_VCvtSD:
   2346          i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst);
   2347          i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src);
   2348          return;
   2349       case ARM64in_VCvtHS:
   2350          i->ARM64in.VCvtHS.dst = lookupHRegRemap(m, i->ARM64in.VCvtHS.dst);
   2351          i->ARM64in.VCvtHS.src = lookupHRegRemap(m, i->ARM64in.VCvtHS.src);
   2352          return;
   2353       case ARM64in_VCvtHD:
   2354          i->ARM64in.VCvtHD.dst = lookupHRegRemap(m, i->ARM64in.VCvtHD.dst);
   2355          i->ARM64in.VCvtHD.src = lookupHRegRemap(m, i->ARM64in.VCvtHD.src);
   2356          return;
   2357       case ARM64in_VUnaryD:
   2358          i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst);
   2359          i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src);
   2360          return;
   2361       case ARM64in_VUnaryS:
   2362          i->ARM64in.VUnaryS.dst = lookupHRegRemap(m, i->ARM64in.VUnaryS.dst);
   2363          i->ARM64in.VUnaryS.src = lookupHRegRemap(m, i->ARM64in.VUnaryS.src);
   2364          return;
   2365       case ARM64in_VBinD:
   2366          i->ARM64in.VBinD.dst  = lookupHRegRemap(m, i->ARM64in.VBinD.dst);
   2367          i->ARM64in.VBinD.argL = lookupHRegRemap(m, i->ARM64in.VBinD.argL);
   2368          i->ARM64in.VBinD.argR = lookupHRegRemap(m, i->ARM64in.VBinD.argR);
   2369          return;
   2370       case ARM64in_VBinS:
   2371          i->ARM64in.VBinS.dst  = lookupHRegRemap(m, i->ARM64in.VBinS.dst);
   2372          i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL);
   2373          i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR);
   2374          return;
   2375       case ARM64in_VCmpD:
   2376          i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL);
   2377          i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR);
   2378          return;
   2379       case ARM64in_VCmpS:
   2380          i->ARM64in.VCmpS.argL = lookupHRegRemap(m, i->ARM64in.VCmpS.argL);
   2381          i->ARM64in.VCmpS.argR = lookupHRegRemap(m, i->ARM64in.VCmpS.argR);
   2382          return;
   2383       case ARM64in_VFCSel:
   2384          i->ARM64in.VFCSel.argL = lookupHRegRemap(m, i->ARM64in.VFCSel.argL);
   2385          i->ARM64in.VFCSel.argR = lookupHRegRemap(m, i->ARM64in.VFCSel.argR);
   2386          i->ARM64in.VFCSel.dst  = lookupHRegRemap(m, i->ARM64in.VFCSel.dst);
   2387          return;
   2388       case ARM64in_FPCR:
   2389          i->ARM64in.FPCR.iReg = lookupHRegRemap(m, i->ARM64in.FPCR.iReg);
   2390          return;
   2391       case ARM64in_FPSR:
   2392          i->ARM64in.FPSR.iReg = lookupHRegRemap(m, i->ARM64in.FPSR.iReg);
   2393          return;
   2394       case ARM64in_VBinV:
   2395          i->ARM64in.VBinV.dst  = lookupHRegRemap(m, i->ARM64in.VBinV.dst);
   2396          i->ARM64in.VBinV.argL = lookupHRegRemap(m, i->ARM64in.VBinV.argL);
   2397          i->ARM64in.VBinV.argR = lookupHRegRemap(m, i->ARM64in.VBinV.argR);
   2398          return;
   2399       case ARM64in_VModifyV:
   2400          i->ARM64in.VModifyV.mod = lookupHRegRemap(m, i->ARM64in.VModifyV.mod);
   2401          i->ARM64in.VModifyV.arg = lookupHRegRemap(m, i->ARM64in.VModifyV.arg);
   2402          return;
   2403       case ARM64in_VUnaryV:
   2404          i->ARM64in.VUnaryV.dst = lookupHRegRemap(m, i->ARM64in.VUnaryV.dst);
   2405          i->ARM64in.VUnaryV.arg = lookupHRegRemap(m, i->ARM64in.VUnaryV.arg);
   2406          return;
   2407       case ARM64in_VNarrowV:
   2408          i->ARM64in.VNarrowV.dst = lookupHRegRemap(m, i->ARM64in.VNarrowV.dst);
   2409          i->ARM64in.VNarrowV.src = lookupHRegRemap(m, i->ARM64in.VNarrowV.src);
   2410          return;
   2411       case ARM64in_VShiftImmV:
   2412          i->ARM64in.VShiftImmV.dst
   2413             = lookupHRegRemap(m, i->ARM64in.VShiftImmV.dst);
   2414          i->ARM64in.VShiftImmV.src
   2415             = lookupHRegRemap(m, i->ARM64in.VShiftImmV.src);
   2416          return;
   2417       case ARM64in_VExtV:
   2418          i->ARM64in.VExtV.dst = lookupHRegRemap(m, i->ARM64in.VExtV.dst);
   2419          i->ARM64in.VExtV.srcLo = lookupHRegRemap(m, i->ARM64in.VExtV.srcLo);
   2420          i->ARM64in.VExtV.srcHi = lookupHRegRemap(m, i->ARM64in.VExtV.srcHi);
   2421          return;
   2422       case ARM64in_VImmQ:
   2423          i->ARM64in.VImmQ.rQ = lookupHRegRemap(m, i->ARM64in.VImmQ.rQ);
   2424          return;
   2425       case ARM64in_VDfromX:
   2426          i->ARM64in.VDfromX.rD
   2427             = lookupHRegRemap(m, i->ARM64in.VDfromX.rD);
   2428          i->ARM64in.VDfromX.rX
   2429             = lookupHRegRemap(m, i->ARM64in.VDfromX.rX);
   2430          return;
   2431       case ARM64in_VQfromX:
   2432          i->ARM64in.VQfromX.rQ
   2433             = lookupHRegRemap(m, i->ARM64in.VQfromX.rQ);
   2434          i->ARM64in.VQfromX.rXlo
   2435             = lookupHRegRemap(m, i->ARM64in.VQfromX.rXlo);
   2436          return;
   2437       case ARM64in_VQfromXX:
   2438          i->ARM64in.VQfromXX.rQ
   2439             = lookupHRegRemap(m, i->ARM64in.VQfromXX.rQ);
   2440          i->ARM64in.VQfromXX.rXhi
   2441             = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXhi);
   2442          i->ARM64in.VQfromXX.rXlo
   2443             = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXlo);
   2444          return;
   2445       case ARM64in_VXfromQ:
   2446          i->ARM64in.VXfromQ.rX
   2447             = lookupHRegRemap(m, i->ARM64in.VXfromQ.rX);
   2448          i->ARM64in.VXfromQ.rQ
   2449             = lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ);
   2450          return;
   2451       case ARM64in_VXfromDorS:
   2452          i->ARM64in.VXfromDorS.rX
   2453             = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rX);
   2454          i->ARM64in.VXfromDorS.rDorS
   2455             = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rDorS);
   2456          return;
   2457       case ARM64in_VMov:
   2458          i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst);
   2459          i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src);
   2460          return;
   2461       case ARM64in_EvCheck:
   2462          /* We expect both amodes only to mention x21, so this is in
   2463             fact pointless, since x21 isn't allocatable, but
   2464             anyway.. */
   2465          mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amCounter);
   2466          mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amFailAddr);
   2467          return;
   2468       case ARM64in_ProfInc:
   2469          /* hardwires x8 and x9 -- nothing to modify. */
   2470          return;
   2471       default:
   2472          ppARM64Instr(i);
   2473          vpanic("mapRegs_ARM64Instr");
   2474    }
   2475 }
   2476 
   2477 /* Figure out if i represents a reg-reg move, and if so assign the
   2478    source and destination to *src and *dst.  If in doubt say No.  Used
   2479    by the register allocator to do move coalescing.
   2480 */
   2481 Bool isMove_ARM64Instr ( const ARM64Instr* i, HReg* src, HReg* dst )
   2482 {
   2483    switch (i->tag) {
   2484       case ARM64in_MovI:
   2485          *src = i->ARM64in.MovI.src;
   2486          *dst = i->ARM64in.MovI.dst;
   2487          return True;
   2488       case ARM64in_VMov:
   2489          *src = i->ARM64in.VMov.src;
   2490          *dst = i->ARM64in.VMov.dst;
   2491          return True;
   2492       default:
   2493          break;
   2494    }
   2495 
   2496    return False;
   2497 }
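/* Illustrative sketch only (not part of this backend): how a register
   allocator might use isMove_ARM64Instr for move coalescing.  The
   'instrs' array, 'nInstrs' count and markCoalescable() helper are
   hypothetical names, used just to make the intended call pattern
   concrete.

      HReg src, dst;
      for (Int k = 0; k < nInstrs; k++) {
         if (isMove_ARM64Instr(instrs[k], &src, &dst)) {
            // A plain reg-reg copy: the allocator may give src and
            // dst the same real register and drop the instruction.
            markCoalescable(instrs[k], src, dst);
         }
      }
*/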
   2498 
   2499 
   2500 /* Generate arm spill/reload instructions under the direction of the
   2501    register allocator.  Note it's critical these don't write the
   2502    condition codes. */
   2503 
   2504 void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   2505                       HReg rreg, Int offsetB, Bool mode64 )
   2506 {
   2507    HRegClass rclass;
   2508    vassert(offsetB >= 0);
   2509    vassert(!hregIsVirtual(rreg));
   2510    vassert(mode64 == True);
   2511    *i1 = *i2 = NULL;
   2512    rclass = hregClass(rreg);
   2513    switch (rclass) {
   2514       case HRcInt64:
   2515          vassert(0 == (offsetB & 7));
   2516          offsetB >>= 3;
   2517          vassert(offsetB < 4096);
   2518          *i1 = ARM64Instr_LdSt64(
   2519                   False/*!isLoad*/,
   2520                   rreg,
   2521                   ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
   2522                );
   2523          return;
   2524       case HRcFlt64:
   2525          vassert(0 == (offsetB & 7));
   2526          vassert(offsetB >= 0 && offsetB < 32768);
   2527          *i1 = ARM64Instr_VLdStD(False/*!isLoad*/,
   2528                                  rreg, hregARM64_X21(), offsetB);
   2529          return;
   2530       case HRcVec128: {
   2531          HReg x21  = hregARM64_X21();  // baseblock
   2532          HReg x9   = hregARM64_X9();   // spill temporary
   2533          vassert(0 == (offsetB & 15)); // check sane alignment
   2534          vassert(offsetB < 4096);
   2535          *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
   2536          *i2 = ARM64Instr_VLdStQ(False/*!isLoad*/, rreg, x9);
   2537          return;
   2538       }
   2539       default:
   2540          ppHRegClass(rclass);
    2541          vpanic("genSpill_ARM64: unimplemented regclass");
   2542    }
   2543 }
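/* For orientation, a sketch of what genSpill_ARM64 above produces for
   each register class, assuming a spill slot at baseblock offset 64
   (x21 is the baseblock pointer throughout):

      HRcInt64  :  str xN, [x21, #64]        -- one instruction
      HRcFlt64  :  str dN, [x21, #64]        -- one instruction
      HRcVec128 :  add x9, x21, #64          -- x9 is the spill temporary
                   str qN, [x9]              -- two instructions

   genReload_ARM64 below emits the same shapes with loads in place of
   stores. */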
   2544 
   2545 void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
   2546                        HReg rreg, Int offsetB, Bool mode64 )
   2547 {
   2548    HRegClass rclass;
   2549    vassert(offsetB >= 0);
   2550    vassert(!hregIsVirtual(rreg));
   2551    vassert(mode64 == True);
   2552    *i1 = *i2 = NULL;
   2553    rclass = hregClass(rreg);
   2554    switch (rclass) {
   2555       case HRcInt64:
   2556          vassert(0 == (offsetB & 7));
   2557          offsetB >>= 3;
   2558          vassert(offsetB < 4096);
   2559          *i1 = ARM64Instr_LdSt64(
   2560                   True/*isLoad*/,
   2561                   rreg,
   2562                   ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
   2563                );
   2564          return;
   2565       case HRcFlt64:
   2566          vassert(0 == (offsetB & 7));
   2567          vassert(offsetB >= 0 && offsetB < 32768);
   2568          *i1 = ARM64Instr_VLdStD(True/*isLoad*/,
   2569                                  rreg, hregARM64_X21(), offsetB);
   2570          return;
   2571       case HRcVec128: {
   2572          HReg x21  = hregARM64_X21();  // baseblock
   2573          HReg x9   = hregARM64_X9();   // spill temporary
   2574          vassert(0 == (offsetB & 15)); // check sane alignment
   2575          vassert(offsetB < 4096);
   2576          *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
   2577          *i2 = ARM64Instr_VLdStQ(True/*isLoad*/, rreg, x9);
   2578          return;
   2579       }
   2580       default:
   2581          ppHRegClass(rclass);
    2582          vpanic("genReload_ARM64: unimplemented regclass");
   2583    }
   2584 }
   2585 
   2586 
   2587 /* Emit an instruction into buf and return the number of bytes used.
   2588    Note that buf is not the insn's final place, and therefore it is
   2589    imperative to emit position-independent code. */
   2590 
   2591 static inline UInt iregEnc ( HReg r )
   2592 {
   2593    UInt n;
   2594    vassert(hregClass(r) == HRcInt64);
   2595    vassert(!hregIsVirtual(r));
   2596    n = hregEncoding(r);
   2597    vassert(n <= 30);
   2598    return n;
   2599 }
   2600 
   2601 static inline UInt dregEnc ( HReg r )
   2602 {
   2603    UInt n;
   2604    vassert(hregClass(r) == HRcFlt64);
   2605    vassert(!hregIsVirtual(r));
   2606    n = hregEncoding(r);
   2607    vassert(n <= 31);
   2608    return n;
   2609 }
   2610 
   2611 static inline UInt qregEnc ( HReg r )
   2612 {
   2613    UInt n;
   2614    vassert(hregClass(r) == HRcVec128);
   2615    vassert(!hregIsVirtual(r));
   2616    n = hregEncoding(r);
   2617    vassert(n <= 31);
   2618    return n;
   2619 }
   2620 
   2621 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
   2622    (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
   2623 
   2624 #define X00  BITS4(0,0, 0,0)
   2625 #define X01  BITS4(0,0, 0,1)
   2626 #define X10  BITS4(0,0, 1,0)
   2627 #define X11  BITS4(0,0, 1,1)
   2628 
   2629 #define X000 BITS4(0, 0,0,0)
   2630 #define X001 BITS4(0, 0,0,1)
   2631 #define X010 BITS4(0, 0,1,0)
   2632 #define X011 BITS4(0, 0,1,1)
   2633 #define X100 BITS4(0, 1,0,0)
   2634 #define X101 BITS4(0, 1,0,1)
   2635 #define X110 BITS4(0, 1,1,0)
   2636 #define X111 BITS4(0, 1,1,1)
   2637 
   2638 #define X0000 BITS4(0,0,0,0)
   2639 #define X0001 BITS4(0,0,0,1)
   2640 #define X0010 BITS4(0,0,1,0)
   2641 #define X0011 BITS4(0,0,1,1)
   2642 
   2643 #define BITS8(zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
   2644   ((BITS4(zzb7,zzb6,zzb5,zzb4) << 4) | BITS4(zzb3,zzb2,zzb1,zzb0))
   2645 
   2646 #define X00000   BITS8(0,0,0, 0,0,0,0,0)
   2647 #define X00001   BITS8(0,0,0, 0,0,0,0,1)
   2648 #define X00110   BITS8(0,0,0, 0,0,1,1,0)
   2649 #define X00111   BITS8(0,0,0, 0,0,1,1,1)
   2650 #define X01000   BITS8(0,0,0, 0,1,0,0,0)
   2651 #define X10000   BITS8(0,0,0, 1,0,0,0,0)
   2652 #define X11000   BITS8(0,0,0, 1,1,0,0,0)
   2653 #define X11110   BITS8(0,0,0, 1,1,1,1,0)
   2654 #define X11111   BITS8(0,0,0, 1,1,1,1,1)
   2655 
   2656 #define X000000  BITS8(0,0, 0,0,0,0,0,0)
   2657 #define X000001  BITS8(0,0, 0,0,0,0,0,1)
   2658 #define X000010  BITS8(0,0, 0,0,0,0,1,0)
   2659 #define X000011  BITS8(0,0, 0,0,0,0,1,1)
   2660 #define X000100  BITS8(0,0, 0,0,0,1,0,0)
   2661 #define X000110  BITS8(0,0, 0,0,0,1,1,0)
   2662 #define X000111  BITS8(0,0, 0,0,0,1,1,1)
   2663 #define X001000  BITS8(0,0, 0,0,1,0,0,0)
   2664 #define X001001  BITS8(0,0, 0,0,1,0,0,1)
   2665 #define X001010  BITS8(0,0, 0,0,1,0,1,0)
   2666 #define X001011  BITS8(0,0, 0,0,1,0,1,1)
   2667 #define X001101  BITS8(0,0, 0,0,1,1,0,1)
   2668 #define X001110  BITS8(0,0, 0,0,1,1,1,0)
   2669 #define X001111  BITS8(0,0, 0,0,1,1,1,1)
   2670 #define X010000  BITS8(0,0, 0,1,0,0,0,0)
   2671 #define X010001  BITS8(0,0, 0,1,0,0,0,1)
   2672 #define X010010  BITS8(0,0, 0,1,0,0,1,0)
   2673 #define X010011  BITS8(0,0, 0,1,0,0,1,1)
   2674 #define X010101  BITS8(0,0, 0,1,0,1,0,1)
   2675 #define X010110  BITS8(0,0, 0,1,0,1,1,0)
   2676 #define X010111  BITS8(0,0, 0,1,0,1,1,1)
   2677 #define X011001  BITS8(0,0, 0,1,1,0,0,1)
   2678 #define X011010  BITS8(0,0, 0,1,1,0,1,0)
   2679 #define X011011  BITS8(0,0, 0,1,1,0,1,1)
   2680 #define X011101  BITS8(0,0, 0,1,1,1,0,1)
   2681 #define X011110  BITS8(0,0, 0,1,1,1,1,0)
   2682 #define X011111  BITS8(0,0, 0,1,1,1,1,1)
   2683 #define X100001  BITS8(0,0, 1,0,0,0,0,1)
   2684 #define X100011  BITS8(0,0, 1,0,0,0,1,1)
   2685 #define X100100  BITS8(0,0, 1,0,0,1,0,0)
   2686 #define X100101  BITS8(0,0, 1,0,0,1,0,1)
   2687 #define X100110  BITS8(0,0, 1,0,0,1,1,0)
   2688 #define X100111  BITS8(0,0, 1,0,0,1,1,1)
   2689 #define X101101  BITS8(0,0, 1,0,1,1,0,1)
   2690 #define X101110  BITS8(0,0, 1,0,1,1,1,0)
   2691 #define X110000  BITS8(0,0, 1,1,0,0,0,0)
   2692 #define X110001  BITS8(0,0, 1,1,0,0,0,1)
   2693 #define X110010  BITS8(0,0, 1,1,0,0,1,0)
   2694 #define X110100  BITS8(0,0, 1,1,0,1,0,0)
   2695 #define X110101  BITS8(0,0, 1,1,0,1,0,1)
   2696 #define X110110  BITS8(0,0, 1,1,0,1,1,0)
   2697 #define X110111  BITS8(0,0, 1,1,0,1,1,1)
   2698 #define X111000  BITS8(0,0, 1,1,1,0,0,0)
   2699 #define X111001  BITS8(0,0, 1,1,1,0,0,1)
   2700 #define X111101  BITS8(0,0, 1,1,1,1,0,1)
   2701 #define X111110  BITS8(0,0, 1,1,1,1,1,0)
   2702 #define X111111  BITS8(0,0, 1,1,1,1,1,1)
   2703 
   2704 #define X0001000  BITS8(0, 0,0,0,1,0,0,0)
   2705 #define X0010000  BITS8(0, 0,0,1,0,0,0,0)
   2706 #define X0100000  BITS8(0, 0,1,0,0,0,0,0)
   2707 #define X1000000  BITS8(0, 1,0,0,0,0,0,0)
   2708 
   2709 #define X00100000  BITS8(0,0,1,0,0,0,0,0)
   2710 #define X00100001  BITS8(0,0,1,0,0,0,0,1)
   2711 #define X00100010  BITS8(0,0,1,0,0,0,1,0)
   2712 #define X00100011  BITS8(0,0,1,0,0,0,1,1)
   2713 #define X01010000  BITS8(0,1,0,1,0,0,0,0)
   2714 #define X01010001  BITS8(0,1,0,1,0,0,0,1)
   2715 #define X01010100  BITS8(0,1,0,1,0,1,0,0)
   2716 #define X01011000  BITS8(0,1,0,1,1,0,0,0)
   2717 #define X01100000  BITS8(0,1,1,0,0,0,0,0)
   2718 #define X01100001  BITS8(0,1,1,0,0,0,0,1)
   2719 #define X01100010  BITS8(0,1,1,0,0,0,1,0)
   2720 #define X01100011  BITS8(0,1,1,0,0,0,1,1)
   2721 #define X01110000  BITS8(0,1,1,1,0,0,0,0)
   2722 #define X01110001  BITS8(0,1,1,1,0,0,0,1)
   2723 #define X01110010  BITS8(0,1,1,1,0,0,1,0)
   2724 #define X01110011  BITS8(0,1,1,1,0,0,1,1)
   2725 #define X01110100  BITS8(0,1,1,1,0,1,0,0)
   2726 #define X01110101  BITS8(0,1,1,1,0,1,0,1)
   2727 #define X01110110  BITS8(0,1,1,1,0,1,1,0)
   2728 #define X01110111  BITS8(0,1,1,1,0,1,1,1)
   2729 #define X11000001  BITS8(1,1,0,0,0,0,0,1)
   2730 #define X11000011  BITS8(1,1,0,0,0,0,1,1)
   2731 #define X11010100  BITS8(1,1,0,1,0,1,0,0)
   2732 #define X11010110  BITS8(1,1,0,1,0,1,1,0)
   2733 #define X11011000  BITS8(1,1,0,1,1,0,0,0)
   2734 #define X11011010  BITS8(1,1,0,1,1,0,1,0)
   2735 #define X11011110  BITS8(1,1,0,1,1,1,1,0)
   2736 #define X11100010  BITS8(1,1,1,0,0,0,1,0)
   2737 #define X11110001  BITS8(1,1,1,1,0,0,0,1)
   2738 #define X11110011  BITS8(1,1,1,1,0,0,1,1)
   2739 #define X11110101  BITS8(1,1,1,1,0,1,0,1)
   2740 #define X11110111  BITS8(1,1,1,1,0,1,1,1)
   2741 
   2742 
   2743 /* --- 4 fields --- */
   2744 
   2745 static inline UInt X_8_19_1_4 ( UInt f1, UInt f2, UInt f3, UInt f4 ) {
   2746    vassert(8+19+1+4 == 32);
   2747    vassert(f1 < (1<<8));
   2748    vassert(f2 < (1<<19));
   2749    vassert(f3 < (1<<1));
   2750    vassert(f4 < (1<<4));
   2751    UInt w = 0;
   2752    w = (w <<  8) | f1;
   2753    w = (w << 19) | f2;
   2754    w = (w <<  1) | f3;
   2755    w = (w <<  4) | f4;
   2756    return w;
   2757 }
   2758 
   2759 /* --- 5 fields --- */
   2760 
   2761 static inline UInt X_3_6_2_16_5 ( UInt f1, UInt f2,
   2762                                   UInt f3, UInt f4, UInt f5 ) {
   2763    vassert(3+6+2+16+5 == 32);
   2764    vassert(f1 < (1<<3));
   2765    vassert(f2 < (1<<6));
   2766    vassert(f3 < (1<<2));
   2767    vassert(f4 < (1<<16));
   2768    vassert(f5 < (1<<5));
   2769    UInt w = 0;
   2770    w = (w <<  3) | f1;
   2771    w = (w <<  6) | f2;
   2772    w = (w <<  2) | f3;
   2773    w = (w << 16) | f4;
   2774    w = (w <<  5) | f5;
   2775    return w;
   2776 }
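/* Worked example (sketch, for orientation only): the MOVZ emitted by
   imm64_to_ireg below is assembled with this helper as

      X_3_6_2_16_5(X110, X100101, 0, 0x1234, 5)
        = 110 100101 00 0001001000110100 00101
        = 0xD2824685                       -- MOVZ x5, #0x1234, LSL #0

   i.e. each field is shifted into place, most significant field
   first, and the vasserts guarantee no field overflows its width. */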
   2777 
   2778 /* --- 6 fields --- */
   2779 
   2780 static inline UInt X_2_6_2_12_5_5 ( UInt f1, UInt f2, UInt f3,
   2781                                     UInt f4, UInt f5, UInt f6 ) {
   2782    vassert(2+6+2+12+5+5 == 32);
   2783    vassert(f1 < (1<<2));
   2784    vassert(f2 < (1<<6));
   2785    vassert(f3 < (1<<2));
   2786    vassert(f4 < (1<<12));
   2787    vassert(f5 < (1<<5));
   2788    vassert(f6 < (1<<5));
   2789    UInt w = 0;
   2790    w = (w <<  2) | f1;
   2791    w = (w <<  6) | f2;
   2792    w = (w <<  2) | f3;
   2793    w = (w << 12) | f4;
   2794    w = (w <<  5) | f5;
   2795    w = (w <<  5) | f6;
   2796    return w;
   2797 }
   2798 
   2799 static inline UInt X_3_8_5_6_5_5 ( UInt f1, UInt f2, UInt f3,
   2800                                    UInt f4, UInt f5, UInt f6 ) {
   2801    vassert(3+8+5+6+5+5 == 32);
   2802    vassert(f1 < (1<<3));
   2803    vassert(f2 < (1<<8));
   2804    vassert(f3 < (1<<5));
   2805    vassert(f4 < (1<<6));
   2806    vassert(f5 < (1<<5));
   2807    vassert(f6 < (1<<5));
   2808    UInt w = 0;
   2809    w = (w <<  3) | f1;
   2810    w = (w <<  8) | f2;
   2811    w = (w <<  5) | f3;
   2812    w = (w <<  6) | f4;
   2813    w = (w <<  5) | f5;
   2814    w = (w <<  5) | f6;
   2815    return w;
   2816 }
   2817 
   2818 static inline UInt X_3_5_8_6_5_5 ( UInt f1, UInt f2, UInt f3,
   2819                                    UInt f4, UInt f5, UInt f6 ) {
    2820    vassert(3+5+8+6+5+5 == 32);
   2821    vassert(f1 < (1<<3));
   2822    vassert(f2 < (1<<5));
   2823    vassert(f3 < (1<<8));
   2824    vassert(f4 < (1<<6));
   2825    vassert(f5 < (1<<5));
   2826    vassert(f6 < (1<<5));
   2827    UInt w = 0;
   2828    w = (w <<  3) | f1;
   2829    w = (w <<  5) | f2;
   2830    w = (w <<  8) | f3;
   2831    w = (w <<  6) | f4;
   2832    w = (w <<  5) | f5;
   2833    w = (w <<  5) | f6;
   2834    return w;
   2835 }
   2836 
   2837 static inline UInt X_3_6_7_6_5_5 ( UInt f1, UInt f2, UInt f3,
   2838                                    UInt f4, UInt f5, UInt f6 ) {
   2839    vassert(3+6+7+6+5+5 == 32);
   2840    vassert(f1 < (1<<3));
   2841    vassert(f2 < (1<<6));
   2842    vassert(f3 < (1<<7));
   2843    vassert(f4 < (1<<6));
   2844    vassert(f5 < (1<<5));
   2845    vassert(f6 < (1<<5));
   2846    UInt w = 0;
   2847    w = (w <<  3) | f1;
   2848    w = (w <<  6) | f2;
   2849    w = (w <<  7) | f3;
   2850    w = (w <<  6) | f4;
   2851    w = (w <<  5) | f5;
   2852    w = (w <<  5) | f6;
   2853    return w;
   2854 }
   2855 
   2856 /* --- 7 fields --- */
   2857 
   2858 static inline UInt X_2_6_3_9_2_5_5 ( UInt f1, UInt f2, UInt f3,
   2859                                      UInt f4, UInt f5, UInt f6, UInt f7 ) {
   2860    vassert(2+6+3+9+2+5+5 == 32);
   2861    vassert(f1 < (1<<2));
   2862    vassert(f2 < (1<<6));
   2863    vassert(f3 < (1<<3));
   2864    vassert(f4 < (1<<9));
   2865    vassert(f5 < (1<<2));
   2866    vassert(f6 < (1<<5));
   2867    vassert(f7 < (1<<5));
   2868    UInt w = 0;
   2869    w = (w << 2) | f1;
   2870    w = (w << 6) | f2;
   2871    w = (w << 3) | f3;
   2872    w = (w << 9) | f4;
   2873    w = (w << 2) | f5;
   2874    w = (w << 5) | f6;
   2875    w = (w << 5) | f7;
   2876    return w;
   2877 }
   2878 
   2879 static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
   2880                                      UInt f4, UInt f5, UInt f6, UInt f7 ) {
   2881    vassert(3+6+1+6+6+5+5 == 32);
   2882    vassert(f1 < (1<<3));
   2883    vassert(f2 < (1<<6));
   2884    vassert(f3 < (1<<1));
   2885    vassert(f4 < (1<<6));
   2886    vassert(f5 < (1<<6));
   2887    vassert(f6 < (1<<5));
   2888    vassert(f7 < (1<<5));
   2889    UInt w = 0;
   2890    w = (w << 3) | f1;
   2891    w = (w << 6) | f2;
   2892    w = (w << 1) | f3;
   2893    w = (w << 6) | f4;
   2894    w = (w << 6) | f5;
   2895    w = (w << 5) | f6;
   2896    w = (w << 5) | f7;
   2897    return w;
   2898 }
   2899 
   2900 
   2901 //ZZ #define X0000  BITS4(0,0,0,0)
   2902 //ZZ #define X0001  BITS4(0,0,0,1)
   2903 //ZZ #define X0010  BITS4(0,0,1,0)
   2904 //ZZ #define X0011  BITS4(0,0,1,1)
   2905 //ZZ #define X0100  BITS4(0,1,0,0)
   2906 //ZZ #define X0101  BITS4(0,1,0,1)
   2907 //ZZ #define X0110  BITS4(0,1,1,0)
   2908 //ZZ #define X0111  BITS4(0,1,1,1)
   2909 //ZZ #define X1000  BITS4(1,0,0,0)
   2910 //ZZ #define X1001  BITS4(1,0,0,1)
   2911 //ZZ #define X1010  BITS4(1,0,1,0)
   2912 //ZZ #define X1011  BITS4(1,0,1,1)
   2913 //ZZ #define X1100  BITS4(1,1,0,0)
   2914 //ZZ #define X1101  BITS4(1,1,0,1)
   2915 //ZZ #define X1110  BITS4(1,1,1,0)
   2916 //ZZ #define X1111  BITS4(1,1,1,1)
   2917 /*
   2918 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
   2919    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
   2920     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   2921     (((zzx3) & 0xF) << 12))
   2922 
   2923 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
   2924    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
   2925     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   2926     (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8))
   2927 
   2928 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
   2929    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
   2930     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   2931     (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) <<  0))
   2932 
   2933 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
   2934   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
   2935    (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
   2936    (((zzx0) & 0xF) << 0))
   2937 
   2938 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
   2939    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
   2940     (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
   2941     (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8) |  \
   2942     (((zzx1) & 0xF) <<  4) | (((zzx0) & 0xF) <<  0))
   2943 
   2944 #define XX______(zzx7,zzx6) \
   2945    ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
   2946 */
   2947 
   2948 
   2949 /* Get an immediate into a register, using only that register. */
   2950 static UInt* imm64_to_ireg ( UInt* p, Int xD, ULong imm64 )
   2951 {
   2952    if (imm64 == 0) {
   2953       // This has to be special-cased, since the logic below
   2954       // will leave the register unchanged in this case.
   2955       // MOVZ xD, #0, LSL #0
   2956       *p++ = X_3_6_2_16_5(X110, X100101, X00, 0/*imm16*/, xD);
   2957       return p;
   2958    }
   2959 
   2960    // There must be at least one non-zero halfword.  Find the
   2961    // lowest nonzero such, and use MOVZ to install it and zero
   2962    // out the rest of the register.
   2963    UShort h[4];
   2964    h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
   2965    h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
   2966    h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
   2967    h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
   2968 
   2969    UInt i;
   2970    for (i = 0; i < 4; i++) {
   2971       if (h[i] != 0)
   2972          break;
   2973    }
   2974    vassert(i < 4);
   2975 
   2976    // MOVZ xD, h[i], LSL (16*i)
   2977    *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
   2978 
   2979    // Work on upwards through h[i], using MOVK to stuff in any
   2980    // remaining nonzero elements.
   2981    i++;
   2982    for (; i < 4; i++) {
   2983       if (h[i] == 0)
   2984          continue;
   2985       // MOVK xD, h[i], LSL (16*i)
   2986       *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
   2987    }
   2988 
   2989    return p;
   2990 }
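/* Example (sketch): for imm64 == 0x0000440000009876 the halfwords are
   h[0]=0x9876, h[1]=0, h[2]=0x4400, h[3]=0, so imm64_to_ireg emits
   just two instructions:

      movz xD, #0x9876, lsl #0    -- installs h[0], zeroes the rest
      movk xD, #0x4400, lsl #32   -- patches in h[2]

   Zero halfwords above the first nonzero one are skipped, which is
   why patchable code must instead use the EXACTLY4 variant below. */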
   2991 
   2992 /* Get an immediate into a register, using only that register, and
   2993    generating exactly 4 instructions, regardless of the value of the
   2994    immediate. This is used when generating sections of code that need
   2995    to be patched later, so as to guarantee a specific size. */
   2996 static UInt* imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
   2997 {
   2998    UShort h[4];
   2999    h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
   3000    h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
   3001    h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
   3002    h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
   3003    // Work on upwards through h[i], using MOVK to stuff in the
   3004    // remaining elements.
   3005    UInt i;
   3006    for (i = 0; i < 4; i++) {
   3007       if (i == 0) {
   3008          // MOVZ xD, h[0], LSL (16*0)
   3009          *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
   3010       } else {
   3011          // MOVK xD, h[i], LSL (16*i)
   3012          *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
   3013       }
   3014    }
   3015    return p;
   3016 }
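/* Example (sketch): for the same value 0x0000440000009876 as above,
   imm64_to_ireg_EXACTLY4 always emits all four halfwords:

      movz xD, #0x9876, lsl #0
      movk xD, #0x0000, lsl #16
      movk xD, #0x4400, lsl #32
      movk xD, #0x0000, lsl #48

   so a later patch can overwrite the sequence in place with any other
   64-bit constant without changing its length. */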
   3017 
   3018 /* Check whether p points at a 4-insn sequence cooked up by
   3019    imm64_to_ireg_EXACTLY4(). */
   3020 static Bool is_imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
   3021 {
   3022    UShort h[4];
   3023    h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
   3024    h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
   3025    h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
   3026    h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
    3027    // Check h[0..3] against the MOVZ/MOVK sequence that
    3028    // imm64_to_ireg_EXACTLY4 would emit for this value.
   3029    UInt i;
   3030    for (i = 0; i < 4; i++) {
   3031       UInt expected;
   3032       if (i == 0) {
   3033          // MOVZ xD, h[0], LSL (16*0)
   3034          expected = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
   3035       } else {
   3036          // MOVK xD, h[i], LSL (16*i)
   3037          expected = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
   3038       }
   3039       if (p[i] != expected)
   3040          return False;
   3041    }
   3042    return True;
   3043 }
   3044 
   3045 
    3046 /* Generate an 8 bit store or 8-to-64 unsigned widening load from/to
   3047    rD, using the given amode for the address. */
   3048 static UInt* do_load_or_store8 ( UInt* p,
   3049                                  Bool isLoad, UInt wD, ARM64AMode* am )
   3050 {
   3051    vassert(wD <= 30);
   3052    if (am->tag == ARM64am_RI9) {
   3053       /* STURB Wd, [Xn|SP + simm9]:  00 111000 000 simm9 00 n d
   3054          LDURB Wd, [Xn|SP + simm9]:  00 111000 010 simm9 00 n d
   3055       */
   3056       Int simm9 = am->ARM64am.RI9.simm9;
   3057       vassert(-256 <= simm9 && simm9 <= 255);
   3058       UInt instr = X_2_6_3_9_2_5_5(X00, X111000, isLoad ? X010 : X000,
   3059                                    simm9 & 0x1FF, X00,
   3060                                    iregEnc(am->ARM64am.RI9.reg), wD);
   3061       *p++ = instr;
   3062       return p;
   3063    }
   3064    if (am->tag == ARM64am_RI12) {
   3065       /* STRB Wd, [Xn|SP + uimm12 * 1]:  00 111 001 00 imm12 n d
   3066          LDRB Wd, [Xn|SP + uimm12 * 1]:  00 111 001 01 imm12 n d
   3067       */
   3068       UInt uimm12 = am->ARM64am.RI12.uimm12;
   3069       UInt scale  = am->ARM64am.RI12.szB;
   3070       vassert(scale == 1); /* failure of this is serious.  Do not ignore. */
   3071       UInt xN    = iregEnc(am->ARM64am.RI12.reg);
   3072       vassert(xN <= 30);
   3073       UInt instr = X_2_6_2_12_5_5(X00, X111001, isLoad ? X01 : X00,
   3074                                   uimm12, xN, wD);
   3075       *p++ = instr;
   3076       return p;
   3077    }
   3078    if (am->tag == ARM64am_RR) {
    3079       /* STRB Wd, [Xn|SP, Xm]: 00 111 000 001 m 011 0 10 n d
    3080          LDRB Wd, [Xn|SP, Xm]: 00 111 000 011 m 011 0 10 n d
   3081       */
   3082       UInt xN = iregEnc(am->ARM64am.RR.base);
   3083       UInt xM = iregEnc(am->ARM64am.RR.index);
   3084       vassert(xN <= 30);
   3085       UInt instr = X_3_8_5_6_5_5(X001, isLoad ? X11000011 : X11000001,
   3086                                  xM, X011010, xN, wD);
   3087       *p++ = instr;
   3088       return p;
   3089    }
   3090    vpanic("do_load_or_store8");
   3091    vassert(0);
   3092 }
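/* Example (sketch) of the ARM64am_RI9 path above: an 8-bit widening
   load of w2 from [x21 - 16] is encoded as

      X_2_6_3_9_2_5_5(X00, X111000, X010, (-16) & 0x1FF, X00, 21, 2)
        = 0x385F02A2                       -- LDURB w2, [x21, #-16]

   Note the 9-bit immediate is masked, so negative offsets wrap into
   the encoding's two's-complement simm9 field. */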
   3093 
   3094 
   3095 /* Generate a 16 bit store or 16-to-64 unsigned widening load from/to
   3096    rD, using the given amode for the address. */
   3097 static UInt* do_load_or_store16 ( UInt* p,
   3098                                   Bool isLoad, UInt wD, ARM64AMode* am )
   3099 {
   3100    vassert(wD <= 30);
   3101    if (am->tag == ARM64am_RI9) {
   3102       /* STURH Wd, [Xn|SP + simm9]:  01 111000 000 simm9 00 n d
   3103          LDURH Wd, [Xn|SP + simm9]:  01 111000 010 simm9 00 n d
   3104       */
   3105       Int simm9 = am->ARM64am.RI9.simm9;
   3106       vassert(-256 <= simm9 && simm9 <= 255);
   3107       UInt instr = X_2_6_3_9_2_5_5(X01, X111000, isLoad ? X010 : X000,
   3108                                    simm9 & 0x1FF, X00,
   3109                                    iregEnc(am->ARM64am.RI9.reg), wD);
   3110       *p++ = instr;
   3111       return p;
   3112    }
   3113    if (am->tag == ARM64am_RI12) {
   3114       /* STRH Wd, [Xn|SP + uimm12 * 2]:  01 111 001 00 imm12 n d
   3115          LDRH Wd, [Xn|SP + uimm12 * 2]:  01 111 001 01 imm12 n d
   3116       */
   3117       UInt uimm12 = am->ARM64am.RI12.uimm12;
   3118       UInt scale  = am->ARM64am.RI12.szB;
   3119       vassert(scale == 2); /* failure of this is serious.  Do not ignore. */
   3120       UInt xN    = iregEnc(am->ARM64am.RI12.reg);
   3121       vassert(xN <= 30);
   3122       UInt instr = X_2_6_2_12_5_5(X01, X111001, isLoad ? X01 : X00,
   3123                                   uimm12, xN, wD);
   3124       *p++ = instr;
   3125       return p;
   3126    }
   3127    if (am->tag == ARM64am_RR) {
    3128       /* STRH Wd, [Xn|SP, Xm]: 01 111 000 001 m 011 0 10 n d
    3129          LDRH Wd, [Xn|SP, Xm]: 01 111 000 011 m 011 0 10 n d
   3130       */
   3131       UInt xN = iregEnc(am->ARM64am.RR.base);
   3132       UInt xM = iregEnc(am->ARM64am.RR.index);
   3133       vassert(xN <= 30);
   3134       UInt instr = X_3_8_5_6_5_5(X011, isLoad ? X11000011 : X11000001,
   3135                                  xM, X011010, xN, wD);
   3136       *p++ = instr;
   3137       return p;
   3138    }
   3139    vpanic("do_load_or_store16");
   3140    vassert(0);
   3141 }
   3142 
   3143 
   3144 /* Generate a 32 bit store or 32-to-64 unsigned widening load from/to
   3145    rD, using the given amode for the address. */
   3146 static UInt* do_load_or_store32 ( UInt* p,
   3147                                   Bool isLoad, UInt wD, ARM64AMode* am )
   3148 {
   3149    vassert(wD <= 30);
   3150    if (am->tag == ARM64am_RI9) {
   3151       /* STUR Wd, [Xn|SP + simm9]:  10 111000 000 simm9 00 n d
   3152          LDUR Wd, [Xn|SP + simm9]:  10 111000 010 simm9 00 n d
   3153       */
   3154       Int simm9 = am->ARM64am.RI9.simm9;
   3155       vassert(-256 <= simm9 && simm9 <= 255);
   3156       UInt instr = X_2_6_3_9_2_5_5(X10, X111000, isLoad ? X010 : X000,
   3157                                    simm9 & 0x1FF, X00,
   3158                                    iregEnc(am->ARM64am.RI9.reg), wD);
   3159       *p++ = instr;
   3160       return p;
   3161    }
   3162    if (am->tag == ARM64am_RI12) {
   3163       /* STR Wd, [Xn|SP + uimm12 * 4]:  10 111 001 00 imm12 n d
   3164          LDR Wd, [Xn|SP + uimm12 * 4]:  10 111 001 01 imm12 n d
   3165       */
   3166       UInt uimm12 = am->ARM64am.RI12.uimm12;
   3167       UInt scale  = am->ARM64am.RI12.szB;
   3168       vassert(scale == 4); /* failure of this is serious.  Do not ignore. */
   3169       UInt xN    = iregEnc(am->ARM64am.RI12.reg);
   3170       vassert(xN <= 30);
   3171       UInt instr = X_2_6_2_12_5_5(X10, X111001, isLoad ? X01 : X00,
   3172                                   uimm12, xN, wD);
   3173       *p++ = instr;
   3174       return p;
   3175    }
   3176    if (am->tag == ARM64am_RR) {
   3177       /* STR Wd, [Xn|SP, Xm]: 10 111 000 001 m 011 0 10 n d
   3178          LDR Wd, [Xn|SP, Xm]: 10 111 000 011 m 011 0 10 n d
   3179       */
   3180       UInt xN = iregEnc(am->ARM64am.RR.base);
   3181       UInt xM = iregEnc(am->ARM64am.RR.index);
   3182       vassert(xN <= 30);
   3183       UInt instr = X_3_8_5_6_5_5(X101, isLoad ? X11000011 : X11000001,
   3184                                  xM, X011010, xN, wD);
   3185       *p++ = instr;
   3186       return p;
   3187    }
   3188    vpanic("do_load_or_store32");
   3189    vassert(0);
   3190 }
   3191 
   3192 
   3193 /* Generate a 64 bit load or store to/from xD, using the given amode
   3194    for the address. */
   3195 static UInt* do_load_or_store64 ( UInt* p,
   3196                                   Bool isLoad, UInt xD, ARM64AMode* am )
   3197 {
   3198    /* In all these cases, Rn can't be 31 since that means SP. */
   3199    vassert(xD <= 30);
   3200    if (am->tag == ARM64am_RI9) {
   3201       /* STUR Xd, [Xn|SP + simm9]:  11 111000 000 simm9 00 n d
   3202          LDUR Xd, [Xn|SP + simm9]:  11 111000 010 simm9 00 n d
   3203       */
   3204       Int simm9 = am->ARM64am.RI9.simm9;
   3205       vassert(-256 <= simm9 && simm9 <= 255);
   3206       UInt xN = iregEnc(am->ARM64am.RI9.reg);
   3207       vassert(xN <= 30);
   3208       UInt instr = X_2_6_3_9_2_5_5(X11, X111000, isLoad ? X010 : X000,
   3209                                    simm9 & 0x1FF, X00, xN, xD);
   3210       *p++ = instr;
   3211       return p;
   3212    }
   3213    if (am->tag == ARM64am_RI12) {
   3214       /* STR Xd, [Xn|SP + uimm12 * 8]:  11 111 001 00 imm12 n d
   3215          LDR Xd, [Xn|SP + uimm12 * 8]:  11 111 001 01 imm12 n d
   3216       */
   3217       UInt uimm12 = am->ARM64am.RI12.uimm12;
   3218       UInt scale  = am->ARM64am.RI12.szB;
   3219       vassert(scale == 8); /* failure of this is serious.  Do not ignore. */
   3220       UInt xN    = iregEnc(am->ARM64am.RI12.reg);
   3221       vassert(xN <= 30);
   3222       UInt instr = X_2_6_2_12_5_5(X11, X111001, isLoad ? X01 : X00,
   3223                                   uimm12, xN, xD);
   3224       *p++ = instr;
   3225       return p;
   3226    }
   3227    if (am->tag == ARM64am_RR) {
   3228       /* STR Xd, [Xn|SP, Xm]: 11 111 000 001 m 011 0 10 n d
   3229          LDR Xd, [Xn|SP, Xm]: 11 111 000 011 m 011 0 10 n d
   3230       */
   3231       UInt xN = iregEnc(am->ARM64am.RR.base);
   3232       UInt xM = iregEnc(am->ARM64am.RR.index);
   3233       vassert(xN <= 30);
   3234       UInt instr = X_3_8_5_6_5_5(X111, isLoad ? X11000011 : X11000001,
   3235                                  xM, X011010, xN, xD);
   3236       *p++ = instr;
   3237       return p;
   3238    }
   3239    vpanic("do_load_or_store64");
   3240    vassert(0);
   3241 }
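/* Example (sketch) of the ARM64am_RI12 path above: storing x9 at
   baseblock offset 0 (scale 8) is encoded as

      X_2_6_2_12_5_5(X11, X111001, X00, 0, 21, 9)
        = 0xF90002A9                       -- STR x9, [x21, #0]

   which is the shape used for the guest-PC updates in the
   XDirect/XIndir/XAssisted cases below. */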
   3242 
   3243 
   3244 /* Emit an instruction into buf and return the number of bytes used.
   3245    Note that buf is not the insn's final place, and therefore it is
   3246    imperative to emit position-independent code.  If the emitted
   3247    instruction was a profiler inc, set *is_profInc to True, else
   3248    leave it unchanged. */
   3249 
   3250 Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
   3251                       UChar* buf, Int nbuf, const ARM64Instr* i,
   3252                       Bool mode64, VexEndness endness_host,
   3253                       const void* disp_cp_chain_me_to_slowEP,
   3254                       const void* disp_cp_chain_me_to_fastEP,
   3255                       const void* disp_cp_xindir,
   3256                       const void* disp_cp_xassisted )
   3257 {
   3258    UInt* p = (UInt*)buf;
   3259    vassert(nbuf >= 32);
   3260    vassert(mode64 == True);
   3261    vassert(0 == (((HWord)buf) & 3));
   3262 
   3263    switch (i->tag) {
   3264       case ARM64in_Arith: {
   3265          UInt      rD   = iregEnc(i->ARM64in.Arith.dst);
   3266          UInt      rN   = iregEnc(i->ARM64in.Arith.argL);
   3267          ARM64RIA* argR = i->ARM64in.Arith.argR;
   3268          switch (argR->tag) {
   3269             case ARM64riA_I12:
   3270                *p++ = X_2_6_2_12_5_5(
   3271                          i->ARM64in.Arith.isAdd ? X10 : X11,
   3272                          X010001,
   3273                          argR->ARM64riA.I12.shift == 12 ? X01 : X00,
   3274                          argR->ARM64riA.I12.imm12, rN, rD
   3275                       );
   3276                break;
   3277             case ARM64riA_R: {
   3278                UInt rM = iregEnc(i->ARM64in.Arith.argR->ARM64riA.R.reg);
   3279                *p++ = X_3_8_5_6_5_5(
   3280                          i->ARM64in.Arith.isAdd ? X100 : X110,
   3281                          X01011000, rM, X000000, rN, rD
   3282                       );
   3283                break;
   3284             }
   3285             default:
   3286                goto bad;
   3287          }
   3288          goto done;
   3289       }
   3290       case ARM64in_Cmp: {
   3291          UInt      rD   = 31; /* XZR, we are going to dump the result */
   3292          UInt      rN   = iregEnc(i->ARM64in.Cmp.argL);
   3293          ARM64RIA* argR = i->ARM64in.Cmp.argR;
   3294          Bool      is64 = i->ARM64in.Cmp.is64;
   3295          switch (argR->tag) {
   3296             case ARM64riA_I12:
   3297                /* 1 11 10001 sh imm12 Rn Rd = SUBS Xd, Xn, #imm */
   3298                /* 0 11 10001 sh imm12 Rn Rd = SUBS Wd, Wn, #imm */
   3299                *p++ = X_2_6_2_12_5_5(
   3300                          is64 ? X11 : X01, X110001,
   3301                          argR->ARM64riA.I12.shift == 12 ? X01 : X00,
   3302                          argR->ARM64riA.I12.imm12, rN, rD);
   3303                break;
   3304             case ARM64riA_R: {
   3305                /* 1 11 01011 00 0 Rm 000000 Rn Rd = SUBS Xd, Xn, Xm */
   3306                /* 0 11 01011 00 0 Rm 000000 Rn Rd = SUBS Wd, Wn, Wm */
   3307                UInt rM = iregEnc(i->ARM64in.Cmp.argR->ARM64riA.R.reg);
   3308                *p++ = X_3_8_5_6_5_5(is64 ? X111 : X011,
   3309                                     X01011000, rM, X000000, rN, rD);
   3310                break;
   3311             }
   3312             default:
   3313                goto bad;
   3314          }
   3315          goto done;
   3316       }
   3317       case ARM64in_Logic: {
   3318          UInt      rD   = iregEnc(i->ARM64in.Logic.dst);
   3319          UInt      rN   = iregEnc(i->ARM64in.Logic.argL);
   3320          ARM64RIL* argR = i->ARM64in.Logic.argR;
   3321          UInt      opc  = 0; /* invalid */
   3322          vassert(rD < 31);
   3323          vassert(rN < 31);
   3324          switch (i->ARM64in.Logic.op) {
   3325             case ARM64lo_OR:  opc = X101; break;
   3326             case ARM64lo_AND: opc = X100; break;
   3327             case ARM64lo_XOR: opc = X110; break;
   3328             default: break;
   3329          }
   3330          vassert(opc != 0);
   3331          switch (argR->tag) {
   3332             case ARM64riL_I13: {
   3333                /* 1 01 100100 N immR immS Rn Rd = ORR <Xd|Sp>, Xn, #imm */
   3334                /* 1 00 100100 N immR immS Rn Rd = AND <Xd|Sp>, Xn, #imm */
   3335                /* 1 10 100100 N immR immS Rn Rd = EOR <Xd|Sp>, Xn, #imm */
   3336                *p++ = X_3_6_1_6_6_5_5(
   3337                          opc, X100100, argR->ARM64riL.I13.bitN,
   3338                          argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
   3339                          rN, rD
   3340                       );
   3341                break;
   3342             }
   3343             case ARM64riL_R: {
   3344                /* 1 01 01010 00 0 m 000000 n d = ORR Xd, Xn, Xm */
   3345                /* 1 00 01010 00 0 m 000000 n d = AND Xd, Xn, Xm */
   3346                /* 1 10 01010 00 0 m 000000 n d = EOR Xd, Xn, Xm */
   3347                UInt rM = iregEnc(argR->ARM64riL.R.reg);
   3348                vassert(rM < 31);
   3349                *p++ = X_3_8_5_6_5_5(opc, X01010000, rM, X000000, rN, rD);
   3350                break;
   3351             }
   3352             default:
   3353                goto bad;
   3354          }
   3355          goto done;
   3356       }
   3357       case ARM64in_Test: {
   3358          UInt      rD   = 31; /* XZR, we are going to dump the result */
   3359          UInt      rN   = iregEnc(i->ARM64in.Test.argL);
   3360          ARM64RIL* argR = i->ARM64in.Test.argR;
   3361          switch (argR->tag) {
   3362             case ARM64riL_I13: {
   3363                /* 1 11 100100 N immR immS Rn Rd = ANDS Xd, Xn, #imm */
   3364                *p++ = X_3_6_1_6_6_5_5(
   3365                          X111, X100100, argR->ARM64riL.I13.bitN,
   3366                          argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
   3367                          rN, rD
   3368                       );
   3369                break;
   3370             }
   3371             default:
   3372                goto bad;
   3373          }
   3374          goto done;
   3375       }
   3376       case ARM64in_Shift: {
   3377          UInt      rD   = iregEnc(i->ARM64in.Shift.dst);
   3378          UInt      rN   = iregEnc(i->ARM64in.Shift.argL);
   3379          ARM64RI6* argR = i->ARM64in.Shift.argR;
   3380          vassert(rD < 31);
   3381          vassert(rN < 31);
   3382          switch (argR->tag) {
   3383             case ARM64ri6_I6: {
    3384                /* 110 1001101 (64-sh) (63-sh) nn dd   LSL Xd, Xn, sh */
   3385                /* 110 1001101 sh      63      nn dd   LSR Xd, Xn, sh */
   3386                /* 100 1001101 sh      63      nn dd   ASR Xd, Xn, sh */
   3387                UInt sh = argR->ARM64ri6.I6.imm6;
   3388                vassert(sh > 0 && sh < 64);
   3389                switch (i->ARM64in.Shift.op) {
   3390                   case ARM64sh_SHL:
   3391                      *p++ = X_3_6_1_6_6_5_5(X110, X100110,
   3392                                             1, 64-sh, 63-sh, rN, rD);
   3393                      break;
   3394                   case ARM64sh_SHR:
   3395                      *p++ = X_3_6_1_6_6_5_5(X110, X100110, 1, sh, 63, rN, rD);
   3396                      break;
   3397                   case ARM64sh_SAR:
   3398                      *p++ = X_3_6_1_6_6_5_5(X100, X100110, 1, sh, 63, rN, rD);
   3399                      break;
   3400                   default:
   3401                      vassert(0);
   3402                }
   3403                break;
   3404             }
   3405             case ARM64ri6_R: {
   3406                /* 100 1101 0110 mm 001000 nn dd   LSL Xd, Xn, Xm */
   3407                /* 100 1101 0110 mm 001001 nn dd   LSR Xd, Xn, Xm */
   3408                /* 100 1101 0110 mm 001010 nn dd   ASR Xd, Xn, Xm */
   3409                UInt rM = iregEnc(argR->ARM64ri6.R.reg);
   3410                vassert(rM < 31);
   3411                UInt subOpc = 0;
   3412                switch (i->ARM64in.Shift.op) {
   3413                   case ARM64sh_SHL: subOpc = X001000; break;
   3414                   case ARM64sh_SHR: subOpc = X001001; break;
   3415                   case ARM64sh_SAR: subOpc = X001010; break;
   3416                   default: vassert(0);
   3417                }
   3418                *p++ = X_3_8_5_6_5_5(X100, X11010110, rM, subOpc, rN, rD);
   3419                break;
   3420             }
   3421             default:
   3422                vassert(0);
   3423          }
   3424          goto done;
   3425       }
   3426       case ARM64in_Unary: {
   3427          UInt rDst = iregEnc(i->ARM64in.Unary.dst);
   3428          UInt rSrc = iregEnc(i->ARM64in.Unary.src);
   3429          switch (i->ARM64in.Unary.op) {
   3430             case ARM64un_CLZ:
   3431                /* 1 10 1101 0110 00000 00010 0 nn dd   CLZ Xd, Xn */
   3432                /* 1 10 1101 0110 00000 00010 1 nn dd   CLS Xd, Xn (unimp) */
   3433                *p++ = X_3_8_5_6_5_5(X110,
   3434                                     X11010110, X00000, X000100, rSrc, rDst);
   3435                goto done;
   3436             case ARM64un_NEG:
   3437                /* 1 10 01011 000 m 000000 11111 d  NEG Xd,Xm */
   3438                /* 0 10 01011 000 m 000000 11111 d  NEG Wd,Wm (unimp) */
   3439                *p++ = X_3_8_5_6_5_5(X110,
   3440                                     X01011000, rSrc, X000000, X11111, rDst);
   3441                goto done;
   3442             case ARM64un_NOT: {
   3443                /* 1 01 01010 00 1 m 000000 11111 d   MVN Xd,Xm */
   3444                *p++ = X_3_8_5_6_5_5(X101,
   3445                                     X01010001, rSrc, X000000, X11111, rDst);
   3446                goto done;
   3447             }
   3448             default:
   3449                break;
   3450          }
   3451          goto bad;
   3452       }
   3453       case ARM64in_MovI: {
   3454          /* We generate the "preferred form", ORR Xd, XZR, Xm
   3455             101 01010 00 0 m 000000 11111 d
   3456          */
   3457          UInt instr = 0xAA0003E0;
   3458          UInt d     = iregEnc(i->ARM64in.MovI.dst);
   3459          UInt m     = iregEnc(i->ARM64in.MovI.src);
   3460          *p++ = instr | ((m & 31) << 16) | ((d & 31) << 0);
   3461          goto done;
   3462       }
   3463       case ARM64in_Imm64: {
   3464          p = imm64_to_ireg( p, iregEnc(i->ARM64in.Imm64.dst),
   3465                                i->ARM64in.Imm64.imm64 );
   3466          goto done;
   3467       }
   3468       case ARM64in_LdSt64: {
   3469          p = do_load_or_store64( p, i->ARM64in.LdSt64.isLoad,
   3470                                  iregEnc(i->ARM64in.LdSt64.rD),
   3471                                  i->ARM64in.LdSt64.amode );
   3472          goto done;
   3473       }
   3474       case ARM64in_LdSt32: {
   3475          p = do_load_or_store32( p, i->ARM64in.LdSt32.isLoad,
   3476                                  iregEnc(i->ARM64in.LdSt32.rD),
   3477                                  i->ARM64in.LdSt32.amode );
   3478          goto done;
   3479       }
   3480       case ARM64in_LdSt16: {
   3481          p = do_load_or_store16( p, i->ARM64in.LdSt16.isLoad,
   3482                                  iregEnc(i->ARM64in.LdSt16.rD),
   3483                                  i->ARM64in.LdSt16.amode );
   3484          goto done;
   3485       }
   3486       case ARM64in_LdSt8: {
   3487          p = do_load_or_store8( p, i->ARM64in.LdSt8.isLoad,
   3488                                 iregEnc(i->ARM64in.LdSt8.rD),
   3489                                 i->ARM64in.LdSt8.amode );
   3490          goto done;
   3491       }
   3492 
   3493       case ARM64in_XDirect: {
   3494          /* NB: what goes on here has to be very closely coordinated
   3495             with chainXDirect_ARM64 and unchainXDirect_ARM64 below. */
   3496          /* We're generating chain-me requests here, so we need to be
   3497             sure this is actually allowed -- no-redir translations
   3498             can't use chain-me's.  Hence: */
   3499          vassert(disp_cp_chain_me_to_slowEP != NULL);
   3500          vassert(disp_cp_chain_me_to_fastEP != NULL);
   3501 
   3502          /* Use ptmp for backpatching conditional jumps. */
   3503          UInt* ptmp = NULL;
   3504 
   3505          /* First off, if this is conditional, create a conditional
   3506             jump over the rest of it.  Or at least, leave a space for
   3507             it that we will shortly fill in. */
   3508          if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
   3509             vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
   3510             ptmp = p;
   3511             *p++ = 0;
   3512          }
   3513 
   3514          /* Update the guest PC. */
   3515          /* imm64 x9, dstGA */
   3516          /* str   x9, amPC */
   3517          p = imm64_to_ireg(p, /*x*/9, i->ARM64in.XDirect.dstGA);
   3518          p = do_load_or_store64(p, False/*!isLoad*/,
   3519                                 /*x*/9, i->ARM64in.XDirect.amPC);
   3520 
   3521          /* --- FIRST PATCHABLE BYTE follows --- */
   3522          /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
   3523             calling to) backs up the return address, so as to find the
   3524             address of the first patchable byte.  So: don't change the
   3525             number of instructions (5) below. */
   3526          /* movw x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[15:0] */
    3527          /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[31:16], lsl 16 */
   3528          /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[47:32], lsl 32 */
   3529          /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[63:48], lsl 48 */
   3530          /* blr  x9 */
   3531          const void* disp_cp_chain_me
   3532                   = i->ARM64in.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
   3533                                                 : disp_cp_chain_me_to_slowEP;
   3534          p = imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
   3535          *p++ = 0xD63F0120;
   3536          /* --- END of PATCHABLE BYTES --- */
   3537 
   3538          /* Fix up the conditional jump, if there was one. */
   3539          if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
   3540             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
   3541             vassert(delta > 0 && delta < 40);
   3542             vassert((delta & 3) == 0);
   3543             UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
   3544             vassert(notCond <= 13); /* Neither AL nor NV */
   3545             vassert(ptmp != NULL);
   3546             delta = delta >> 2;
   3547             *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
   3548          }
   3549          goto done;
   3550       }
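      /* For reference, a sketch of the sequence the XDirect case above
         emits when the jump is conditional (the imm64 for dstGA takes
         between 1 and 4 insns; everything else is fixed):

            b.<!cond>  <past the blr>    -- hole, patched in afterwards
            <1..4>     imm64 x9, dstGA
            str        x9, [amPC]
            movz+movk  x9, &disp_cp_chain_me_to_{slow,fast}EP  -- exactly 4
            blr        x9                -- the 5-insn patchable area ends here
      */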
   3551 
   3552       case ARM64in_XIndir: {
   3553          // XIndir is more or less the same as XAssisted, except
   3554          // we don't have a trc value to hand back, so there's no
    3555          // write to x21
   3556          /* Use ptmp for backpatching conditional jumps. */
   3557          //UInt* ptmp = NULL;
   3558 
   3559          /* First off, if this is conditional, create a conditional
   3560             jump over the rest of it.  Or at least, leave a space for
   3561             it that we will shortly fill in. */
   3562          if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
   3563             vassert(0); //ATC
   3564 //ZZ             vassert(i->ARMin.XIndir.cond != ARMcc_NV);
   3565 //ZZ             ptmp = p;
   3566 //ZZ             *p++ = 0;
   3567          }
   3568 
   3569          /* Update the guest PC. */
   3570          /* str r-dstGA, amPC */
   3571          p = do_load_or_store64(p, False/*!isLoad*/,
   3572                                 iregEnc(i->ARM64in.XIndir.dstGA),
   3573                                 i->ARM64in.XIndir.amPC);
   3574 
   3575          /* imm64 x9, VG_(disp_cp_xindir) */
   3576          /* br    x9 */
   3577          p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xindir);
   3578          *p++ = 0xD61F0120; /* br x9 */
   3579 
   3580          /* Fix up the conditional jump, if there was one. */
   3581          if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
   3582             vassert(0); //ATC
   3583 //ZZ             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
   3584 //ZZ             vassert(delta > 0 && delta < 40);
   3585 //ZZ             vassert((delta & 3) == 0);
   3586 //ZZ             UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
   3587 //ZZ             vassert(notCond <= 13); /* Neither AL nor NV */
   3588 //ZZ             delta = (delta >> 2) - 2;
   3589 //ZZ             *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
   3590          }
   3591          goto done;
   3592       }
   3593 
   3594       case ARM64in_XAssisted: {
   3595          /* Use ptmp for backpatching conditional jumps. */
   3596          UInt* ptmp = NULL;
   3597 
   3598          /* First off, if this is conditional, create a conditional
   3599             jump over the rest of it.  Or at least, leave a space for
   3600             it that we will shortly fill in.  I think this can only
   3601             ever happen when VEX is driven by the switchbacker. */
   3602          if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
    3603             vassert(i->ARM64in.XAssisted.cond != ARM64cc_NV);
   3604             ptmp = p;
   3605             *p++ = 0;
   3606          }
   3607 
   3608          /* Update the guest PC. */
   3609          /* str r-dstGA, amPC */
   3610          p = do_load_or_store64(p, False/*!isLoad*/,
   3611                                 iregEnc(i->ARM64in.XAssisted.dstGA),
   3612                                 i->ARM64in.XAssisted.amPC);
   3613 
    3614          /* imm64 x21, $magic_number */
   3615          UInt trcval = 0;
   3616          switch (i->ARM64in.XAssisted.jk) {
   3617             case Ijk_ClientReq:   trcval = VEX_TRC_JMP_CLIENTREQ;   break;
   3618             case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
   3619             //case Ijk_Sys_int128:  trcval = VEX_TRC_JMP_SYS_INT128;  break;
   3620             case Ijk_Yield:       trcval = VEX_TRC_JMP_YIELD;       break;
   3621             //case Ijk_EmWarn:      trcval = VEX_TRC_JMP_EMWARN;      break;
   3622             //case Ijk_MapFail:     trcval = VEX_TRC_JMP_MAPFAIL;     break;
   3623             case Ijk_NoDecode:    trcval = VEX_TRC_JMP_NODECODE;    break;
   3624             case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
   3625             case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break;
   3626             case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
   3627             case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
   3628             //case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
   3629             case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
   3630             /* We don't expect to see the following being assisted. */
   3631             //case Ijk_Ret:
   3632             //case Ijk_Call:
   3633             /* fallthrough */
   3634             default:
   3635                ppIRJumpKind(i->ARM64in.XAssisted.jk);
   3636                vpanic("emit_ARM64Instr.ARM64in_XAssisted: "
   3637                       "unexpected jump kind");
   3638          }
   3639          vassert(trcval != 0);
   3640          p = imm64_to_ireg(p, /*x*/21, (ULong)trcval);
   3641 
   3642          /* imm64 x9, VG_(disp_cp_xassisted) */
   3643          /* br    x9 */
   3644          p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xassisted);
   3645          *p++ = 0xD61F0120; /* br x9 */
   3646 
   3647          /* Fix up the conditional jump, if there was one. */
   3648          if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
   3649             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
   3650             vassert(delta > 0 && delta < 40);
   3651             vassert((delta & 3) == 0);
    3652             UInt notCond = 1 ^ (UInt)i->ARM64in.XAssisted.cond;
   3653             vassert(notCond <= 13); /* Neither AL nor NV */
   3654             vassert(ptmp != NULL);
   3655             delta = delta >> 2;
   3656             *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
   3657          }
   3658          goto done;
   3659       }
   3660 
   3661       case ARM64in_CSel: {
   3662          /* 100 1101 0100 mm cond 00 nn dd = CSEL Xd, Xn, Xm, cond */
   3663          UInt dd   = iregEnc(i->ARM64in.CSel.dst);
   3664          UInt nn   = iregEnc(i->ARM64in.CSel.argL);
   3665          UInt mm   = iregEnc(i->ARM64in.CSel.argR);
   3666          UInt cond = (UInt)i->ARM64in.CSel.cond;
   3667          vassert(dd < 31 && nn < 31 && mm < 31 && cond < 16);
   3668          *p++ = X_3_8_5_6_5_5(X100, X11010100, mm, cond << 2, nn, dd);
   3669          goto done;
   3670       }
   3671 
   3672       case ARM64in_Call: {
   3673          /* We'll use x9 as a scratch register to put the target
   3674             address in. */
   3675          if (i->ARM64in.Call.cond != ARM64cc_AL
   3676              && i->ARM64in.Call.rloc.pri != RLPri_None) {
   3677             /* The call might not happen (it isn't unconditional) and
   3678                it returns a result.  In this case we will need to
   3679                generate a control flow diamond to put 0x555..555 in
   3680                the return register(s) in the case where the call
   3681                doesn't happen.  If this ever becomes necessary, maybe
   3682                copy code from the 32-bit ARM equivalent.  Until that
   3683                day, just give up. */
   3684             goto bad;
   3685          }
   3686 
   3687          UInt* ptmp = NULL;
   3688          if (i->ARM64in.Call.cond != ARM64cc_AL) {
   3689             /* Create a hole to put a conditional branch in.  We'll
   3690                patch it once we know the branch length. */
   3691             ptmp = p;
   3692             *p++ = 0;
   3693          }
   3694 
   3695          // x9 = &target
   3696          p = imm64_to_ireg( (UInt*)p, /*x*/9, (ULong)i->ARM64in.Call.target );
   3697          // blr x9
   3698          *p++ = 0xD63F0120;
   3699 
   3700          // Patch the hole if necessary
   3701          if (i->ARM64in.Call.cond != ARM64cc_AL) {
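                     /* Note p and ptmp are UInt*, so this difference is already a
                        count of 32-bit instruction words, which is what the
                        B.cond simm19 field below expects. */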
   3702             ULong dist = (ULong)(p - ptmp);
   3703             /* imm64_to_ireg produces between 1 and 4 insns, and
   3704                then there's the BLR itself.  Hence: */
   3705             vassert(dist >= 2 && dist <= 5);
   3706             vassert(ptmp != NULL);
   3707             // 01010100 simm19 0 cond = B.cond (here + simm19 << 2)
   3708             *ptmp = X_8_19_1_4(X01010100, dist, 0,
   3709                                1 ^ (UInt)i->ARM64in.Call.cond);
   3710          } else {
   3711             vassert(ptmp == NULL);
   3712          }
   3713 
   3714          goto done;
   3715       }
   3716 
   3717       case ARM64in_AddToSP: {
   3718          /* 10,0 10001 00 imm12 11111 11111  ADD xsp, xsp, #imm12
   3719             11,0 10001 00 imm12 11111 11111  SUB xsp, xsp, #imm12
   3720          */
   3721          Int simm12 = i->ARM64in.AddToSP.simm;
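                  /* AArch64 requires SP to remain 16-byte aligned, hence the
                     insistence below that the adjustment is a multiple of 16. */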
   3722          vassert(-4096 < simm12 && simm12 < 4096);
   3723          vassert(0 == (simm12 & 0xF));
   3724          if (simm12 >= 0) {
   3725             *p++ = X_2_6_2_12_5_5(X10, X010001, X00, simm12, X11111, X11111);
   3726          } else {
   3727             *p++ = X_2_6_2_12_5_5(X11, X010001, X00, -simm12, X11111, X11111);
   3728          }
   3729          goto done;
   3730       }
   3731 
   3732       case ARM64in_FromSP: {
   3733          /* 10,0 10001 00 0..(12)..0 11111 dd  MOV Xd, xsp */
   3734          UInt dd = iregEnc(i->ARM64in.FromSP.dst);
   3735          vassert(dd < 31);
   3736          *p++ = X_2_6_2_12_5_5(X10, X010001, X00, 0, X11111, dd);
   3737          goto done;
   3738       }
   3739 
   3740       case ARM64in_Mul: {
   3741          /* 100 11011 110 mm 011111 nn dd   UMULH Xd, Xn,Xm
   3742             100 11011 010 mm 011111 nn dd   SMULH Xd, Xn,Xm
   3743             100 11011 000 mm 011111 nn dd   MUL   Xd, Xn,Xm
   3744          */
   3745          UInt dd = iregEnc(i->ARM64in.Mul.dst);
   3746          UInt nn = iregEnc(i->ARM64in.Mul.argL);
   3747          UInt mm = iregEnc(i->ARM64in.Mul.argR);
   3748          vassert(dd < 31 && nn < 31 && mm < 31);
   3749          switch (i->ARM64in.Mul.op) {
   3750             case ARM64mul_ZX:
   3751                *p++ = X_3_8_5_6_5_5(X100, X11011110, mm, X011111, nn, dd);
   3752                goto done;
   3753             case ARM64mul_SX:
   3754                *p++ = X_3_8_5_6_5_5(X100, X11011010, mm, X011111, nn, dd);
   3755                goto done;
   3756             case ARM64mul_PLAIN:
   3757                *p++ = X_3_8_5_6_5_5(X100, X11011000, mm, X011111, nn, dd);
   3758                goto done;
   3759             default:
   3760                vassert(0);
   3761          }
   3762          goto bad;
   3763       }
   3764       case ARM64in_LdrEX: {
   3765          /* 085F7C82   ldxrb w2, [x4]
   3766             485F7C82   ldxrh w2, [x4]
   3767             885F7C82   ldxr  w2, [x4]
   3768             C85F7C82   ldxr  x2, [x4]
   3769          */
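                  /* These encodings hard-wire the operand registers: the address
                     is always in x4 and the data in w2/x2, so whoever generates
                     this insn must have moved the operands into those registers
                     beforehand. */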
   3770          switch (i->ARM64in.LdrEX.szB) {
   3771             case 1: *p++ = 0x085F7C82; goto done;
   3772             case 2: *p++ = 0x485F7C82; goto done;
   3773             case 4: *p++ = 0x885F7C82; goto done;
   3774             case 8: *p++ = 0xC85F7C82; goto done;
   3775             default: break;
   3776          }
   3777          goto bad;
   3778       }
   3779       case ARM64in_StrEX: {
   3780          /* 08007C82   stxrb w0, w2, [x4]
   3781             48007C82   stxrh w0, w2, [x4]
   3782             88007C82   stxr  w0, w2, [x4]
   3783             C8007C82   stxr  w0, x2, [x4]
   3784          */
   3785          switch (i->ARM64in.StrEX.szB) {
   3786             case 1: *p++ = 0x08007C82; goto done;
   3787             case 2: *p++ = 0x48007C82; goto done;
   3788             case 4: *p++ = 0x88007C82; goto done;
   3789             case 8: *p++ = 0xC8007C82; goto done;
   3790             default: break;
   3791          }
   3792          goto bad;
   3793       }
   3794       case ARM64in_MFence: {
   3795          *p++ = 0xD5033F9F; /* DSB sy */
   3796          *p++ = 0xD5033FBF; /* DMB sy */
   3797          *p++ = 0xD5033FDF; /* ISB */
   3798          goto done;
   3799       }
   3800       //case ARM64in_CLREX: {
   3801       //   //ATC, but believed to be correct
   3802       //   goto bad;
   3803       //   *p++ = 0xD5033F5F; /* clrex */
   3804       //   goto done;
   3805       //}
   3806       case ARM64in_VLdStH: {
   3807          /* 01 111101 01 imm12 n t   LDR Ht, [Xn|SP, #imm12 * 2]
   3808             01 111101 00 imm12 n t   STR Ht, [Xn|SP, #imm12 * 2]
   3809          */
   3810          UInt hD     = dregEnc(i->ARM64in.VLdStH.hD);
   3811          UInt rN     = iregEnc(i->ARM64in.VLdStH.rN);
   3812          UInt uimm12 = i->ARM64in.VLdStH.uimm12;
   3813          Bool isLD   = i->ARM64in.VLdStH.isLoad;
   3814          vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
   3815          uimm12 >>= 1;
   3816          vassert(uimm12 < (1<<12));
   3817          vassert(hD < 32);
   3818          vassert(rN < 31);
   3819          *p++ = X_2_6_2_12_5_5(X01, X111101, isLD ? X01 : X00,
   3820                                uimm12, rN, hD);
   3821          goto done;
   3822       }
   3823       case ARM64in_VLdStS: {
   3824          /* 10 111101 01 imm12 n t   LDR St, [Xn|SP, #imm12 * 4]
   3825             10 111101 00 imm12 n t   STR St, [Xn|SP, #imm12 * 4]
   3826          */
   3827          UInt sD     = dregEnc(i->ARM64in.VLdStS.sD);
   3828          UInt rN     = iregEnc(i->ARM64in.VLdStS.rN);
   3829          UInt uimm12 = i->ARM64in.VLdStS.uimm12;
   3830          Bool isLD   = i->ARM64in.VLdStS.isLoad;
   3831          vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
   3832          uimm12 >>= 2;
   3833          vassert(uimm12 < (1<<12));
   3834          vassert(sD < 32);
   3835          vassert(rN < 31);
   3836          *p++ = X_2_6_2_12_5_5(X10, X111101, isLD ? X01 : X00,
   3837                                uimm12, rN, sD);
   3838          goto done;
   3839       }
   3840       case ARM64in_VLdStD: {
   3841          /* 11 111101 01 imm12 n t   LDR Dt, [Xn|SP, #imm12 * 8]
   3842             11 111101 00 imm12 n t   STR Dt, [Xn|SP, #imm12 * 8]
   3843          */
   3844          UInt dD     = dregEnc(i->ARM64in.VLdStD.dD);
   3845          UInt rN     = iregEnc(i->ARM64in.VLdStD.rN);
   3846          UInt uimm12 = i->ARM64in.VLdStD.uimm12;
   3847          Bool isLD   = i->ARM64in.VLdStD.isLoad;
   3848          vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
   3849          uimm12 >>= 3;
   3850          vassert(uimm12 < (1<<12));
   3851          vassert(dD < 32);
   3852          vassert(rN < 31);
   3853          *p++ = X_2_6_2_12_5_5(X11, X111101, isLD ? X01 : X00,
   3854                                uimm12, rN, dD);
   3855          goto done;
   3856       }
   3857       case ARM64in_VLdStQ: {
   3858          /* 0100 1100 0000 0000 0111 11 rN rQ   st1 {vQ.2d}, [<rN|SP>]
   3859             0100 1100 0100 0000 0111 11 rN rQ   ld1 {vQ.2d}, [<rN|SP>]
   3860          */
   3861          UInt rQ = qregEnc(i->ARM64in.VLdStQ.rQ);
   3862          UInt rN = iregEnc(i->ARM64in.VLdStQ.rN);
   3863          vassert(rQ < 32);
   3864          vassert(rN < 31);
   3865          if (i->ARM64in.VLdStQ.isLoad) {
   3866             *p++ = 0x4C407C00 | (rN << 5) | rQ;
   3867          } else {
   3868             *p++ = 0x4C007C00 | (rN << 5) | rQ;
   3869          }
   3870          goto done;
   3871       }
   3872       case ARM64in_VCvtI2F: {
   3873          /* 31  28    23 21 20 18  15     9 4
   3874             000 11110 00 1  00 010 000000 n d  SCVTF Sd, Wn
   3875             000 11110 01 1  00 010 000000 n d  SCVTF Dd, Wn
   3876             100 11110 00 1  00 010 000000 n d  SCVTF Sd, Xn
   3877             100 11110 01 1  00 010 000000 n d  SCVTF Dd, Xn
   3878             000 11110 00 1  00 011 000000 n d  UCVTF Sd, Wn
   3879             000 11110 01 1  00 011 000000 n d  UCVTF Dd, Wn
   3880             100 11110 00 1  00 011 000000 n d  UCVTF Sd, Xn
   3881             100 11110 01 1  00 011 000000 n d  UCVTF Dd, Xn
   3882          */
   3883          UInt       rN = iregEnc(i->ARM64in.VCvtI2F.rS);
   3884          UInt       rD = dregEnc(i->ARM64in.VCvtI2F.rD);
   3885          ARM64CvtOp how = i->ARM64in.VCvtI2F.how;
   3886          /* Just handle cases as they show up. */
   3887          switch (how) {
   3888             case ARM64cvt_F32_I32S: /* SCVTF Sd, Wn */
   3889                *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X000000, rN, rD);
   3890                break;
   3891             case ARM64cvt_F64_I32S: /* SCVTF Dd, Wn */
   3892                *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X000000, rN, rD);
   3893                break;
   3894             case ARM64cvt_F32_I64S: /* SCVTF Sd, Xn */
   3895                *p++ = X_3_5_8_6_5_5(X100, X11110, X00100010, X000000, rN, rD);
   3896                break;
   3897             case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
   3898                *p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
   3899                break;
   3900             case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */
   3901                *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD);
   3902                break;
   3903             case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
   3904                *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
   3905                break;
   3906             case ARM64cvt_F32_I64U: /* UCVTF Sd, Xn */
   3907                *p++ = X_3_5_8_6_5_5(X100, X11110, X00100011, X000000, rN, rD);
   3908                break;
   3909             case ARM64cvt_F64_I64U: /* UCVTF Dd, Xn  */
   3910                *p++ = X_3_5_8_6_5_5(X100, X11110, X01100011, X000000, rN, rD);
   3911                break;
   3912             default:
   3913                goto bad; //ATC
   3914          }
   3915          goto done;
   3916       }
   3917       case ARM64in_VCvtF2I: {
   3918          /*    30       23   20 18  15     9 4
   3919             sf 00,11110,0x 1 00 000,000000 n d  FCVTNS Rd, Fn (round to
   3920             sf 00,11110,0x 1 00 001,000000 n d  FCVTNU Rd, Fn  nearest)
   3921             ---------------- 01 --------------  FCVTP-------- (round to +inf)
   3922             ---------------- 10 --------------  FCVTM-------- (round to -inf)
   3923             ---------------- 11 --------------  FCVTZ-------- (round to zero)
   3924 
   3925             Rd is Xd when sf==1, Wd when sf==0
   3926             Fn is Dn when x==1, Sn when x==0
   3927             20:19 carry the rounding mode, using the same encoding as FPCR
   3928          */
   3929          UInt       rD    = iregEnc(i->ARM64in.VCvtF2I.rD);
   3930          UInt       rN    = dregEnc(i->ARM64in.VCvtF2I.rS);
   3931          ARM64CvtOp how   = i->ARM64in.VCvtF2I.how;
   3932          UChar      armRM = i->ARM64in.VCvtF2I.armRM;
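                  /* armRM is the 2-bit FPCR-style rounding mode (see the table
                     above); OR-ing (armRM << 3) into the 8-bit field places it
                     at insn bits 20:19. */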
   3933          /* Just handle cases as they show up. */
   3934          switch (how) {
   3935             case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */
   3936                *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3),
   3937                                     X000000, rN, rD);
   3938                break;
   3939             case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */
   3940                *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3),
   3941                                     X000000, rN, rD);
   3942                break;
   3943             case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */
   3944                *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3),
   3945                                     X000000, rN, rD);
   3946                break;
   3947             case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */
   3948                *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
   3949                                     X000000, rN, rD);
   3950                break;
   3951             case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
   3952                *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
   3953                                     X000000, rN, rD);
   3954                break;
   3955             case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
   3956                *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3),
   3957                                     X000000, rN, rD);
   3958                break;
   3959             case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
   3960                *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3),
   3961                                     X000000, rN, rD);
   3962                break;
   3963             case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
   3964                *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
   3965                                     X000000, rN, rD);
   3966                break;
   3967             default:
   3968                goto bad; //ATC
   3969          }
   3970          goto done;
   3971       }
   3972       case ARM64in_VCvtSD: {
   3973          /* 31         23 21    16  14    9 4
   3974             000,11110, 00 10001 0,1 10000 n d   FCVT Dd, Sn (S->D)
   3975             ---------- 01 ----- 0,0 ---------   FCVT Sd, Dn (D->S)
   3976             Rounding, when dst is smaller than src, is per the FPCR.
   3977          */
   3978          UInt dd = dregEnc(i->ARM64in.VCvtSD.dst);
   3979          UInt nn = dregEnc(i->ARM64in.VCvtSD.src);
   3980          if (i->ARM64in.VCvtSD.sToD) {
   3981             *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X110000, nn, dd);
   3982          } else {
   3983             *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X010000, nn, dd);
   3984          }
   3985          goto done;
   3986       }
   3987       case ARM64in_VCvtHS: {
   3988          /* 31         23 21    16  14    9 4
   3989             000,11110, 11 10001 0,0 10000 n d   FCVT Sd, Hn (H->S)
   3990             ---------- 00 ----- 1,1 ---------   FCVT Hd, Sn (S->H)
   3991             Rounding, when dst is smaller than src, is per the FPCR.
   3992          */
   3993          UInt dd = dregEnc(i->ARM64in.VCvtHS.dst);
   3994          UInt nn = dregEnc(i->ARM64in.VCvtHS.src);
   3995          if (i->ARM64in.VCvtHS.hToS) {
   3996             *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X010000, nn, dd);
   3997          } else {
   3998             *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X110000, nn, dd);
   3999          }
   4000          goto done;
   4001       }
   4002       case ARM64in_VCvtHD: {
   4003          /* 31         23 21    16  14    9 4
   4004             000,11110, 11 10001 0,1 10000 n d   FCVT Dd, Hn (H->D)
   4005             ---------- 01 ----- 1,1 ---------   FCVT Hd, Dn (D->H)
   4006             Rounding, when dst is smaller than src, is per the FPCR.
   4007          */
   4008          UInt dd = dregEnc(i->ARM64in.VCvtHD.dst);
   4009          UInt nn = dregEnc(i->ARM64in.VCvtHD.src);
   4010          if (i->ARM64in.VCvtHD.hToD) {
   4011             *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X110000, nn, dd);
   4012          } else {
   4013             *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X110000, nn, dd);
   4014          }
   4015          goto done;
   4016       }
   4017       case ARM64in_VUnaryD: {
   4018          /* 31        23 21     16 14    9 4
   4019             000,11110 01 1,0000 0,0 10000 n d  FMOV Dd, Dn (not handled)
   4020             ------------------- 0,1 ---------  FABS ------
   4021             ------------------- 1,0 ---------  FNEG ------
   4022             ------------------- 1,1 ---------  FSQRT -----
   4023          */
   4024          UInt dD  = dregEnc(i->ARM64in.VUnaryD.dst);
   4025          UInt dN  = dregEnc(i->ARM64in.VUnaryD.src);
   4026          UInt b16 = 2; /* impossible */
   4027          UInt b15 = 2; /* impossible */
   4028          switch (i->ARM64in.VUnaryD.op) {
   4029             case ARM64fpu_NEG:  b16 = 1; b15 = 0; break;
   4030             case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
   4031             case ARM64fpu_ABS:  b16 = 0; b15 = 1; break;
   4032             default: break;
   4033          }
   4034          if (b16 < 2 && b15 < 2) {
   4035             *p++ = X_3_8_5_6_5_5(X000, X11110011, (X0000 << 1) | b16,
   4036                                  (b15 << 5) | X10000, dN, dD);
   4037             goto done;
   4038          }
   4039          /*
    4040             000, 11110 01 1,001 11,1 10000 n d  FRINTI Dd, Dn (round per FPCR)
   4041          */
   4042          if (i->ARM64in.VUnaryD.op == ARM64fpu_RINT) {
   4043            *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
   4044            goto done;
   4045          }
   4046          /*
    4047             010, 11110 11 1,0000 1,1111 10 n d  FRECPX Dd, Dn
   4048          */
   4049          if (i->ARM64in.VUnaryD.op == ARM64fpu_RECPX) {
   4050            *p++ = X_3_8_5_6_5_5(X010, X11110111, X00001, X111110, dN, dD);
   4051            goto done;
   4052          }
   4053          goto bad;
   4054       }
   4055       case ARM64in_VUnaryS: {
   4056          /* 31        23 21     16 14    9 4
   4057             000,11110 00 1,0000 0,0 10000 n d  FMOV Sd, Sn (not handled)
   4058             ------------------- 0,1 ---------  FABS ------
   4059             ------------------- 1,0 ---------  FNEG ------
   4060             ------------------- 1,1 ---------  FSQRT -----
   4061          */
   4062          UInt sD  = dregEnc(i->ARM64in.VUnaryS.dst);
   4063          UInt sN  = dregEnc(i->ARM64in.VUnaryS.src);
   4064          UInt b16 = 2; /* impossible */
   4065          UInt b15 = 2; /* impossible */
   4066          switch (i->ARM64in.VUnaryS.op) {
   4067             case ARM64fpu_NEG:  b16 = 1; b15 = 0; break;
   4068             case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
   4069             case ARM64fpu_ABS:  b16 = 0; b15 = 1; break;
   4070             default: break;
   4071          }
   4072          if (b16 < 2 && b15 < 2) {
   4073             *p++ = X_3_8_5_6_5_5(X000, X11110001, (X0000 << 1) | b16,
   4074                                  (b15 << 5) | X10000, sN, sD);
   4075             goto done;
   4076          }
   4077          /*
    4078             000, 11110 00 1,001 11,1 10000 n d  FRINTI Sd, Sn (round per FPCR)
   4079          */
   4080          if (i->ARM64in.VUnaryS.op == ARM64fpu_RINT) {
   4081            *p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
   4082            goto done;
   4083          }
   4084          /*
    4085             010, 11110 10 1,0000 1,1111 10 n d  FRECPX Sd, Sn
   4086          */
   4087          if (i->ARM64in.VUnaryS.op == ARM64fpu_RECPX) {
   4088            *p++ = X_3_8_5_6_5_5(X010, X11110101, X00001, X111110, sN, sD);
   4089            goto done;
   4090          }
   4091          goto bad;
   4092       }
   4093       case ARM64in_VBinD: {
   4094          /* 31        23  20 15   11 9 4
   4095             ---------------- 0000 ------   FMUL  --------
   4096             000 11110 011 m  0001 10 n d   FDIV  Dd,Dn,Dm
   4097             ---------------- 0010 ------   FADD  --------
   4098             ---------------- 0011 ------   FSUB  --------
   4099          */
   4100          UInt dD = dregEnc(i->ARM64in.VBinD.dst);
   4101          UInt dN = dregEnc(i->ARM64in.VBinD.argL);
   4102          UInt dM = dregEnc(i->ARM64in.VBinD.argR);
   4103          UInt b1512 = 16; /* impossible */
   4104          switch (i->ARM64in.VBinD.op) {
   4105             case ARM64fpb_DIV: b1512 = X0001; break;
   4106             case ARM64fpb_MUL: b1512 = X0000; break;
   4107             case ARM64fpb_SUB: b1512 = X0011; break;
   4108             case ARM64fpb_ADD: b1512 = X0010; break;
   4109             default: goto bad;
   4110          }
   4111          vassert(b1512 < 16);
   4112          *p++
   4113             = X_3_8_5_6_5_5(X000, X11110011, dM, (b1512 << 2) | X10, dN, dD);
   4114          goto done;
   4115       }
   4116       case ARM64in_VBinS: {
   4117          /* 31        23  20 15   11 9 4
   4118             ---------------- 0000 ------   FMUL  --------
    4119             000 11110 001 m  0001 10 n d   FDIV  Sd,Sn,Sm
   4120             ---------------- 0010 ------   FADD  --------
   4121             ---------------- 0011 ------   FSUB  --------
   4122          */
   4123          UInt sD = dregEnc(i->ARM64in.VBinS.dst);
   4124          UInt sN = dregEnc(i->ARM64in.VBinS.argL);
   4125          UInt sM = dregEnc(i->ARM64in.VBinS.argR);
   4126          UInt b1512 = 16; /* impossible */
   4127          switch (i->ARM64in.VBinS.op) {
   4128             case ARM64fpb_DIV: b1512 = X0001; break;
   4129             case ARM64fpb_MUL: b1512 = X0000; break;
   4130             case ARM64fpb_SUB: b1512 = X0011; break;
   4131             case ARM64fpb_ADD: b1512 = X0010; break;
   4132             default: goto bad;
   4133          }
   4134          vassert(b1512 < 16);
   4135          *p++
   4136             = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD);
   4137          goto done;
   4138       }
   4139       case ARM64in_VCmpD: {
   4140          /* 000 11110 01 1 m 00 1000 n 00 000  FCMP Dn, Dm */
   4141          UInt dN = dregEnc(i->ARM64in.VCmpD.argL);
   4142          UInt dM = dregEnc(i->ARM64in.VCmpD.argR);
   4143          *p++ = X_3_8_5_6_5_5(X000, X11110011, dM, X001000, dN, X00000);
   4144          goto done;
   4145       }
   4146       case ARM64in_VCmpS: {
   4147          /* 000 11110 00 1 m 00 1000 n 00 000  FCMP Sn, Sm */
   4148          UInt sN = dregEnc(i->ARM64in.VCmpS.argL);
   4149          UInt sM = dregEnc(i->ARM64in.VCmpS.argR);
   4150          *p++ = X_3_8_5_6_5_5(X000, X11110001, sM, X001000, sN, X00000);
   4151          goto done;
   4152       }
   4153       case ARM64in_VFCSel: {
   4154          /* 31        23 21 20 15   11 9 5
   4155             000 11110 00 1  m  cond 11 n d  FCSEL Sd,Sn,Sm,cond
   4156             000 11110 01 1  m  cond 11 n d  FCSEL Dd,Dn,Dm,cond
   4157          */
   4158          Bool isD  = i->ARM64in.VFCSel.isD;
   4159          UInt dd   = dregEnc(i->ARM64in.VFCSel.dst);
   4160          UInt nn   = dregEnc(i->ARM64in.VFCSel.argL);
   4161          UInt mm   = dregEnc(i->ARM64in.VFCSel.argR);
   4162          UInt cond = (UInt)i->ARM64in.VFCSel.cond;
   4163          vassert(cond < 16);
   4164          *p++ = X_3_8_5_6_5_5(X000, isD ? X11110011 : X11110001,
   4165                               mm, (cond << 2) | X000011, nn, dd);
   4166          goto done;
   4167       }
   4168       case ARM64in_FPCR: {
   4169          Bool toFPCR = i->ARM64in.FPCR.toFPCR;
   4170          UInt iReg   = iregEnc(i->ARM64in.FPCR.iReg);
   4171          if (toFPCR) {
   4172             /* 0xD51B44 000 Rt  MSR fpcr, rT */
   4173             *p++ = 0xD51B4400 | (iReg & 0x1F);
   4174             goto done;
   4175          }
   4176          goto bad; // FPCR -> iReg case currently ATC
   4177       }
   4178       case ARM64in_FPSR: {
   4179          Bool toFPSR = i->ARM64in.FPSR.toFPSR;
   4180          UInt iReg   = iregEnc(i->ARM64in.FPSR.iReg);
   4181          if (toFPSR) {
   4182             /* 0xD51B44 001 Rt  MSR fpsr, rT */
   4183             *p++ = 0xD51B4420 | (iReg & 0x1F);
   4184          } else {
   4185             /* 0xD53B44 001 Rt  MRS rT, fpsr */
   4186             *p++ = 0xD53B4420 | (iReg & 0x1F);
   4187          }
   4188          goto done;
   4189       }
   4190       case ARM64in_VBinV: {
   4191          /* 31        23   20 15     9 4
   4192             010 01110 11 1 m  100001 n d   ADD Vd.2d,  Vn.2d,  Vm.2d
   4193             010 01110 10 1 m  100001 n d   ADD Vd.4s,  Vn.4s,  Vm.4s
   4194             010 01110 01 1 m  100001 n d   ADD Vd.8h,  Vn.8h,  Vm.8h
   4195             010 01110 00 1 m  100001 n d   ADD Vd.16b, Vn.16b, Vm.16b
   4196 
   4197             011 01110 11 1 m  100001 n d   SUB Vd.2d,  Vn.2d,  Vm.2d
   4198             011 01110 10 1 m  100001 n d   SUB Vd.4s,  Vn.4s,  Vm.4s
   4199             011 01110 01 1 m  100001 n d   SUB Vd.8h,  Vn.8h,  Vm.8h
   4200             011 01110 00 1 m  100001 n d   SUB Vd.16b, Vn.16b, Vm.16b
   4201 
   4202             010 01110 10 1 m  100111 n d   MUL Vd.4s,  Vn.4s,  Vm.4s
   4203             010 01110 01 1 m  100111 n d   MUL Vd.8h,  Vn.8h,  Vm.8h
   4204             010 01110 00 1 m  100111 n d   MUL Vd.16b, Vn.16b, Vm.16b
   4205 
   4206             010 01110 01 1 m  110101 n d   FADD Vd.2d, Vn.2d, Vm.2d
   4207             010 01110 00 1 m  110101 n d   FADD Vd.4s, Vn.4s, Vm.4s
   4208             010 01110 11 1 m  110101 n d   FSUB Vd.2d, Vn.2d, Vm.2d
   4209             010 01110 10 1 m  110101 n d   FSUB Vd.4s, Vn.4s, Vm.4s
   4210 
   4211             011 01110 01 1 m  110111 n d   FMUL Vd.2d, Vn.2d, Vm.2d
   4212             011 01110 00 1 m  110111 n d   FMUL Vd.4s, Vn.4s, Vm.4s
   4213             011 01110 01 1 m  111111 n d   FDIV Vd.2d, Vn.2d, Vm.2d
   4214             011 01110 00 1 m  111111 n d   FDIV Vd.4s, Vn.4s, Vm.4s
   4215 
   4216             010 01110 01 1 m  111101 n d   FMAX Vd.2d, Vn.2d, Vm.2d
   4217             010 01110 00 1 m  111101 n d   FMAX Vd.4s, Vn.4s, Vm.4s
   4218             010 01110 11 1 m  111101 n d   FMIN Vd.2d, Vn.2d, Vm.2d
   4219             010 01110 10 1 m  111101 n d   FMIN Vd.4s, Vn.4s, Vm.4s
   4220 
   4221             011 01110 10 1 m  011001 n d   UMAX Vd.4s,  Vn.4s,  Vm.4s
   4222             011 01110 01 1 m  011001 n d   UMAX Vd.8h,  Vn.8h,  Vm.8h
   4223             011 01110 00 1 m  011001 n d   UMAX Vd.16b, Vn.16b, Vm.16b
   4224 
   4225             011 01110 10 1 m  011011 n d   UMIN Vd.4s,  Vn.4s,  Vm.4s
   4226             011 01110 01 1 m  011011 n d   UMIN Vd.8h,  Vn.8h,  Vm.8h
   4227             011 01110 00 1 m  011011 n d   UMIN Vd.16b, Vn.16b, Vm.16b
   4228 
   4229             010 01110 10 1 m  011001 n d   SMAX Vd.4s,  Vn.4s,  Vm.4s
   4230             010 01110 01 1 m  011001 n d   SMAX Vd.8h,  Vn.8h,  Vm.8h
   4231             010 01110 00 1 m  011001 n d   SMAX Vd.16b, Vn.16b, Vm.16b
   4232 
   4233             010 01110 10 1 m  011011 n d   SMIN Vd.4s,  Vn.4s,  Vm.4s
   4234             010 01110 01 1 m  011011 n d   SMIN Vd.8h,  Vn.8h,  Vm.8h
   4235             010 01110 00 1 m  011011 n d   SMIN Vd.16b, Vn.16b, Vm.16b
   4236 
   4237             010 01110 00 1 m  000111 n d   AND Vd, Vn, Vm
   4238             010 01110 10 1 m  000111 n d   ORR Vd, Vn, Vm
   4239             011 01110 00 1 m  000111 n d   EOR Vd, Vn, Vm
   4240 
   4241             011 01110 11 1 m  100011 n d   CMEQ Vd.2d,  Vn.2d,  Vm.2d
   4242             011 01110 10 1 m  100011 n d   CMEQ Vd.4s,  Vn.4s,  Vm.4s
   4243             011 01110 01 1 m  100011 n d   CMEQ Vd.8h,  Vn.8h,  Vm.8h
   4244             011 01110 00 1 m  100011 n d   CMEQ Vd.16b, Vn.16b, Vm.16b
   4245 
   4246             011 01110 11 1 m  001101 n d   CMHI Vd.2d,  Vn.2d,  Vm.2d
   4247             011 01110 10 1 m  001101 n d   CMHI Vd.4s,  Vn.4s,  Vm.4s
   4248             011 01110 01 1 m  001101 n d   CMHI Vd.8h,  Vn.8h,  Vm.8h
   4249             011 01110 00 1 m  001101 n d   CMHI Vd.16b, Vn.16b, Vm.16b
   4250 
   4251             010 01110 11 1 m  001101 n d   CMGT Vd.2d,  Vn.2d,  Vm.2d
   4252             010 01110 10 1 m  001101 n d   CMGT Vd.4s,  Vn.4s,  Vm.4s
   4253             010 01110 01 1 m  001101 n d   CMGT Vd.8h,  Vn.8h,  Vm.8h
   4254             010 01110 00 1 m  001101 n d   CMGT Vd.16b, Vn.16b, Vm.16b
   4255 
   4256             010 01110 01 1 m  111001 n d   FCMEQ Vd.2d, Vn.2d, Vm.2d
   4257             010 01110 00 1 m  111001 n d   FCMEQ Vd.4s, Vn.4s, Vm.4s
   4258 
   4259             011 01110 01 1 m  111001 n d   FCMGE Vd.2d, Vn.2d, Vm.2d
   4260             011 01110 00 1 m  111001 n d   FCMGE Vd.4s, Vn.4s, Vm.4s
   4261 
   4262             011 01110 11 1 m  111001 n d   FCMGT Vd.2d, Vn.2d, Vm.2d
   4263             011 01110 10 1 m  111001 n d   FCMGT Vd.4s, Vn.4s, Vm.4s
   4264 
   4265             010 01110 00 0 m  000000 n d   TBL Vd.16b, {Vn.16b}, Vm.16b
   4266 
   4267             010 01110 11 0 m  000110 n d   UZP1 Vd.2d,  Vn.2d,  Vm.2d
   4268             010 01110 10 0 m  000110 n d   UZP1 Vd.4s,  Vn.4s,  Vm.4s
   4269             010 01110 01 0 m  000110 n d   UZP1 Vd.8h,  Vn.8h,  Vm.8h
   4270             010 01110 00 0 m  000110 n d   UZP1 Vd.16b, Vn.16b, Vm.16b
   4271 
   4272             010 01110 11 0 m  010110 n d   UZP2 Vd.2d,  Vn.2d,  Vm.2d
   4273             010 01110 10 0 m  010110 n d   UZP2 Vd.4s,  Vn.4s,  Vm.4s
   4274             010 01110 01 0 m  010110 n d   UZP2 Vd.8h,  Vn.8h,  Vm.8h
   4275             010 01110 00 0 m  010110 n d   UZP2 Vd.16b, Vn.16b, Vm.16b
   4276 
   4277             010 01110 10 0 m  001110 n d   ZIP1 Vd.4s,  Vn.4s,  Vm.4s
   4278             010 01110 01 0 m  001110 n d   ZIP1 Vd.8h,  Vn.8h,  Vm.8h
    4279             010 01110 00 0 m  001110 n d   ZIP1 Vd.16b, Vn.16b, Vm.16b
   4280 
   4281             010 01110 10 0 m  011110 n d   ZIP2 Vd.4s,  Vn.4s,  Vm.4s
   4282             010 01110 01 0 m  011110 n d   ZIP2 Vd.8h,  Vn.8h,  Vm.8h
    4283             010 01110 00 0 m  011110 n d   ZIP2 Vd.16b, Vn.16b, Vm.16b
   4284 
   4285             011 01110 00 1 m  100111 n d   PMUL Vd.16b, Vn.16b, Vm.16b
   4286 
   4287             000 01110 00 1 m  111000 n d   PMULL Vd.8h, Vn.8b, Vm.8b
   4288 
   4289             001 01110 10 1 m  110000 n d   UMULL Vd.2d, Vn.2s, Vm.2s
   4290             001 01110 01 1 m  110000 n d   UMULL Vd.4s, Vn.4h, Vm.4h
   4291             001 01110 00 1 m  110000 n d   UMULL Vd.8h, Vn.8b, Vm.8b
   4292 
   4293             000 01110 10 1 m  110000 n d   SMULL Vd.2d, Vn.2s, Vm.2s
   4294             000 01110 01 1 m  110000 n d   SMULL Vd.4s, Vn.4h, Vm.4h
   4295             000 01110 00 1 m  110000 n d   SMULL Vd.8h, Vn.8b, Vm.8b
   4296 
   4297             010 01110 11 1 m  000011 n d   SQADD Vd.2d,  Vn.2d,  Vm.2d
   4298             010 01110 10 1 m  000011 n d   SQADD Vd.4s,  Vn.4s,  Vm.4s
   4299             010 01110 01 1 m  000011 n d   SQADD Vd.8h,  Vn.8h,  Vm.8h
   4300             010 01110 00 1 m  000011 n d   SQADD Vd.16b, Vn.16b, Vm.16b
   4301 
   4302             011 01110 11 1 m  000011 n d   UQADD Vd.2d,  Vn.2d,  Vm.2d
   4303             011 01110 10 1 m  000011 n d   UQADD Vd.4s,  Vn.4s,  Vm.4s
   4304             011 01110 01 1 m  000011 n d   UQADD Vd.8h,  Vn.8h,  Vm.8h
   4305             011 01110 00 1 m  000011 n d   UQADD Vd.16b, Vn.16b, Vm.16b
   4306 
   4307             010 01110 11 1 m  001011 n d   SQSUB Vd.2d,  Vn.2d,  Vm.2d
   4308             010 01110 10 1 m  001011 n d   SQSUB Vd.4s,  Vn.4s,  Vm.4s
   4309             010 01110 01 1 m  001011 n d   SQSUB Vd.8h,  Vn.8h,  Vm.8h
   4310             010 01110 00 1 m  001011 n d   SQSUB Vd.16b, Vn.16b, Vm.16b
   4311 
   4312             011 01110 11 1 m  001011 n d   UQSUB Vd.2d,  Vn.2d,  Vm.2d
   4313             011 01110 10 1 m  001011 n d   UQSUB Vd.4s,  Vn.4s,  Vm.4s
   4314             011 01110 01 1 m  001011 n d   UQSUB Vd.8h,  Vn.8h,  Vm.8h
   4315             011 01110 00 1 m  001011 n d   UQSUB Vd.16b, Vn.16b, Vm.16b
   4316 
   4317             000 01110 10 1 m  110100 n d   SQDMULL Vd.2d, Vn.2s, Vm.2s
   4318             000 01110 01 1 m  110100 n d   SQDMULL Vd.4s, Vn.4h, Vm.4h
   4319 
   4320             010 01110 10 1 m  101101 n d   SQDMULH   Vd.4s,  Vn.4s,  Vm.4s
   4321             010 01110 01 1 m  101101 n d   SQDMULH   Vd.8h,  Vn.8h,  Vm.8h
   4322             011 01110 10 1 m  101101 n d   SQRDMULH  Vd.4s,  Vn.4s,  Vm.4s
    4323             011 01110 01 1 m  101101 n d   SQRDMULH  Vd.8h,  Vn.8h,  Vm.8h
   4324 
   4325             010 01110 sz 1 m  010011 n d   SQSHL@sz   Vd, Vn, Vm
   4326             010 01110 sz 1 m  010111 n d   SQRSHL@sz  Vd, Vn, Vm
   4327             011 01110 sz 1 m  010011 n d   UQSHL@sz   Vd, Vn, Vm
    4328             011 01110 sz 1 m  010111 n d   UQRSHL@sz  Vd, Vn, Vm
   4329 
   4330             010 01110 sz 1 m  010001 n d   SSHL@sz   Vd, Vn, Vm
   4331             010 01110 sz 1 m  010101 n d   SRSHL@sz  Vd, Vn, Vm
   4332             011 01110 sz 1 m  010001 n d   USHL@sz   Vd, Vn, Vm
   4333             011 01110 sz 1 m  010101 n d   URSHL@sz  Vd, Vn, Vm
   4334 
   4335             010 01110 01 1 m  111111 n d   FRECPS  Vd.2d, Vn.2d, Vm.2d
   4336             010 01110 00 1 m  111111 n d   FRECPS  Vd.4s, Vn.4s, Vm.4s
   4337             010 01110 11 1 m  111111 n d   FRSQRTS Vd.2d, Vn.2d, Vm.2d
   4338             010 01110 10 1 m  111111 n d   FRSQRTS Vd.4s, Vn.4s, Vm.4s
   4339          */
   4340          UInt vD = qregEnc(i->ARM64in.VBinV.dst);
   4341          UInt vN = qregEnc(i->ARM64in.VBinV.argL);
   4342          UInt vM = qregEnc(i->ARM64in.VBinV.argR);
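                  /* In these encodings, the leading 3-bit field is {0, Q, U} and
                     the 8-bit field is 01110 followed by the 2-bit element-size
                     code (00=B, 01=H, 10=S, 11=D) and bit 21, so neighbouring
                     cases below differ only in the U and size bits. */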
   4343          switch (i->ARM64in.VBinV.op) {
   4344             case ARM64vecb_ADD64x2:
   4345                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD);
   4346                break;
   4347             case ARM64vecb_ADD32x4:
   4348                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100001, vN, vD);
   4349                break;
   4350             case ARM64vecb_ADD16x8:
   4351                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100001, vN, vD);
   4352                break;
   4353             case ARM64vecb_ADD8x16:
   4354                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100001, vN, vD);
   4355                break;
   4356             case ARM64vecb_SUB64x2:
   4357                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100001, vN, vD);
   4358                break;
   4359             case ARM64vecb_SUB32x4:
   4360                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100001, vN, vD);
   4361                break;
   4362             case ARM64vecb_SUB16x8:
   4363                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100001, vN, vD);
   4364                break;
   4365             case ARM64vecb_SUB8x16:
   4366                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100001, vN, vD);
   4367                break;
   4368             case ARM64vecb_MUL32x4:
   4369                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100111, vN, vD);
   4370                break;
   4371             case ARM64vecb_MUL16x8:
   4372                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100111, vN, vD);
   4373                break;
   4374             case ARM64vecb_MUL8x16:
   4375                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100111, vN, vD);
   4376                break;
   4377             case ARM64vecb_FADD64x2:
   4378                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X110101, vN, vD);
   4379                break;
   4380             case ARM64vecb_FADD32x4:
   4381                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X110101, vN, vD);
   4382                break;
   4383             case ARM64vecb_FSUB64x2:
   4384                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X110101, vN, vD);
   4385                break;
   4386             case ARM64vecb_FSUB32x4:
   4387                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X110101, vN, vD);
   4388                break;
   4389             case ARM64vecb_FMUL64x2:
   4390                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X110111, vN, vD);
   4391                break;
   4392             case ARM64vecb_FMUL32x4:
   4393                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X110111, vN, vD);
   4394                break;
   4395             case ARM64vecb_FDIV64x2:
   4396                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111111, vN, vD);
   4397                break;
   4398             case ARM64vecb_FDIV32x4:
   4399                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111111, vN, vD);
   4400                break;
   4401 
   4402             case ARM64vecb_FMAX64x2:
   4403                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111101, vN, vD);
   4404                break;
   4405             case ARM64vecb_FMAX32x4:
   4406                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111101, vN, vD);
   4407                break;
   4408             case ARM64vecb_FMIN64x2:
   4409                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111101, vN, vD);
   4410                break;
   4411             case ARM64vecb_FMIN32x4:
   4412                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111101, vN, vD);
   4413                break;
   4414 
   4415             case ARM64vecb_UMAX32x4:
   4416                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011001, vN, vD);
   4417                break;
   4418             case ARM64vecb_UMAX16x8:
   4419                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011001, vN, vD);
   4420                break;
   4421             case ARM64vecb_UMAX8x16:
   4422                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011001, vN, vD);
   4423                break;
   4424 
   4425             case ARM64vecb_UMIN32x4:
   4426                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011011, vN, vD);
   4427                break;
   4428             case ARM64vecb_UMIN16x8:
   4429                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011011, vN, vD);
   4430                break;
   4431             case ARM64vecb_UMIN8x16:
   4432                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011011, vN, vD);
   4433                break;
   4434 
   4435             case ARM64vecb_SMAX32x4:
   4436                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011001, vN, vD);
   4437                break;
   4438             case ARM64vecb_SMAX16x8:
   4439                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011001, vN, vD);
   4440                break;
   4441             case ARM64vecb_SMAX8x16:
   4442                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011001, vN, vD);
   4443                break;
   4444 
   4445             case ARM64vecb_SMIN32x4:
   4446                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011011, vN, vD);
   4447                break;
   4448             case ARM64vecb_SMIN16x8:
   4449                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011011, vN, vD);
   4450                break;
   4451             case ARM64vecb_SMIN8x16:
   4452                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011011, vN, vD);
   4453                break;
   4454 
   4455             case ARM64vecb_AND:
   4456                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000111, vN, vD);
   4457                break;
   4458             case ARM64vecb_ORR:
   4459                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000111, vN, vD);
   4460                break;
   4461             case ARM64vecb_XOR:
   4462                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000111, vN, vD);
   4463                break;
   4464 
   4465             case ARM64vecb_CMEQ64x2:
   4466                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100011, vN, vD);
   4467                break;
   4468             case ARM64vecb_CMEQ32x4:
   4469                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100011, vN, vD);
   4470                break;
   4471             case ARM64vecb_CMEQ16x8:
   4472                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100011, vN, vD);
   4473                break;
   4474             case ARM64vecb_CMEQ8x16:
   4475                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100011, vN, vD);
   4476                break;
   4477 
   4478             case ARM64vecb_CMHI64x2:
   4479                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM,  X001101, vN, vD);
   4480                break;
   4481             case ARM64vecb_CMHI32x4:
   4482                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM,  X001101, vN, vD);
   4483                break;
   4484             case ARM64vecb_CMHI16x8:
   4485                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM,  X001101, vN, vD);
   4486                break;
   4487             case ARM64vecb_CMHI8x16:
   4488                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM,  X001101, vN, vD);
   4489                break;
   4490 
   4491             case ARM64vecb_CMGT64x2:
   4492                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM,  X001101, vN, vD);
   4493                break;
   4494             case ARM64vecb_CMGT32x4:
   4495                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM,  X001101, vN, vD);
   4496                break;
   4497             case ARM64vecb_CMGT16x8:
   4498                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM,  X001101, vN, vD);
   4499                break;
   4500             case ARM64vecb_CMGT8x16:
   4501                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM,  X001101, vN, vD);
   4502                break;
   4503 
   4504             case ARM64vecb_FCMEQ64x2:
   4505                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111001, vN, vD);
   4506                break;
   4507             case ARM64vecb_FCMEQ32x4:
   4508                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111001, vN, vD);
   4509                break;
   4510 
   4511             case ARM64vecb_FCMGE64x2:
   4512                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111001, vN, vD);
   4513                break;
   4514             case ARM64vecb_FCMGE32x4:
   4515                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111001, vN, vD);
   4516                break;
   4517 
   4518             case ARM64vecb_FCMGT64x2:
   4519                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X111001, vN, vD);
   4520                break;
   4521             case ARM64vecb_FCMGT32x4:
   4522                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X111001, vN, vD);
   4523                break;
   4524 
   4525             case ARM64vecb_TBL1:
   4526                *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000000, vN, vD);
   4527                break;
   4528 
   4529             case ARM64vecb_UZP164x2:
   4530                *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X000110, vN, vD);
   4531                break;
   4532             case ARM64vecb_UZP132x4:
   4533                *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X000110, vN, vD);
   4534                break;
   4535             case ARM64vecb_UZP116x8:
   4536                *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X000110, vN, vD);
   4537                break;
   4538             case ARM64vecb_UZP18x16:
   4539                *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000110, vN, vD);
   4540                break;
   4541 
   4542             case ARM64vecb_UZP264x2:
   4543                *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X010110, vN, vD);
   4544                break;
   4545             case ARM64vecb_UZP232x4:
   4546                *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X010110, vN, vD);
   4547                break;
   4548             case ARM64vecb_UZP216x8:
   4549                *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X010110, vN, vD);
   4550                break;
   4551             case ARM64vecb_UZP28x16:
   4552                *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X010110, vN, vD);
   4553                break;
   4554 
   4555             case ARM64vecb_ZIP132x4:
   4556                *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X001110, vN, vD);
   4557                break;
   4558             case ARM64vecb_ZIP116x8:
   4559                *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X001110, vN, vD);
   4560                break;
   4561             case ARM64vecb_ZIP18x16:
   4562                *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X001110, vN, vD);
   4563                break;
   4564 
   4565             case ARM64vecb_ZIP232x4:
   4566                *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X011110, vN, vD);
   4567                break;
   4568             case ARM64vecb_ZIP216x8:
   4569                *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X011110, vN, vD);
   4570                break;
   4571             case ARM64vecb_ZIP28x16:
   4572                *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X011110, vN, vD);
   4573                break;
   4574 
   4575             case ARM64vecb_PMUL8x16:
   4576                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100111, vN, vD);
   4577                break;
   4578 
   4579             case ARM64vecb_PMULL8x8:
   4580                *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X111000, vN, vD);
   4581                break;
   4582 
   4583             case ARM64vecb_UMULL2DSS:
   4584                *p++ = X_3_8_5_6_5_5(X001, X01110101, vM, X110000, vN, vD);
   4585                break;
   4586             case ARM64vecb_UMULL4SHH:
   4587                *p++ = X_3_8_5_6_5_5(X001, X01110011, vM, X110000, vN, vD);
   4588                break;
   4589             case ARM64vecb_UMULL8HBB:
   4590                *p++ = X_3_8_5_6_5_5(X001, X01110001, vM, X110000, vN, vD);
   4591                break;
   4592 
   4593             case ARM64vecb_SMULL2DSS:
   4594                *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110000, vN, vD);
   4595                break;
   4596             case ARM64vecb_SMULL4SHH:
   4597                *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110000, vN, vD);
   4598                break;
   4599             case ARM64vecb_SMULL8HBB:
   4600                *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X110000, vN, vD);
   4601                break;
   4602 
   4603             case ARM64vecb_SQADD64x2:
   4604                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X000011, vN, vD);
   4605                break;
   4606             case ARM64vecb_SQADD32x4:
   4607                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000011, vN, vD);
   4608                break;
   4609             case ARM64vecb_SQADD16x8:
   4610                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X000011, vN, vD);
   4611                break;
   4612             case ARM64vecb_SQADD8x16:
   4613                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000011, vN, vD);
   4614                break;
   4615 
   4616             case ARM64vecb_UQADD64x2:
   4617                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X000011, vN, vD);
   4618                break;
   4619             case ARM64vecb_UQADD32x4:
   4620                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X000011, vN, vD);
   4621                break;
   4622             case ARM64vecb_UQADD16x8:
   4623                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X000011, vN, vD);
   4624                break;
   4625             case ARM64vecb_UQADD8x16:
   4626                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000011, vN, vD);
   4627                break;
   4628 
   4629             case ARM64vecb_SQSUB64x2:
   4630                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001011, vN, vD);
   4631                break;
   4632             case ARM64vecb_SQSUB32x4:
   4633                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001011, vN, vD);
   4634                break;
   4635             case ARM64vecb_SQSUB16x8:
   4636                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001011, vN, vD);
   4637                break;
   4638             case ARM64vecb_SQSUB8x16:
   4639                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001011, vN, vD);
   4640                break;
   4641 
   4642             case ARM64vecb_UQSUB64x2:
   4643                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001011, vN, vD);
   4644                break;
   4645             case ARM64vecb_UQSUB32x4:
   4646                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001011, vN, vD);
   4647                break;
   4648             case ARM64vecb_UQSUB16x8:
   4649                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001011, vN, vD);
   4650                break;
   4651             case ARM64vecb_UQSUB8x16:
   4652                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001011, vN, vD);
   4653                break;
   4654 
   4655             case ARM64vecb_SQDMULL2DSS:
   4656                *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110100, vN, vD);
   4657                break;
   4658             case ARM64vecb_SQDMULL4SHH:
   4659                *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110100, vN, vD);
   4660                break;
   4661 
   4662             case ARM64vecb_SQDMULH32x4:
   4663                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X101101, vN, vD);
   4664                break;
   4665             case ARM64vecb_SQDMULH16x8:
   4666                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X101101, vN, vD);
   4667                break;
   4668             case ARM64vecb_SQRDMULH32x4:
   4669                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X101101, vN, vD);
   4670                break;
   4671             case ARM64vecb_SQRDMULH16x8:
   4672                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X101101, vN, vD);
   4673                break;
   4674 
   4675             case ARM64vecb_SQSHL64x2:
   4676                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010011, vN, vD);
   4677                break;
   4678             case ARM64vecb_SQSHL32x4:
   4679                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010011, vN, vD);
   4680                break;
   4681             case ARM64vecb_SQSHL16x8:
   4682                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010011, vN, vD);
   4683                break;
   4684             case ARM64vecb_SQSHL8x16:
   4685                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010011, vN, vD);
   4686                break;
   4687 
   4688             case ARM64vecb_SQRSHL64x2:
   4689                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010111, vN, vD);
   4690                break;
   4691             case ARM64vecb_SQRSHL32x4:
   4692                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010111, vN, vD);
   4693                break;
   4694             case ARM64vecb_SQRSHL16x8:
   4695                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010111, vN, vD);
   4696                break;
   4697             case ARM64vecb_SQRSHL8x16:
   4698                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010111, vN, vD);
   4699                break;
   4700 
   4701             case ARM64vecb_UQSHL64x2:
   4702                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010011, vN, vD);
   4703                break;
   4704             case ARM64vecb_UQSHL32x4:
   4705                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010011, vN, vD);
   4706                break;
   4707             case ARM64vecb_UQSHL16x8:
   4708                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010011, vN, vD);
   4709                break;
   4710             case ARM64vecb_UQSHL8x16:
   4711                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010011, vN, vD);
   4712                break;
   4713 
   4714             case ARM64vecb_UQRSHL64x2:
   4715                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010111, vN, vD);
   4716                break;
   4717             case ARM64vecb_UQRSHL32x4:
   4718                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010111, vN, vD);
   4719                break;
   4720             case ARM64vecb_UQRSHL16x8:
   4721                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010111, vN, vD);
   4722                break;
   4723             case ARM64vecb_UQRSHL8x16:
   4724                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010111, vN, vD);
   4725                break;
   4726 
   4727             case ARM64vecb_SSHL64x2:
   4728                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010001, vN, vD);
   4729                break;
   4730             case ARM64vecb_SSHL32x4:
   4731                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010001, vN, vD);
   4732                break;
   4733             case ARM64vecb_SSHL16x8:
   4734                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010001, vN, vD);
   4735                break;
   4736             case ARM64vecb_SSHL8x16:
   4737                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010001, vN, vD);
   4738                break;
   4739 
   4740             case ARM64vecb_SRSHL64x2:
   4741                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010101, vN, vD);
   4742                break;
   4743             case ARM64vecb_SRSHL32x4:
   4744                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010101, vN, vD);
   4745                break;
   4746             case ARM64vecb_SRSHL16x8:
   4747                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010101, vN, vD);
   4748                break;
   4749             case ARM64vecb_SRSHL8x16:
   4750                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010101, vN, vD);
   4751                break;
   4752 
   4753             case ARM64vecb_USHL64x2:
   4754                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010001, vN, vD);
   4755                break;
   4756             case ARM64vecb_USHL32x4:
   4757                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010001, vN, vD);
   4758                break;
   4759             case ARM64vecb_USHL16x8:
   4760                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010001, vN, vD);
   4761                break;
   4762             case ARM64vecb_USHL8x16:
   4763                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010001, vN, vD);
   4764                break;
   4765 
   4766             case ARM64vecb_URSHL64x2:
   4767                *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010101, vN, vD);
   4768                break;
   4769             case ARM64vecb_URSHL32x4:
   4770                *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010101, vN, vD);
   4771                break;
   4772             case ARM64vecb_URSHL16x8:
   4773                *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010101, vN, vD);
   4774                break;
   4775             case ARM64vecb_URSHL8x16:
   4776                *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010101, vN, vD);
   4777                break;
   4778 
   4779             case ARM64vecb_FRECPS64x2:
   4780                *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111111, vN, vD);
   4781                break;
   4782             case ARM64vecb_FRECPS32x4:
   4783                *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111111, vN, vD);
   4784                break;
   4785             case ARM64vecb_FRSQRTS64x2:
   4786                *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111111, vN, vD);
   4787                break;
   4788             case ARM64vecb_FRSQRTS32x4:
   4789                *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111111, vN, vD);
   4790                break;
   4791 
   4792             default:
   4793                goto bad;
   4794          }
   4795          goto done;
   4796       }
   4797       case ARM64in_VModifyV: {
   4798          /* 31        23   20    15     9 4
   4799             010 01110 sz 1 00000 001110 n d   SUQADD@sz  Vd, Vn
   4800             011 01110 sz 1 00000 001110 n d   USQADD@sz  Vd, Vn
   4801          */
   4802          UInt vD = qregEnc(i->ARM64in.VModifyV.mod);
   4803          UInt vN = qregEnc(i->ARM64in.VModifyV.arg);
   4804          switch (i->ARM64in.VModifyV.op) {
   4805             case ARM64vecmo_SUQADD64x2:
   4806                *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X001110, vN, vD);
   4807                break;
   4808             case ARM64vecmo_SUQADD32x4:
   4809                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X001110, vN, vD);
   4810                break;
   4811             case ARM64vecmo_SUQADD16x8:
   4812                *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X001110, vN, vD);
   4813                break;
   4814             case ARM64vecmo_SUQADD8x16:
   4815                *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X001110, vN, vD);
   4816                break;
   4817             case ARM64vecmo_USQADD64x2:
   4818                *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X001110, vN, vD);
   4819                break;
   4820             case ARM64vecmo_USQADD32x4:
   4821                *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X001110, vN, vD);
   4822                break;
   4823             case ARM64vecmo_USQADD16x8:
   4824                *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X001110, vN, vD);
   4825                break;
   4826             case ARM64vecmo_USQADD8x16:
   4827                *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X001110, vN, vD);
   4828                break;
   4829             default:
   4830                goto bad;
   4831          }
   4832          goto done;
   4833       }
   4834       case ARM64in_VUnaryV: {
   4835          /* 31        23   20    15     9 4
   4836             010 01110 11 1 00000 111110 n d  FABS Vd.2d,  Vn.2d
   4837             010 01110 10 1 00000 111110 n d  FABS Vd.4s,  Vn.4s
   4838             011 01110 11 1 00000 111110 n d  FNEG Vd.2d,  Vn.2d
   4839             011 01110 10 1 00000 111110 n d  FNEG Vd.4s,  Vn.4s
   4840             011 01110 00 1 00000 010110 n d  NOT  Vd.16b, Vn.16b
   4841 
   4842             010 01110 11 1 00000 101110 n d  ABS  Vd.2d,  Vn.2d
   4843             010 01110 10 1 00000 101110 n d  ABS  Vd.4s,  Vn.4s
   4844             010 01110 01 1 00000 101110 n d  ABS  Vd.8h,  Vn.8h
   4845             010 01110 00 1 00000 101110 n d  ABS  Vd.16b, Vn.16b
   4846 
   4847             010 01110 10 1 00000 010010 n d  CLS  Vd.4s,  Vn.4s
   4848             010 01110 01 1 00000 010010 n d  CLS  Vd.8h,  Vn.8h
   4849             010 01110 00 1 00000 010010 n d  CLS  Vd.16b, Vn.16b
   4850 
   4851             011 01110 10 1 00000 010010 n d  CLZ  Vd.4s,  Vn.4s
   4852             011 01110 01 1 00000 010010 n d  CLZ  Vd.8h,  Vn.8h
   4853             011 01110 00 1 00000 010010 n d  CLZ  Vd.16b, Vn.16b
   4854 
   4855             010 01110 00 1 00000 010110 n d  CNT  Vd.16b, Vn.16b
   4856 
   4857             011 01110 01 1 00000 010110 n d  RBIT  Vd.16b, Vn.16b
   4858             010 01110 00 1 00000 000110 n d  REV16 Vd.16b, Vn.16b
   4859             011 01110 00 1 00000 000010 n d  REV32 Vd.16b, Vn.16b
   4860             011 01110 01 1 00000 000010 n d  REV32 Vd.8h, Vn.8h
   4861 
   4862             010 01110 00 1 00000 000010 n d  REV64 Vd.16b, Vn.16b
   4863             010 01110 01 1 00000 000010 n d  REV64 Vd.8h, Vn.8h
   4864             010 01110 10 1 00000 000010 n d  REV64 Vd.4s, Vn.4s
   4865 
   4866             010 01110 10 1 00001 110010 n d  URECPE Vd.4s, Vn.4s
   4867             011 01110 10 1 00001 110010 n d  URSQRTE Vd.4s, Vn.4s
   4868 
   4869             010 01110 11 1 00001 110110 n d  FRECPE Vd.2d, Vn.2d
   4870             010 01110 10 1 00001 110110 n d  FRECPE Vd.4s, Vn.4s
   4871 
   4872             011 01110 11 1 00001 110110 n d  FRSQRTE Vd.2d, Vn.2d
   4873             011 01110 10 1 00001 110110 n d  FRSQRTE Vd.4s, Vn.4s
   4874 
   4875             011 01110 11 1 00001 111110 n d  FSQRT Vd.2d, Vn.2d
   4876             011 01110 10 1 00001 111110 n d  FSQRT Vd.4s, Vn.4s
   4877          */
   4878          UInt vD = qregEnc(i->ARM64in.VUnaryV.dst);
   4879          UInt vN = qregEnc(i->ARM64in.VUnaryV.arg);
   4880          switch (i->ARM64in.VUnaryV.op) {
   4881             case ARM64vecu_FABS64x2:
   4882                *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD);
   4883                break;
   4884             case ARM64vecu_FABS32x4:
   4885                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X111110, vN, vD);
   4886                break;
   4887             case ARM64vecu_FNEG64x2:
   4888                *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD);
   4889                break;
   4890             case ARM64vecu_FNEG32x4:
   4891                *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X111110, vN, vD);
   4892                break;
   4893             case ARM64vecu_NOT:
   4894                *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD);
   4895                break;
   4896             case ARM64vecu_ABS64x2:
   4897                *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X101110, vN, vD);
   4898                break;
   4899             case ARM64vecu_ABS32x4:
   4900                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X101110, vN, vD);
   4901                break;
   4902             case ARM64vecu_ABS16x8:
   4903                *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X101110, vN, vD);
   4904                break;
   4905             case ARM64vecu_ABS8x16:
   4906                *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X101110, vN, vD);
   4907                break;
   4908             case ARM64vecu_CLS32x4:
   4909                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X010010, vN, vD);
   4910                break;
   4911             case ARM64vecu_CLS16x8:
   4912                *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X010010, vN, vD);
   4913                break;
   4914             case ARM64vecu_CLS8x16:
   4915                *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010010, vN, vD);
   4916                break;
   4917             case ARM64vecu_CLZ32x4:
   4918                *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X010010, vN, vD);
   4919                break;
   4920             case ARM64vecu_CLZ16x8:
   4921                *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010010, vN, vD);
   4922                break;
   4923             case ARM64vecu_CLZ8x16:
   4924                *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010010, vN, vD);
   4925                break;
   4926             case ARM64vecu_CNT8x16:
   4927                *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010110, vN, vD);
   4928                break;
   4929             case ARM64vecu_RBIT:
   4930                *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010110, vN, vD);
   4931                break;
   4932             case ARM64vecu_REV1616B:
   4933                *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000110, vN, vD);
   4934                break;
   4935             case ARM64vecu_REV3216B:
   4936                *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X000010, vN, vD);
   4937                break;
   4938             case ARM64vecu_REV328H:
   4939                *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X000010, vN, vD);
   4940                break;
   4941             case ARM64vecu_REV6416B:
   4942                *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000010, vN, vD);
   4943                break;
   4944             case ARM64vecu_REV648H:
   4945                *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X000010, vN, vD);
   4946                break;
   4947             case ARM64vecu_REV644S:
   4948                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X000010, vN, vD);
   4949                break;
   4950             case ARM64vecu_URECPE32x4:
   4951                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110010, vN, vD);
   4952                break;
   4953             case ARM64vecu_URSQRTE32x4:
   4954                *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110010, vN, vD);
   4955                break;
   4956             case ARM64vecu_FRECPE64x2:
   4957                *p++ = X_3_8_5_6_5_5(X010, X01110111, X00001, X110110, vN, vD);
   4958                break;
   4959             case ARM64vecu_FRECPE32x4:
   4960                *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110110, vN, vD);
   4961                break;
   4962             case ARM64vecu_FRSQRTE64x2:
   4963                *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X110110, vN, vD);
   4964                break;
   4965             case ARM64vecu_FRSQRTE32x4:
   4966                *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110110, vN, vD);
   4967                break;
   4968             case ARM64vecu_FSQRT64x2:
   4969                *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X111110, vN, vD);
   4970                break;
   4971             case ARM64vecu_FSQRT32x4:
   4972                *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X111110, vN, vD);
   4973                break;
   4974             default:
   4975                goto bad;
   4976          }
   4977          goto done;
   4978       }
   4979       case ARM64in_VNarrowV: {
   4980          /* 31        23 21      15     9 4
   4981             000 01110 00 1,00001 001010 n d  XTN Vd.8b, Vn.8h
   4982             000 01110 01 1,00001 001010 n d  XTN Vd.4h, Vn.4s
   4983             000 01110 10 1,00001 001010 n d  XTN Vd.2s, Vn.2d
   4984 
   4985             001 01110 00 1,00001 001010 n d  SQXTUN Vd.8b, Vn.8h
   4986             001 01110 01 1,00001 001010 n d  SQXTUN Vd.4h, Vn.4s
   4987             001 01110 10 1,00001 001010 n d  SQXTUN Vd.2s, Vn.2d
   4988 
   4989             000 01110 00 1,00001 010010 n d  SQXTN Vd.8b, Vn.8h
   4990             000 01110 01 1,00001 010010 n d  SQXTN Vd.4h, Vn.4s
   4991             000 01110 10 1,00001 010010 n d  SQXTN Vd.2s, Vn.2d
   4992 
   4993             001 01110 00 1,00001 010010 n d  UQXTN Vd.8b, Vn.8h
   4994             001 01110 01 1,00001 010010 n d  UQXTN Vd.4h, Vn.4s
   4995             001 01110 10 1,00001 010010 n d  UQXTN Vd.2s, Vn.2d
   4996          */
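                 /* dszBlg2 is log2 of the destination lane size in bytes
                    (0/1/2 for .8b/.4h/.2s) and is folded into the size bits
                    of the templates below via (dszBlg2 << 1). */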
   4997          UInt vD = qregEnc(i->ARM64in.VNarrowV.dst);
   4998          UInt vN = qregEnc(i->ARM64in.VNarrowV.src);
   4999          UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
   5000          vassert(dszBlg2 >= 0 && dszBlg2 <= 2);
   5001          switch (i->ARM64in.VNarrowV.op) {
   5002             case ARM64vecna_XTN:
   5003                *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
   5004                                     X00001, X001010, vN, vD);
   5005                goto done;
   5006             case ARM64vecna_SQXTUN:
   5007                *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
   5008                                     X00001, X001010, vN, vD);
   5009                goto done;
   5010             case ARM64vecna_SQXTN:
   5011                *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
   5012                                     X00001, X010010, vN, vD);
   5013                goto done;
   5014             case ARM64vecna_UQXTN:
   5015                *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
   5016                                     X00001, X010010, vN, vD);
   5017                goto done;
   5018             default:
   5019                break;
   5020          }
   5021          goto bad;
   5022       }
   5023       case ARM64in_VShiftImmV: {
   5024          /*
   5025             011 011110 immh immb 000001 n d  USHR     Vd.T, Vn.T, #sh
   5026             010 011110 immh immb 000001 n d  SSHR     Vd.T, Vn.T, #sh
   5027 
   5028             001 011110 immh immb 100101 n d  UQSHRN   ,,#sh
   5029             000 011110 immh immb 100101 n d  SQSHRN   ,,#sh
   5030             001 011110 immh immb 100001 n d  SQSHRUN  ,,#sh
   5031 
   5032             001 011110 immh immb 100111 n d  UQRSHRN  ,,#sh
   5033             000 011110 immh immb 100111 n d  SQRSHRN  ,,#sh
   5034             001 011110 immh immb 100011 n d  SQRSHRUN ,,#sh
   5035 
   5036             where immh:immb
   5037                = case T of
   5038                     2d  | sh in 1..64 -> let xxxxxx = 64-sh in 1xxx:xxx
   5039                     4s  | sh in 1..32 -> let  xxxxx = 32-sh in 01xx:xxx
   5040                     8h  | sh in 1..16 -> let   xxxx = 16-sh in 001x:xxx
   5041                     16b | sh in 1..8  -> let    xxx =  8-sh in 0001:xxx
   5042 
   5043             010 011110 immh immb 010101 n d  SHL    Vd.T, Vn.T, #sh
   5044 
   5045             011 011110 immh immb 011101 n d  UQSHL  Vd.T, Vn.T, #sh
   5046             010 011110 immh immb 011101 n d  SQSHL  Vd.T, Vn.T, #sh
   5047             011 011110 immh immb 011001 n d  SQSHLU Vd.T, Vn.T, #sh
   5048 
   5049             where immh:immb
   5050                = case T of
   5051                     2d  | sh in 0..63 -> let xxxxxx = sh in 1xxx:xxx
   5052                     4s  | sh in 0..31 -> let  xxxxx = sh in 01xx:xxx
   5053                     8h  | sh in 0..15 -> let   xxxx = sh in 001x:xxx
   5054                     16b | sh in 0..7  -> let    xxx = sh in 0001:xxx
   5055          */
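                 /* Example: SSHR Vd.4s, Vn.4s, #1 needs immh:immb = 0111:111
                    (xxxxx = 32-1 = 31), whereas SHL Vd.4s, Vn.4s, #1 needs
                    immh:immb = 0100:001 (xxxxx = 1). */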
   5056          UInt vD   = qregEnc(i->ARM64in.VShiftImmV.dst);
   5057          UInt vN   = qregEnc(i->ARM64in.VShiftImmV.src);
   5058          UInt sh   = i->ARM64in.VShiftImmV.amt;
   5059          UInt tmpl = 0; /* invalid */
   5060 
   5061          const UInt tmpl_USHR
   5062             = X_3_6_7_6_5_5(X011, X011110, 0, X000001, vN, vD);
   5063          const UInt tmpl_SSHR
   5064             = X_3_6_7_6_5_5(X010, X011110, 0, X000001, vN, vD);
   5065 
   5066          const UInt tmpl_UQSHRN
   5067             = X_3_6_7_6_5_5(X001, X011110, 0, X100101, vN, vD);
   5068          const UInt tmpl_SQSHRN
   5069             = X_3_6_7_6_5_5(X000, X011110, 0, X100101, vN, vD);
   5070          const UInt tmpl_SQSHRUN
   5071             = X_3_6_7_6_5_5(X001, X011110, 0, X100001, vN, vD);
   5072 
   5073          const UInt tmpl_UQRSHRN
   5074             = X_3_6_7_6_5_5(X001, X011110, 0, X100111, vN, vD);
   5075          const UInt tmpl_SQRSHRN
   5076             = X_3_6_7_6_5_5(X000, X011110, 0, X100111, vN, vD);
   5077          const UInt tmpl_SQRSHRUN
   5078             = X_3_6_7_6_5_5(X001, X011110, 0, X100011, vN, vD);
   5079 
   5080          const UInt tmpl_SHL
   5081             = X_3_6_7_6_5_5(X010, X011110, 0, X010101, vN, vD);
   5082 
   5083          const UInt tmpl_UQSHL
   5084             = X_3_6_7_6_5_5(X011, X011110, 0, X011101, vN, vD);
   5085          const UInt tmpl_SQSHL
   5086             = X_3_6_7_6_5_5(X010, X011110, 0, X011101, vN, vD);
   5087          const UInt tmpl_SQSHLU
   5088             = X_3_6_7_6_5_5(X011, X011110, 0, X011001, vN, vD);
   5089 
   5090          switch (i->ARM64in.VShiftImmV.op) {
   5091             case ARM64vecshi_SSHR64x2:    tmpl = tmpl_SSHR;     goto right64x2;
   5092             case ARM64vecshi_USHR64x2:    tmpl = tmpl_USHR;     goto right64x2;
   5093             case ARM64vecshi_SHL64x2:     tmpl = tmpl_SHL;      goto left64x2;
   5094             case ARM64vecshi_UQSHL64x2:   tmpl = tmpl_UQSHL;    goto left64x2;
   5095             case ARM64vecshi_SQSHL64x2:   tmpl = tmpl_SQSHL;    goto left64x2;
   5096             case ARM64vecshi_SQSHLU64x2:  tmpl = tmpl_SQSHLU;   goto left64x2;
   5097             case ARM64vecshi_SSHR32x4:    tmpl = tmpl_SSHR;     goto right32x4;
   5098             case ARM64vecshi_USHR32x4:    tmpl = tmpl_USHR;     goto right32x4;
   5099             case ARM64vecshi_UQSHRN2SD:   tmpl = tmpl_UQSHRN;   goto right32x4;
   5100             case ARM64vecshi_SQSHRN2SD:   tmpl = tmpl_SQSHRN;   goto right32x4;
   5101             case ARM64vecshi_SQSHRUN2SD:  tmpl = tmpl_SQSHRUN;  goto right32x4;
   5102             case ARM64vecshi_UQRSHRN2SD:  tmpl = tmpl_UQRSHRN;  goto right32x4;
   5103             case ARM64vecshi_SQRSHRN2SD:  tmpl = tmpl_SQRSHRN;  goto right32x4;
   5104             case ARM64vecshi_SQRSHRUN2SD: tmpl = tmpl_SQRSHRUN; goto right32x4;
   5105             case ARM64vecshi_SHL32x4:     tmpl = tmpl_SHL;      goto left32x4;
   5106             case ARM64vecshi_UQSHL32x4:   tmpl = tmpl_UQSHL;    goto left32x4;
   5107             case ARM64vecshi_SQSHL32x4:   tmpl = tmpl_SQSHL;    goto left32x4;
   5108             case ARM64vecshi_SQSHLU32x4:  tmpl = tmpl_SQSHLU;   goto left32x4;
   5109             case ARM64vecshi_SSHR16x8:    tmpl = tmpl_SSHR;     goto right16x8;
   5110             case ARM64vecshi_USHR16x8:    tmpl = tmpl_USHR;     goto right16x8;
   5111             case ARM64vecshi_UQSHRN4HS:   tmpl = tmpl_UQSHRN;   goto right16x8;
   5112             case ARM64vecshi_SQSHRN4HS:   tmpl = tmpl_SQSHRN;   goto right16x8;
   5113             case ARM64vecshi_SQSHRUN4HS:  tmpl = tmpl_SQSHRUN;  goto right16x8;
   5114             case ARM64vecshi_UQRSHRN4HS:  tmpl = tmpl_UQRSHRN;  goto right16x8;
   5115             case ARM64vecshi_SQRSHRN4HS:  tmpl = tmpl_SQRSHRN;  goto right16x8;
   5116             case ARM64vecshi_SQRSHRUN4HS: tmpl = tmpl_SQRSHRUN; goto right16x8;
   5117             case ARM64vecshi_SHL16x8:     tmpl = tmpl_SHL;      goto left16x8;
   5118             case ARM64vecshi_UQSHL16x8:   tmpl = tmpl_UQSHL;    goto left16x8;
   5119             case ARM64vecshi_SQSHL16x8:   tmpl = tmpl_SQSHL;    goto left16x8;
   5120             case ARM64vecshi_SQSHLU16x8:  tmpl = tmpl_SQSHLU;   goto left16x8;
   5121             case ARM64vecshi_SSHR8x16:    tmpl = tmpl_SSHR;     goto right8x16;
   5122             case ARM64vecshi_USHR8x16:    tmpl = tmpl_USHR;     goto right8x16;
   5123             case ARM64vecshi_UQSHRN8BH:   tmpl = tmpl_UQSHRN;   goto right8x16;
   5124             case ARM64vecshi_SQSHRN8BH:   tmpl = tmpl_SQSHRN;   goto right8x16;
   5125             case ARM64vecshi_SQSHRUN8BH:  tmpl = tmpl_SQSHRUN;  goto right8x16;
   5126             case ARM64vecshi_UQRSHRN8BH:  tmpl = tmpl_UQRSHRN;  goto right8x16;
   5127             case ARM64vecshi_SQRSHRN8BH:  tmpl = tmpl_SQRSHRN;  goto right8x16;
   5128             case ARM64vecshi_SQRSHRUN8BH: tmpl = tmpl_SQRSHRUN; goto right8x16;
   5129             case ARM64vecshi_SHL8x16:     tmpl = tmpl_SHL;      goto left8x16;
   5130             case ARM64vecshi_UQSHL8x16:   tmpl = tmpl_UQSHL;    goto left8x16;
   5131             case ARM64vecshi_SQSHL8x16:   tmpl = tmpl_SQSHL;    goto left8x16;
   5132             case ARM64vecshi_SQSHLU8x16:  tmpl = tmpl_SQSHLU;   goto left8x16;
   5133 
   5134             default: break;
   5135 
   5136             right64x2:
   5137                if (sh >= 1 && sh <= 63) {
   5138                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | (64-sh), 0,0,0);
   5139                   goto done;
   5140                }
   5141                break;
   5142             right32x4:
   5143                if (sh >= 1 && sh <= 32) {
   5144                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | (32-sh), 0,0,0);
   5145                   goto done;
   5146                }
   5147                break;
   5148             right16x8:
   5149                if (sh >= 1 && sh <= 16) {
   5150                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | (16-sh), 0,0,0);
   5151                   goto done;
   5152                }
   5153                break;
   5154             right8x16:
   5155                if (sh >= 1 && sh <= 8) {
   5156                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | (8-sh), 0,0,0);
   5157                   goto done;
   5158                }
   5159                break;
   5160 
   5161             left64x2:
   5162                if (sh >= 0 && sh <= 63) {
   5163                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | sh, 0,0,0);
   5164                   goto done;
   5165                }
   5166                break;
   5167             left32x4:
   5168                if (sh >= 0 && sh <= 31) {
   5169                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | sh, 0,0,0);
   5170                   goto done;
   5171                }
   5172                break;
   5173             left16x8:
   5174                if (sh >= 0 && sh <= 15) {
   5175                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | sh, 0,0,0);
   5176                   goto done;
   5177                }
   5178                break;
   5179             left8x16:
   5180                if (sh >= 0 && sh <= 7) {
   5181                   *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | sh, 0,0,0);
   5182                   goto done;
   5183                }
   5184                break;
   5185          }
   5186          goto bad;
   5187       }
   5188       case ARM64in_VExtV: {
   5189          /*
   5190             011 01110 000 m 0 imm4 0 n d  EXT Vd.16b, Vn.16b, Vm.16b, #imm4
   5191             where imm4 = the shift amount, in bytes,
   5192                   Vn is low operand, Vm is high operand
   5193          */
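                 /* E.g. amtB == 1 gives Vd.16b = { Vn.b[1..15], Vm.b[0] },
                    i.e. bytes are taken starting at index amtB from the
                    concatenation Vm:Vn, with Vn as the low half. */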
   5194          UInt vD   = qregEnc(i->ARM64in.VExtV.dst);
   5195          UInt vN   = qregEnc(i->ARM64in.VExtV.srcLo);
   5196          UInt vM   = qregEnc(i->ARM64in.VExtV.srcHi);
   5197          UInt imm4 = i->ARM64in.VExtV.amtB;
   5198          vassert(imm4 >= 1 && imm4 <= 15);
   5199          *p++ = X_3_8_5_6_5_5(X011, X01110000, vM,
   5200                               X000000 | (imm4 << 1), vN, vD);
   5201          goto done;
   5202       }
   5203       case ARM64in_VImmQ: {
   5204          UInt   rQ  = qregEnc(i->ARM64in.VImmQ.rQ);
   5205          UShort imm = i->ARM64in.VImmQ.imm;
   5206          vassert(rQ < 32);
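                 /* As used here, 'imm' is a per-byte mask: bit i set means
                    byte i of the 128-bit result is 0xFF.  The nonzero cases
                    below use MOVI (64-bit scalar form, op=1, cmode=1110),
                    which also zeroes the upper half; 0x0000 and 0xFFFF use
                    MOVI/MVNI .4s.  Only the masks listed are handled. */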
   5207          switch (imm) {
   5208             case 0x0000:
   5209                // movi rQ.4s, #0x0 == 0x4F 0x00 0x04 000 rQ
   5210                *p++ = 0x4F000400 | rQ;
   5211                goto done;
   5212             case 0x0001:
   5213                // movi rQ, #0xFF == 0x2F 0x00 0xE4 001 rQ
   5214                *p++ = 0x2F00E420 | rQ;
   5215                goto done;
   5216             case 0x0003:
   5217                // movi rQ, #0xFFFF == 0x2F 0x00 0xE4 011 rQ
   5218                *p++ = 0x2F00E460 | rQ;
   5219                goto done;
   5220             case 0x000F:
   5221                // movi rQ, #0xFFFFFFFF == 0x2F 0x00 0xE5 111 rQ
   5222                *p++ = 0x2F00E5E0 | rQ;
   5223                goto done;
   5224             case 0x003F:
   5225                // movi rQ, #0xFFFFFFFFFFFF == 0x2F 0x01 0xE7 111 rQ
   5226                *p++ = 0x2F01E7E0 | rQ;
   5227                goto done;
   5228             case 0x00FF:
   5229                // movi rQ, #0xFFFFFFFFFFFFFFFF == 0x2F 0x07 0xE7 111 rQ
   5230                *p++ = 0x2F07E7E0 | rQ;
   5231                goto done;
   5232             case 0xFFFF:
   5233                // mvni rQ.4s, #0x0 == 0x6F 0x00 0x04 000 rQ
   5234                *p++ = 0x6F000400 | rQ;
   5235                goto done;
   5236             default:
   5237                break;
   5238          }
   5239          goto bad; /* no other handled cases right now */
   5240       }
   5241 
   5242       case ARM64in_VDfromX: {
   5243          /* INS Vd.D[0], rX
   5244             0100 1110 0000 1000 0001 11 nn dd   INS Vd.D[0], Xn
   5245             This isn't wonderful, in the sense that the upper half of
   5246             the vector register stays unchanged and thus the insn is
   5247             data dependent on its output register. */
   5248          UInt dd = dregEnc(i->ARM64in.VDfromX.rD);
   5249          UInt xx = iregEnc(i->ARM64in.VDfromX.rX);
   5250          vassert(xx < 31);
   5251          *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
   5252          goto done;
   5253       }
   5254 
   5255       case ARM64in_VQfromX: {
   5256          /* FMOV Dd, Xn
   5257             1001 1110 0110 0111 0000 00 nn dd   FMOV Dd, Xn
   5258             This zeroes out the top half of the destination, which
   5259             is what we need.  TODO: can we do VDfromX and VQfromXX better? */
   5260          UInt dd = qregEnc(i->ARM64in.VQfromX.rQ);
   5261          UInt xx = iregEnc(i->ARM64in.VQfromX.rXlo);
   5262          vassert(xx < 31);
   5263          *p++ = 0x9E670000 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
   5264          goto done;
   5265       }
   5266 
   5267       case ARM64in_VQfromXX: {
   5268          /* What we really generate is a two insn sequence:
   5269                INS Vd.D[0], Xlo; INS Vd.D[1], Xhi
   5270             0100 1110 0000 1000 0001 11 nn dd   INS Vd.D[0], Xn
   5271             0100 1110 0001 1000 0001 11 nn dd   INS Vd.D[1], Xn
   5272          */
   5273          UInt qq  = qregEnc(i->ARM64in.VQfromXX.rQ);
   5274          UInt xhi = iregEnc(i->ARM64in.VQfromXX.rXhi);
   5275          UInt xlo = iregEnc(i->ARM64in.VQfromXX.rXlo);
   5276          vassert(xhi < 31 && xlo < 31);
   5277          *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xlo,qq);
   5278          *p++ = 0x4E181C00 | X_2_6_2_12_5_5(0,0,0,0,xhi,qq);
   5279          goto done;
   5280       }
   5281 
   5282       case ARM64in_VXfromQ: {
   5283          /* 010 0111 0000 01000 001111 nn dd  UMOV Xd, Vn.D[0]
   5284             010 0111 0000 11000 001111 nn dd  UMOV Xd, Vn.D[1]
   5285          */
   5286          UInt dd     = iregEnc(i->ARM64in.VXfromQ.rX);
   5287          UInt nn     = qregEnc(i->ARM64in.VXfromQ.rQ);
   5288          UInt laneNo = i->ARM64in.VXfromQ.laneNo;
   5289          vassert(dd < 31);
   5290          vassert(laneNo < 2);
   5291          *p++ = X_3_8_5_6_5_5(X010, X01110000,
   5292                               laneNo == 1 ? X11000 : X01000, X001111, nn, dd);
   5293          goto done;
   5294       }
   5295 
   5296       case ARM64in_VXfromDorS: {
   5297          /* 000 11110001 00110 000000 n d     FMOV Wd, Sn
   5298             100 11110011 00110 000000 n d     FMOV Xd, Dn
   5299          */
   5300          UInt dd    = iregEnc(i->ARM64in.VXfromDorS.rX);
   5301          UInt nn    = dregEnc(i->ARM64in.VXfromDorS.rDorS);
   5302          Bool fromD = i->ARM64in.VXfromDorS.fromD;
   5303          vassert(dd < 31);
   5304          *p++ = X_3_8_5_6_5_5(fromD ? X100 : X000,
   5305                               fromD ? X11110011 : X11110001,
   5306                               X00110, X000000, nn, dd);
   5307          goto done;
   5308       }
   5309 
   5310       case ARM64in_VMov: {
   5311          /* 000 11110 00 10000 00 10000 n d   FMOV Sd, Sn
   5312             000 11110 01 10000 00 10000 n d   FMOV Dd, Dn
   5313             010 01110 10 1 n    0 00111 n d   MOV Vd.16b, Vn.16b
   5314          */
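                 /* The 16-byte case is MOV Vd.16b, Vn.16b, the preferred
                    alias of ORR Vd.16b, Vn.16b, Vn.16b, which is why 'nn'
                    is passed twice below. */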
   5315          HReg rD = i->ARM64in.VMov.dst;
   5316          HReg rN = i->ARM64in.VMov.src;
   5317          switch (i->ARM64in.VMov.szB) {
   5318             case 16: {
   5319                UInt dd = qregEnc(rD);
   5320                UInt nn = qregEnc(rN);
   5321                *p++ = X_3_8_5_6_5_5(X010, X01110101, nn, X000111, nn, dd);
   5322                goto done;
   5323             }
   5324             case 8: {
   5325                UInt dd = dregEnc(rD);
   5326                UInt nn = dregEnc(rN);
   5327                *p++ = X_3_8_5_6_5_5(X000, X11110011, X00000, X010000, nn, dd);
   5328                goto done;
   5329             }
   5330             default:
   5331                break;
   5332          }
   5333          goto bad;
   5334       }
   5335 
   5336       case ARM64in_EvCheck: {
   5337          /* The sequence is fixed (canned) except for the two amodes
   5338             supplied by the insn.  These don't change the length, though.
   5339             We generate:
   5340                ldr  w9, [x21 + #8]   8 == offsetof(host_EvC_COUNTER)
   5341                subs w9, w9, #1
   5342                str  w9, [x21 + #8]   8 == offsetof(host_EvC_COUNTER)
   5343                bpl  nofail
   5344                ldr  x9, [x21 + #0]   0 == offsetof(host_EvC_FAILADDR)
   5345                br   x9
   5346               nofail:
   5347          */
   5348          UInt* p0 = p;
   5349          p = do_load_or_store32(p, True/*isLoad*/, /*w*/9,
   5350                                 i->ARM64in.EvCheck.amCounter);
   5351          *p++ = 0x71000529; /* subs w9, w9, #1 */
   5352          p = do_load_or_store32(p, False/*!isLoad*/, /*w*/9,
   5353                                 i->ARM64in.EvCheck.amCounter);
   5354          *p++ = 0x54000065; /* bpl nofail */
   5355          p = do_load_or_store64(p, True/*isLoad*/, /*x*/9,
   5356                                 i->ARM64in.EvCheck.amFailAddr);
   5357          *p++ = 0xD61F0120; /* br x9 */
   5358          /* nofail: */
   5359 
   5360          /* Crosscheck */
   5361          vassert(evCheckSzB_ARM64() == (UChar*)p - (UChar*)p0);
   5362          goto done;
   5363       }
   5364 
   5365       case ARM64in_ProfInc: {
   5366          /* We generate:
   5367               (ctrP is unknown now, so use 0x6555'7555'8555'9566 in the
   5368               expectation that a later call to LibVEX_patchProfCtr
   5369               will be used to fill in the immediate fields once the
   5370               right value is known.)
   5371             imm64-exactly4 x9, 0x6555'7555'8555'9566
   5372             ldr  x8, [x9]
   5373             add  x8, x8, #1
   5374             str  x8, [x9]
   5375          */
   5376          p = imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL);
   5377          *p++ = 0xF9400128;
   5378          *p++ = 0x91000508;
   5379          *p++ = 0xF9000128;
   5380          /* Tell the caller .. */
   5381          vassert(!(*is_profInc));
   5382          *is_profInc = True;
   5383          goto done;
   5384       }
   5385 
   5386       /* ... */
   5387       default:
   5388          goto bad;
   5389     }
   5390 
   5391   bad:
   5392    ppARM64Instr(i);
   5393    vpanic("emit_ARM64Instr");
   5394    /*NOTREACHED*/
   5395 
   5396   done:
   5397    vassert(((UChar*)p) - &buf[0] <= 36);
   5398    return ((UChar*)p) - &buf[0];
   5399 }
   5400 
   5401 
   5402 /* How big is an event check?  See case for ARM64in_EvCheck in
   5403    emit_ARM64Instr just above.  That crosschecks what this returns, so
   5404    we can tell if we're inconsistent. */
   5405 Int evCheckSzB_ARM64 (void)
   5406 {
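           /* 6 fixed-size instructions of 4 bytes each; see the
              ARM64in_EvCheck case in emit_ARM64Instr above. */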
   5407    return 24;
   5408 }
   5409 
   5410 
   5411 /* NB: what goes on here has to be very closely coordinated with the
   5412    emitInstr case for XDirect, above. */
   5413 VexInvalRange chainXDirect_ARM64 ( VexEndness endness_host,
   5414                                    void* place_to_chain,
   5415                                    const void* disp_cp_chain_me_EXPECTED,
   5416                                    const void* place_to_jump_to )
   5417 {
   5418    vassert(endness_host == VexEndnessLE);
   5419 
   5420    /* What we're expecting to see is:
   5421         movw x9, disp_cp_chain_me_to_EXPECTED[15:0]
   5422         movk x9, disp_cp_chain_me_to_EXPECTED[31:16], lsl 16
   5423         movk x9, disp_cp_chain_me_to_EXPECTED[47:32], lsl 32
   5424         movk x9, disp_cp_chain_me_to_EXPECTED[63:48], lsl 48
   5425         blr  x9
   5426       viz
   5427         <16 bytes generated by imm64_to_ireg_EXACTLY4>
   5428         D6 3F 01 20
   5429    */
   5430    UInt* p = (UInt*)place_to_chain;
   5431    vassert(0 == (3 & (HWord)p));
   5432    vassert(is_imm64_to_ireg_EXACTLY4(
   5433               p, /*x*/9, (Addr)disp_cp_chain_me_EXPECTED));
   5434    vassert(p[4] == 0xD63F0120);
   5435 
   5436    /* And what we want to change it to is:
   5437         movw x9, place_to_jump_to[15:0]
   5438         movk x9, place_to_jump_to[31:16], lsl 16
   5439         movk x9, place_to_jump_to[47:32], lsl 32
   5440         movk x9, place_to_jump_to[63:48], lsl 48
   5441         br   x9
   5442       viz
   5443         <16 bytes generated by imm64_to_ireg_EXACTLY4>
   5444         D6 1F 01 20
   5445 
   5446       The replacement has the same length as the original.
   5447    */
   5448    (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)place_to_jump_to);
   5449    p[4] = 0xD61F0120;
   5450 
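           /* Five 32-bit insns (20 bytes) were rewritten, hence the
              invalidation length below. */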
   5451    VexInvalRange vir = {(HWord)p, 20};
   5452    return vir;
   5453 }
   5454 
   5455 
   5456 /* NB: what goes on here has to be very closely coordinated with the
   5457    emitInstr case for XDirect, above. */
   5458 VexInvalRange unchainXDirect_ARM64 ( VexEndness endness_host,
   5459                                      void* place_to_unchain,
   5460                                      const void* place_to_jump_to_EXPECTED,
   5461                                      const void* disp_cp_chain_me )
   5462 {
   5463    vassert(endness_host == VexEndnessLE);
   5464 
   5465    /* What we're expecting to see is:
   5466         movw x9, place_to_jump_to_EXPECTED[15:0]
   5467         movk x9, place_to_jump_to_EXPECTED[31:16], lsl 16
   5468         movk x9, place_to_jump_to_EXPECTED[47:32], lsl 32
   5469         movk x9, place_to_jump_to_EXPECTED[63:48], lsl 48
   5470         br   x9
   5471       viz
   5472         <16 bytes generated by imm64_to_ireg_EXACTLY4>
   5473         D6 1F 01 20
   5474    */
   5475    UInt* p = (UInt*)place_to_unchain;
   5476    vassert(0 == (3 & (HWord)p));
   5477    vassert(is_imm64_to_ireg_EXACTLY4(
   5478               p, /*x*/9, (Addr)place_to_jump_to_EXPECTED));
   5479    vassert(p[4] == 0xD61F0120);
   5480 
   5481    /* And what we want to change it to is:
   5482         movw x9, disp_cp_chain_me_to[15:0]
   5483         movk x9, disp_cp_chain_me_to[31:16], lsl 16
   5484         movk x9, disp_cp_chain_me_to[47:32], lsl 32
   5485         movk x9, disp_cp_chain_me_to[63:48], lsl 48
   5486         blr  x9
   5487       viz
   5488         <16 bytes generated by imm64_to_ireg_EXACTLY4>
   5489         D6 3F 01 20
   5490    */
   5491    (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
   5492    p[4] = 0xD63F0120;
   5493 
   5494    VexInvalRange vir = {(HWord)p, 20};
   5495    return vir;
   5496 }
   5497 
   5498 
   5499 /* Patch the counter address into a profile inc point, as previously
   5500    created by the ARM64in_ProfInc case for emit_ARM64Instr. */
   5501 VexInvalRange patchProfInc_ARM64 ( VexEndness endness_host,
   5502                                    void*  place_to_patch,
   5503                                    const ULong* location_of_counter )
   5504 {
   5505    vassert(sizeof(ULong*) == 8);
   5506    vassert(endness_host == VexEndnessLE);
   5507    UInt* p = (UInt*)place_to_patch;
   5508    vassert(0 == (3 & (HWord)p));
   5509    vassert(is_imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL));
   5510    vassert(p[4] == 0xF9400128);
   5511    vassert(p[5] == 0x91000508);
   5512    vassert(p[6] == 0xF9000128);
   5513    imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)location_of_counter);
   5514    VexInvalRange vir = {(HWord)p, 4*4};
   5515    return vir;
   5516 }
   5517 
   5518 /*---------------------------------------------------------------*/
   5519 /*--- end                                   host_arm64_defs.c ---*/
   5520 /*---------------------------------------------------------------*/
   5521